
Commit 28857ae

committed Mar 17, 2025
add conv1dx3 for example
1 parent 5fc2a2a commit 28857ae

File tree

12 files changed: +75 −29 lines

 

‎example/clfs.py

+65-3
@@ -1,7 +1,7 @@
 import jax.numpy as jnp
 from jax import random

-from plugins.minitorch.nn import Rnn, Dense
+from plugins.minitorch.nn import Conv, Rnn, Dense
 from plugins.minitorch import Initer
 from plugins.minitorch.optimizer import Adam
 from plugins.minitorch.loss import CrossEntropyLoss
@@ -101,5 +101,67 @@ def fit(self, x, y):
         self.optr.close()


-class conv3x3(Clfs):
-    pass
+class conv1dx3(Clfs):
+    def __init__(self, lr, epoches, batch_size, depth=1):
+        super(conv1dx3, self).__init__()
+
+        self.config = {
+            'conv1d:00': Conv.get_conv1d(9, 16, (3,)),   # 128 -> 126
+            'conv1d:01': Conv.get_conv1d(16, 16, (3,)),  # 126 -> 124
+            'maxpooling1d:0': Conv.get_max_pool1d(2),    # 124 -> 62
+            'conv1d:10': Conv.get_conv1d(16, 32, (3,)),  # 62 -> 60
+            'conv1d:11': Conv.get_conv1d(32, 32, (3,)),  # 60 -> 58
+            'maxpooling1d:1': Conv.get_max_pool1d(2),    # 58 -> 29
+            'conv1d:20': Conv.get_conv1d(32, 64, (3,)),  # 29 -> 27
+            'conv1d:21': Conv.get_conv1d(64, 64, (3,)),  # 27 -> 25
+            'maxpooling1d:2': Conv.get_max_pool1d(2),    # 25 -> 12
+            'fc:0': Dense.get_linear(12 * 64, 256),      # 64 x 12 = 768
+            'fc:1': Dense.get_linear(256, 64),
+            'fc:2': Dense.get_linear(64, 6)
+        }
+
+        self.epoches = epoches
+        self.lr = lr
+        self.batch_size = batch_size
+        self.losr = CrossEntropyLoss(self.forward)
+
+    def conv_block(self, x, params, id):
+        res = Conv.conv1d(x, params[f'conv1d:{id}0'], self.config[f'conv1d:{id}0'])
+        res = Conv.conv1d(res, params[f'conv1d:{id}1'], self.config[f'conv1d:{id}1'])
+        res = Conv.max_pooling1d(res, self.config[f'maxpooling1d:{id}'])
+
+        return res
+
+    def forward(self, x, params, train=False):
+        res = self.conv_block(x, params, 0)
+        res = self.conv_block(res, params, 1)
+        res = self.conv_block(res, params, 2)
+
+        res = res.reshape(res.shape[0], -1)
+
+        res = Dense.linear(res, params['fc:0'])
+        res = Dense.linear(res, params['fc:1'])
+        res = Dense.linear(res, params['fc:2'])
+
+        return softmax(res)
+
+    @timing
+    def predict_proba(self, x):
+        return self.forward(params=self.optr.get_params(), x=x, train=False)
+
+    @timing
+    def fit(self, x, y):
+        self.optr = Adam(Initer(self.config, random.PRNGKey(42))(), lr=self.lr, batch_size=self.batch_size)
+        _loss = self.losr.get_loss(True)
+        self.optr.open(_loss, x, y)
+
+        _tloss = self.losr.get_loss(False)
+
+        log_wise = self.epoches // 10 if self.epoches >= 10 else self.epoches
+        for cnt in range(self.epoches):
+            if (cnt + 1) % log_wise == 0:
+                print(f'====> Epoch {cnt + 1}/{self.epoches}, loss: {_tloss(self.optr.get_params(), x, y)}')
+
+            self.optr.update()
+
+        self.optr.close()
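
A quick sanity check of the length annotations in the config above (a sketch with hypothetical helper names, assuming 'valid' no-padding 1-D convolutions with kernel size 3 and max pooling that floor-divides the length by 2, as the comments suggest):

# Not part of the repo: verify the chain 128 -> 126 -> 124 -> 62 -> 60 -> 58 -> 29 -> 27 -> 25 -> 12.
def conv1d_out(length, kernel=3):
    return length - (kernel - 1)   # a 'valid' convolution shrinks the length by kernel - 1

def pool1d_out(length, window=2):
    return length // window        # pooling floor-divides the length

length = 128
for _ in range(3):                 # three conv-conv-pool blocks
    length = pool1d_out(conv1d_out(conv1d_out(length)))

print(length)                      # 12
print(length * 64)                 # 768, matching Dense.get_linear(12 * 64, 256)
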
File renamed without changes.
File renamed without changes.

‎example/data_process.py

+1-1
@@ -75,7 +75,7 @@ def one_hot(y: jnp.ndarray, num_class: int):
 # X_train = jnp.transpose(X_train, (2, 0, 1))
 # X_test = jnp.transpose(X_test, (2, 0, 1))

-print('X_train 形状:', X_train.shape)  # expected (128, 7352, 9)
+print('X_train 形状:', X_train.shape)  # expected (7352, 9, 128)
 print('y_train 形状:', y_train.shape)  # expected (7352, 6)
 print('X_test 形状:', X_test.shape)
 print('y_test 形状:', y_test.shape)
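
The corrected comment pins down a (samples, channels, time steps) layout; a minimal consistency check (a sketch, assuming the arrays are loaded as above):

# Sketch only: 7352 samples, 9 input channels, 128 time steps per window,
# which lines up with 'conv1d:00': Conv.get_conv1d(9, 16, (3,)) in example/clfs.py.
assert X_train.shape == (7352, 9, 128)
assert y_train.shape == (7352, 6)
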

‎example/log/2025_03_17_17-24-24/hyper.toml

-11
This file was deleted.

‎example/log/2025_03_17_17-24-24/test.csv

-3
This file was deleted.

‎example/log/2025_03_17_17-24-24/valid.csv

-3
This file was deleted.

‎example/main.py

+4-3
@@ -1,18 +1,19 @@
 from plugins.lrkit.executer import KFlodCrossExecuter

-from clfs import lstm, gru
+from clfs import lstm, gru, conv1dx3
 from data_process import X_train, X_test, y_train, y_test

 excr = KFlodCrossExecuter(
     X_train, y_train, X_test, y_test,
     clf_dict={
-        'gru': gru(lr=0.01, epoches=20, batch_size=64),
+        'gru': gru(lr=0.01, epoches=30, batch_size=64),
         'lstm': lstm(lr=0.01, epoches=50, batch_size=64),
+        'conv1dx3': conv1dx3(lr=0.001, epoches=50, batch_size=128),
     },
     k=5,
     metric_list=['accuracy', 'macro_f1', 'micro_f1', 'avg_recall'],
     log=True,
     log_dir='./log/',
 )

-excr.run_all()
+excr.run_all(time=True)

‎mlp_cifar10.ipynb

+1-1
@@ -158,7 +158,7 @@
 " # Later we found the same problem occurs even without JIT, since other multi-threaded optimizations still apply without JIT, which triggers the issue here.\n",
 " res = res @ p['w'] + p['b'] \n",
 " res = jnp.maximum(0, res) # use relu activation function\n",
-" res, key = Dense.dropout(res, key, p=0.1, train=train)\n",
+" key, res = Dense.dropout(key, res, p=0.1, train=train)\n",
 "\n",
 " return softmax(res)\n",
 "\n",

‎mlp_mnist.ipynb

+1-1
@@ -157,7 +157,7 @@
 " # Later we found the same problem occurs even without JIT, since other multi-threaded optimizations still apply without JIT, which triggers the issue here.\n",
 " res = res @ p['w'] + p['b'] \n",
 " res = jnp.maximum(0, res) # use relu activation function\n",
-" res, key = Dense.dropout(res, key, p=0.1, train=train) # add dropout\n",
+" key, res = Dense.dropout(key, res, p=0.1, train=train) # add dropout\n",
 "\n",
 " return softmax(res)\n",
 "\n",

‎plugins/lrkit

‎plugins/minitorch/nn/JaxOptimized/fc.py

+2-2
@@ -35,7 +35,7 @@
 from jax import random


-def dropout(x: jnp.ndarray, key, p=0.5, train=True):
+def dropout(key, x: jnp.ndarray, p=0.5, train=True):
     '''
     Applies dropout to the input array during training.

@@ -76,7 +76,7 @@ def dropout(x: jnp.ndarray, key, p=0.5, train=True):
     new_key, use_key = random.split(key) # update key, to make mask different in different **batch**.
     mask = random.bernoulli(use_key, p_keep, x.shape)

-    return jnp.where(mask, x / p_keep, 0), new_key # scale here to make E(X) the same while evaluating.
+    return new_key, jnp.where(mask, x / p_keep, 0) # scale here to make E(X) the same while evaluating.


 def _linear(x: jnp.ndarray, w: jnp.ndarray, b: jnp.ndarray):
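
For reference, the reordered signature threads the PRNG key in first and returns it first, so call sites read key, res = Dense.dropout(key, res, ...). A minimal self-contained sketch of the same inverted-dropout idea (illustration only, not the project's actual module):

import jax.numpy as jnp
from jax import random

def dropout(key, x, p=0.5, train=True):
    # At evaluation time (or with p == 0) the input passes through unchanged.
    if not train or p == 0.0:
        return key, x
    p_keep = 1.0 - p
    new_key, use_key = random.split(key)               # fresh subkey per call, so each batch gets a new mask
    mask = random.bernoulli(use_key, p_keep, x.shape)
    # Scale kept activations by 1 / p_keep so the expected value matches evaluation mode.
    return new_key, jnp.where(mask, x / p_keep, 0.0)

key = random.PRNGKey(0)
x = jnp.ones((4, 8))
key, y = dropout(key, x, p=0.1, train=True)            # key first, matching the new call sites
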

0 commit comments
