9. A Deep Dive into the Source Code of PyTorch's nn.Sequential and ModuleList
Contents
- 1. train & eval
- 2. Computing Derivatives
- 3. Parameter Updates
- 4. ModuleList and Sequential
- 5. Parameter, ParameterList & ParameterDict
1. train & eval
- train mode: the network's training mode; the model learns from training samples and its weights are updated.
- eval mode: the network's evaluation (inference) mode; Dropout layers are disabled and BatchNorm layers use their accumulated running statistics instead of per-batch statistics (see the BatchNorm sketch at the end of this section).
Reference: adapted from material by other authors.
- Testing the nn.Dropout module
import torch
import torch.nn as nn
torch.manual_seed(23242)
# Create a Dropout layer
dropout = nn.Dropout(p=0.3)
# Input tensor
x = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
# In training mode
dropout.train()
print("Training mode:\n", dropout(x))
# In evaluation mode
dropout.eval()
print("Evaluation mode:\n", dropout(x))
- Output (surviving entries are scaled by 1/(1 - p) = 1/0.7 ≈ 1.4286, dropped entries become 0):
Training mode:
tensor([[1.4286, 0.0000, 4.2857],
[5.7143, 7.1429, 8.5714]])
Evaluation mode:
tensor([[1., 2., 3.],
[4., 5., 6.]])
- NumPy version
import numpy as np
np.random.seed(2323)
def dropout(x, p=0.5, training=True):
"""
实现 Dropout 的功能。
参数:
- x: 输入数据,numpy 数组。
- p: Dropout 概率,即随机置零的比例。
- training: 是否处于训练模式。
返回:
- 应用 Dropout 后的数组。
"""
if not training:
return x # 如果是评估模式,直接返回原始输入
# 生成与输入同形状的随机二值 mask
mask = np.random.binomial(1, 1 - p, size=x.shape)
# 按照 Dropout 的规则应用 mask,并进行缩放
return (x * mask) / (1 - p)
# Example input
x = np.array([[1.0, 2.0, 3.0],
[4.0, 5.0, 6.0]])
# In training mode
x_dropout_train = dropout(x, p=0.3, training=True)
print("Training mode:")
print(x_dropout_train)
# In evaluation mode
x_dropout_eval = dropout(x, p=0.3, training=False)
print("\nEvaluation mode:")
print(x_dropout_eval)
- Output:
Training mode:
[[0. 0. 4.28571429]
[5.71428571 0. 8.57142857]]
Evaluation mode:
[[1. 2. 3.]
[4. 5. 6.]]
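The Dropout examples above cover one half of the train/eval difference; BatchNorm is the other common case. The following is a minimal sketch (not part of the original experiments): in train mode nn.BatchNorm1d normalizes with the current batch statistics and updates running_mean/running_var, while in eval mode it normalizes with the accumulated running statistics.
- python
import torch
import torch.nn as nn

torch.manual_seed(2323)

# Create a BatchNorm layer over 3 features
bn = nn.BatchNorm1d(num_features=3)

# Input tensor: 2 samples, 3 features
x = torch.tensor([[1.0, 2.0, 3.0],
                  [4.0, 5.0, 6.0]])

# Training mode: normalize with the batch mean/var and update running stats
bn.train()
print("Training mode:\n", bn(x))
print("running_mean:", bn.running_mean)

# Evaluation mode: normalize with the accumulated running statistics
bn.eval()
print("Evaluation mode:\n", bn(x))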
2. Computing Derivatives
\begin{equation} y=x^2+3x\to \frac{\partial y}{\partial x}=2x+3 \end{equation}
\begin{equation} x=[1,2,3]\to \frac{\partial y}{\partial x}=2x+3=[5,7,9] \end{equation}
- Python
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# @FileName :requre_grad_test.py
# @Time :2024/11/24 15:51
# @Author :Jason Zhang
import torch
from torch import nn
if __name__ == "__main__":
run_code = 0
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = x ** 2 + 3*x
z = y.sum()
x_grad = x.requires_grad
y_grad = y.requires_grad
z_grad = z.requires_grad
print(f"x_grad={x_grad}")
print(f"y_grad={y_grad}")
print(f"z_grad={z_grad}")
z.backward()
x_grad_value = x.grad
print(f"x={x}")
print(f"x_grad_value={x_grad_value}")
- Output:
x_grad=True
y_grad=True
z_grad=True
x=tensor([1., 2., 3.], requires_grad=True)
x_grad_value=tensor([5., 7., 9.])
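The same derivative can be checked without mutating x.grad by calling torch.autograd.grad, which returns the gradients directly. A minimal sketch, reusing y = x² + 3x from above:
- python
import torch

x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = x ** 2 + 3 * x

# torch.autograd.grad returns a tuple of gradients w.r.t. the inputs
(grad_x,) = torch.autograd.grad(y.sum(), x)
print(grad_x)     # tensor([5., 7., 9.])
print(2 * x + 3)  # analytic result 2x + 3, matches the autograd output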
3. Parameter Updates
- python
import torch
import torch.nn as nn

# Create a linear regression model
model = nn.Linear(2, 1)  # 2 input features, 1 output feature

# Define the inputs and labels
x1 = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
y1 = torch.tensor([[3.0], [7.0]])

# Define the loss function
loss_func = nn.MSELoss()

# Set the learning rate
learning_rate = 0.01

# Training loop
epoch = 0  # count the training iterations
while True:
    epoch += 1
    # Forward pass
    output = model(x1)
    # Compute the loss
    loss = loss_func(output, y1)
    # Print the current loss, weight and bias
    print(f"Epoch {epoch}: Loss = {loss.item()}")
    print(f" Weight: {model.weight.data.numpy()}")
    print(f" Bias: {model.bias.data.numpy()}")
    # Stop once the loss is small enough
    if loss.item() < 0.01:
        print("Training finished!")
        break
    # Backward pass
    loss.backward()
    # Update the model parameters by hand (plain gradient descent)
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad
    # Zero the gradients
    model.zero_grad()
- Output (abridged; intermediate epochs omitted):
Epoch 1: Loss = 27.009794235229492
Weight: [[-0.5681111 0.4556529]]
Bias: [0.02086246]
Epoch 2: Loss = 12.700897216796875
Weight: [[-0.33592594 0.78280616]]
Bias: [0.11583048]
Epoch 3: Loss = 6.009862899780273
Weight: [[-0.17655943 1.0063248 ]]
Bias: [0.17998254]
Epoch 4: Loss = 2.8807597160339355
Weight: [[-0.06698827 1.1589792 ]]
Bias: [0.22306578]
Epoch 5: Loss = 1.4171319007873535
Weight: [[0.00853085 1.2631778 ]]
Bias: [0.25174525]
Epoch 300: Loss = 0.014076205901801586
Weight: [[0.5034938 1.3849493]]
Bias: [-0.12144651]
Epoch 301: Loss = 0.013969571329653263
Weight: [[0.5041094 1.3847572]]
Bias: [-0.12225428]
Epoch 302: Loss = 0.013863733038306236
Weight: [[0.5047226 1.3845657]]
Bias: [-0.123059]
Epoch 303: Loss = 0.01375875249505043
Weight: [[0.5053335 1.384375 ]]
Bias: [-0.12386066]
Epoch 304: Loss = 0.013654493726789951
Weight: [[0.50594205 1.384185 ]]
Bias: [-0.12465928]
Epoch 305: Loss = 0.013551048003137112
Weight: [[0.5065483 1.3839957]]
Bias: [-0.12545489]
Epoch 306: Loss = 0.01344841904938221
Weight: [[0.50715226 1.3838071 ]]
Bias: [-0.12624745]
Epoch 307: Loss = 0.013346527703106403
Weight: [[0.50775397 1.3836192 ]]
Bias: [-0.12703702]
Epoch 308: Loss = 0.013245439156889915
Weight: [[0.50835335 1.383432 ]]
Bias: [-0.12782359]
Epoch 309: Loss = 0.013145080767571926
Weight: [[0.5089505 1.3832456]]
Bias: [-0.12860717]
Epoch 310: Loss = 0.013045486062765121
Weight: [[0.5095453 1.3830599]]
Bias: [-0.12938778]
Epoch 311: Loss = 0.012946678325533867
Weight: [[0.5101379 1.3828748]]
Bias: [-0.13016543]
Epoch 312: Loss = 0.012848559767007828
Weight: [[0.5107283 1.3826905]]
Bias: [-0.13094012]
Epoch 313: Loss = 0.01275124866515398
Weight: [[0.51131636 1.3825068 ]]
Bias: [-0.13171189]
Epoch 314: Loss = 0.012654653750360012
Weight: [[0.5119023 1.3823239]]
Bias: [-0.13248071]
Epoch 315: Loss = 0.01255879271775484
Weight: [[0.5124859 1.3821416]]
Bias: [-0.13324663]
Epoch 316: Loss = 0.012463669292628765
Weight: [[0.51306736 1.38196 ]]
Bias: [-0.13400963]
Epoch 317: Loss = 0.012369243428111076
Weight: [[0.5136466 1.3817792]]
Bias: [-0.13476974]
Epoch 318: Loss = 0.012275544926524162
Weight: [[0.51422364 1.3815991 ]]
Bias: [-0.13552696]
Epoch 319: Loss = 0.012182533740997314
Weight: [[0.51479846 1.3814195 ]]
Bias: [-0.1362813]
Epoch 320: Loss = 0.01209024153649807
Weight: [[0.51537114 1.3812407 ]]
Bias: [-0.13703278]
Epoch 321: Loss = 0.011998665519058704
Weight: [[0.5159416 1.3810626]]
Bias: [-0.13778141]
Epoch 322: Loss = 0.01190776564180851
Weight: [[0.51650995 1.3808851 ]]
Bias: [-0.1385272]
Epoch 323: Loss = 0.011817571707069874
Weight: [[0.51707613 1.3807083 ]]
Bias: [-0.13927016]
Epoch 324: Loss = 0.011728014796972275
Weight: [[0.5176402 1.3805323]]
Bias: [-0.1400103]
Epoch 325: Loss = 0.011639159172773361
Weight: [[0.51820207 1.3803568 ]]
Bias: [-0.14074764]
Epoch 326: Loss = 0.01155102625489235
Weight: [[0.5187618 1.380182 ]]
Bias: [-0.14148217]
Epoch 327: Loss = 0.011463488452136517
Weight: [[0.5193194 1.3800079]]
Bias: [-0.14221393]
Epoch 328: Loss = 0.011376669630408287
Weight: [[0.51987493 1.3798344 ]]
Bias: [-0.14294289]
Epoch 329: Loss = 0.011290469206869602
Weight: [[0.52042836 1.3796616 ]]
Bias: [-0.1436691]
Epoch 330: Loss = 0.011204947717487812
Weight: [[0.52097964 1.3794894 ]]
Bias: [-0.14439255]
Epoch 331: Loss = 0.011120039038360119
Weight: [[0.52152884 1.379318 ]]
Bias: [-0.14511324]
Epoch 332: Loss = 0.011035802774131298
Weight: [[0.52207595 1.3791472 ]]
Bias: [-0.14583121]
Epoch 333: Loss = 0.010952199809253216
Weight: [[0.522621 1.378977]]
Bias: [-0.14654645]
Epoch 334: Loss = 0.010869231075048447
Weight: [[0.523164 1.3788074]]
Bias: [-0.14725898]
Epoch 335: Loss = 0.010786919854581356
Weight: [[0.5237049 1.3786385]]
Bias: [-0.14796881]
Epoch 336: Loss = 0.010705174878239632
Weight: [[0.5242438 1.3784703]]
Bias: [-0.14867595]
Epoch 337: Loss = 0.010624050162732601
Weight: [[0.5247806 1.3783027]]
Bias: [-0.1493804]
Epoch 338: Loss = 0.010543602518737316
Weight: [[0.52531534 1.3781357 ]]
Bias: [-0.15008219]
Epoch 339: Loss = 0.010463712736964226
Weight: [[0.5258481 1.3779694]]
Bias: [-0.15078129]
Epoch 340: Loss = 0.010384460911154747
Weight: [[0.5263788 1.3778037]]
Bias: [-0.15147775]
Epoch 341: Loss = 0.010305758565664291
Weight: [[0.5269075 1.3776386]]
Bias: [-0.15217157]
Epoch 342: Loss = 0.010227718390524387
Weight: [[0.52743423 1.3774741 ]]
Bias: [-0.15286276]
Epoch 343: Loss = 0.01015022024512291
Weight: [[0.5279589 1.3773103]]
Bias: [-0.15355131]
Epoch 344: Loss = 0.010073349811136723
Weight: [[0.52848166 1.3771471 ]]
Bias: [-0.15423726]
Epoch 345: Loss = 0.009997041895985603
Weight: [[0.52900237 1.3769845 ]]
Bias: [-0.15492061]
Training finished!
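The manual update loop above is exactly what torch.optim.SGD does internally: optimizer.step() applies param -= lr * param.grad, and optimizer.zero_grad() replaces model.zero_grad(). A minimal sketch of the same training loop rewritten with an optimizer (not the original run, so the printed epoch count will differ):
- python
import torch
import torch.nn as nn

model = nn.Linear(2, 1)
x1 = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
y1 = torch.tensor([[3.0], [7.0]])
loss_func = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

epoch = 0
while True:
    epoch += 1
    loss = loss_func(model(x1), y1)  # forward pass + loss
    if loss.item() < 0.01:
        print(f"Training finished at epoch {epoch}!")
        break
    optimizer.zero_grad()  # clear old gradients
    loss.backward()        # backward pass
    optimizer.step()       # SGD update: param -= lr * param.grad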
4. ModuleList and Sequential
- python
import torch
from torch import nn
from pytorch_model_summary import summary
torch.manual_seed(2323)
class MyModel(nn.Module):
def __init__(self):
super(MyModel, self).__init__()
self.flatten = nn.Flatten()
self.block = nn.ModuleList([
nn.Linear(28 * 28, 512),
nn.ReLU(),
nn.Linear(512, 10)
])
def forward(self, x):
x = self.flatten(x)
for layer in self.block:
x = layer(x)
return x
class MyNewNet(MyModel):
def __init__(self):
super(MyNewNet, self).__init__()
        self.block.insert(2, nn.Linear(512, 256))  # insert a new linear layer
        self.block.insert(3, nn.ReLU())            # insert a new activation
        self.block.insert(4, nn.Linear(256, 512))  # insert another linear layer
        self.block.insert(5, nn.ReLU())            # insert another activation
class MyModelSequential(nn.Module):
def __init__(self):
super(MyModelSequential, self).__init__()
self.block = nn.Sequential(
nn.Flatten(),
nn.Linear(28 * 28, 512),
nn.ReLU(),
nn.Linear(512, 10)
)
def forward(self, x):
y = self.block(x)
return y
if __name__ == "__main__":
    # Test the original model
my_model = MyModel()
my_model_get_name = my_model._get_name()
print(f"my_model_get_name={my_model_get_name}")
print(f"str(my_model)=\n{str(my_model)}")
print(f"dir(my_model)=\n{dir(my_model)}")
my_model_modules = list(my_model.named_modules())
print(f"my_model_modules=\n{my_model_modules}")
print(f"*"*50)
print("Original Model:")
print(summary(my_model, torch.ones((1, 28, 28))))
    # Test the new model
my_new_model = MyNewNet()
print("\nNew Model:")
print(summary(my_new_model, torch.ones((1, 28, 28))))
my_sequential_model = MyModelSequential()
print("\nSequential Model:")
print(summary(my_sequential_model, torch.ones((1, 28, 28))))
- Output:
my_model_get_name=MyModel
str(my_model)=
MyModel(
(flatten): Flatten(start_dim=1, end_dim=-1)
(block): ModuleList(
(0): Linear(in_features=784, out_features=512, bias=True)
(1): ReLU()
(2): Linear(in_features=512, out_features=10, bias=True)
)
)
dir(my_model)=
['T_destination', '__annotations__', '__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_apply', '_backward_hooks', '_backward_pre_hooks', '_buffers', '_call_impl', '_forward_hooks', '_forward_hooks_with_kwargs', '_forward_pre_hooks', '_forward_pre_hooks_with_kwargs', '_get_backward_hooks', '_get_backward_pre_hooks', '_get_name', '_is_full_backward_hook', '_load_from_state_dict', '_load_state_dict_post_hooks', '_load_state_dict_pre_hooks', '_maybe_warn_non_full_backward_hook', '_modules', '_named_members', '_non_persistent_buffers_set', '_parameters', '_register_load_state_dict_pre_hook', '_register_state_dict_hook', '_replicate_for_data_parallel', '_save_to_state_dict', '_slow_forward', '_state_dict_hooks', '_state_dict_pre_hooks', '_version', 'add_module', 'apply', 'bfloat16', 'block', 'buffers', 'call_super_init', 'children', 'cpu', 'cuda', 'double', 'dump_patches', 'eval', 'extra_repr', 'flatten', 'float', 'forward', 'get_buffer', 'get_extra_state', 'get_parameter', 'get_submodule', 'half', 'ipu', 'load_state_dict', 'modules', 'named_buffers', 'named_children', 'named_modules', 'named_parameters', 'parameters', 'register_backward_hook', 'register_buffer', 'register_forward_hook', 'register_forward_pre_hook', 'register_full_backward_hook', 'register_full_backward_pre_hook', 'register_load_state_dict_post_hook', 'register_module', 'register_parameter', 'register_state_dict_pre_hook', 'requires_grad_', 'set_extra_state', 'share_memory', 'state_dict', 'to', 'to_empty', 'train', 'training', 'type', 'xpu', 'zero_grad']
my_model_modules=
[('', MyModel(
(flatten): Flatten(start_dim=1, end_dim=-1)
(block): ModuleList(
(0): Linear(in_features=784, out_features=512, bias=True)
(1): ReLU()
(2): Linear(in_features=512, out_features=10, bias=True)
)
)), ('flatten', Flatten(start_dim=1, end_dim=-1)), ('block', ModuleList(
(0): Linear(in_features=784, out_features=512, bias=True)
(1): ReLU()
(2): Linear(in_features=512, out_features=10, bias=True)
)), ('block.0', Linear(in_features=784, out_features=512, bias=True)), ('block.1', ReLU()), ('block.2', Linear(in_features=512, out_features=10, bias=True))]
**************************************************
Original Model:
-----------------------------------------------------------------------
Layer (type) Output Shape Param # Tr. Param #
=======================================================================
Flatten-1 [1, 784] 0 0
Linear-2 [1, 512] 401,920 401,920
ReLU-3 [1, 512] 0 0
Linear-4 [1, 10] 5,130 5,130
=======================================================================
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
-----------------------------------------------------------------------
New Model:
-----------------------------------------------------------------------
Layer (type) Output Shape Param # Tr. Param #
=======================================================================
Flatten-1 [1, 784] 0 0
Linear-2 [1, 512] 401,920 401,920
ReLU-3 [1, 512] 0 0
Linear-4 [1, 256] 131,328 131,328
ReLU-5 [1, 256] 0 0
Linear-6 [1, 512] 131,584 131,584
ReLU-7 [1, 512] 0 0
Linear-8 [1, 10] 5,130 5,130
=======================================================================
Total params: 669,962
Trainable params: 669,962
Non-trainable params: 0
-----------------------------------------------------------------------
Sequential Model:
-----------------------------------------------------------------------
Layer (type) Output Shape Param # Tr. Param #
=======================================================================
Flatten-1 [1, 784] 0 0
Linear-2 [1, 512] 401,920 401,920
ReLU-3 [1, 512] 0 0
Linear-4 [1, 10] 5,130 5,130
=======================================================================
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
-----------------------------------------------------------------------
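A point worth emphasizing about the example above: nn.ModuleList (like nn.Sequential) registers its entries as submodules, so their parameters appear in model.parameters() and are trained; a plain Python list does not. A minimal sketch (the class names here are illustrative only):
- python
import torch
from torch import nn

class WithModuleList(nn.Module):
    def __init__(self):
        super().__init__()
        # registered as submodules -> parameters are visible and trainable
        self.layers = nn.ModuleList([nn.Linear(4, 4), nn.Linear(4, 2)])

class WithPlainList(nn.Module):
    def __init__(self):
        super().__init__()
        # NOT registered -> parameters are invisible to PyTorch
        self.layers = [nn.Linear(4, 4), nn.Linear(4, 2)]

print(len(list(WithModuleList().parameters())))  # 4 (two weights + two biases)
print(len(list(WithPlainList().parameters())))   # 0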
5. Parameter, ParameterList & ParameterDict
To add a new tensor to a module and have it registered as a trainable parameter (tracked by the module and updated during training), wrap it with the nn.Parameter class.
- python:
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# @FileName :ParametersTest.py
# @Time :2024/11/24 18:55
# @Author :Jason Zhang
import torch
from torch import nn
torch.manual_seed(4223)
class MyOriginalNet(nn.Module):
def __init__(self):
super(MyOriginalNet, self).__init__()
self.linear1 = nn.Linear(3, 4)
self.mytensor = torch.rand(4, 5)
def forward(self, x):
x = self.linear1(x)
return x
class MyParameterNet(nn.Module):
def __init__(self):
super(MyParameterNet, self).__init__()
self.linear1 = nn.Linear(3, 4)
self.my_tensor = nn.Parameter(torch.rand(4, 5))
def forward(self, x):
x = self.linear1(x)
return x
class MyParameterListNet(nn.Module):
def __init__(self):
super(MyParameterListNet, self).__init__()
self.linear1 = nn.Linear(3, 4)
self.my_tensor = nn.ParameterList(torch.rand(4, 5) for i in range(5))
def forward(self, x):
x = self.linear1(x)
return x
class MyParameterDictNet(nn.Module):
def __init__(self):
super(MyParameterDictNet, self).__init__()
self.linear1 = nn.Linear(3, 4)
self.para_dict = nn.ParameterDict({
'left': nn.Parameter(torch.randn(5, 10)),
'right': nn.Parameter(torch.randn(5, 10)),
})
def forward(self, x, choice):
x = self.para_dict[choice].mm(x)
return x
if __name__ == "__main__":
run_code = 0
my_net = MyOriginalNet()
my_original_list = list(my_net.named_parameters())
print(f"my_original_list={my_original_list}")
my_para = MyParameterNet()
my_para_list = list(my_para.named_parameters())
print(f"my_para_list={my_para_list}")
my_parameter_list = MyParameterListNet()
my_parameter_list_1 = list(my_parameter_list.named_parameters())
print(f"my_parameter_list_1={my_parameter_list_1}")
my_para_dict = MyParameterDictNet()
my_para_dict_list = list(my_para_dict.named_parameters())
print(f"my_para_dict_list={my_para_dict_list}")
- Output:
my_original_list=[('linear1.weight', Parameter containing:
tensor([[-0.5089, 0.1086, -0.3703],
[-0.0486, 0.5186, -0.0712],
[ 0.0793, 0.2897, -0.0913],
[-0.3404, 0.4358, 0.4464]], requires_grad=True)), ('linear1.bias', Parameter containing:
tensor([-0.2931, 0.2479, -0.2552, -0.5016], requires_grad=True))]
my_para_list=[('my_tensor', Parameter containing:
tensor([[0.3348, 0.5709, 0.6592, 0.7997, 0.2638],
[0.9706, 0.9607, 0.1166, 0.5584, 0.6739],
[0.4050, 0.1469, 0.6307, 0.0260, 0.3300],
[0.4974, 0.1928, 0.4940, 0.1852, 0.1085]], requires_grad=True)), ('linear1.weight', Parameter containing:
tensor([[-0.3481, -0.0894, -0.3917],
[ 0.1817, 0.2657, 0.5464],
[ 0.5769, 0.2373, -0.3456],
[-0.2842, -0.1695, 0.4080]], requires_grad=True)), ('linear1.bias', Parameter containing:
tensor([-0.3280, 0.3507, -0.4470, 0.0984], requires_grad=True))]
my_parameter_list_1=[('linear1.weight', Parameter containing:
tensor([[-0.1617, -0.1333, 0.2694],
[-0.3950, -0.5114, 0.0524],
[ 0.1599, 0.1093, -0.1124],
[ 0.4692, 0.1502, 0.4282]], requires_grad=True)), ('linear1.bias', Parameter containing:
tensor([ 0.5326, -0.0331, 0.4625, 0.4384], requires_grad=True)), ('my_tensor.0', Parameter containing:
tensor([[0.8858, 0.7725, 0.5914, 0.3476, 0.6177],
[0.6986, 0.8195, 0.8608, 0.9989, 0.1673],
[0.5300, 0.5413, 0.5605, 0.9120, 0.7765],
[0.8869, 0.3050, 0.5276, 0.8894, 0.4718]], requires_grad=True)), ('my_tensor.1', Parameter containing:
tensor([[0.3136, 0.8541, 0.6999, 0.2314, 0.2391],
[0.8063, 0.6426, 0.4157, 0.5995, 0.3899],
[0.2969, 0.5717, 0.6532, 0.3171, 0.0089],
[0.8649, 0.7355, 0.9127, 0.9174, 0.8873]], requires_grad=True)), ('my_tensor.2', Parameter containing:
tensor([[0.7101, 0.3629, 0.4214, 0.1725, 0.3527],
[0.5847, 0.5381, 0.2783, 0.5601, 0.4330],
[0.9955, 0.7466, 0.2785, 0.6853, 0.7695],
[0.5083, 0.2306, 0.2742, 0.7975, 0.6680]], requires_grad=True)), ('my_tensor.3', Parameter containing:
tensor([[0.2137, 0.9676, 0.3329, 0.4692, 0.3137],
[0.2052, 0.9377, 0.1016, 0.5990, 0.2175],
[0.8921, 0.0642, 0.9662, 0.7828, 0.7527],
[0.9349, 0.9536, 0.9047, 0.7468, 0.9529]], requires_grad=True)), ('my_tensor.4', Parameter containing:
tensor([[0.3828, 0.8569, 0.5692, 0.6235, 0.8658],
[0.8093, 0.0742, 0.8216, 0.4765, 0.9254],
[0.8145, 0.6869, 0.8607, 0.5033, 0.7655],
[0.8213, 0.7167, 0.3585, 0.6060, 0.9576]], requires_grad=True))]
my_para_dict_list=[('linear1.weight', Parameter containing:
tensor([[-0.4675, 0.2992, 0.3019],
[ 0.4830, -0.5492, -0.1147],
[-0.1806, 0.3557, -0.5125],
[ 0.0112, 0.0357, 0.5018]], requires_grad=True)), ('linear1.bias', Parameter containing:
tensor([ 0.2655, -0.0366, 0.3213, 0.5213], requires_grad=True)), ('para_dict.left', Parameter containing:
tensor([[-0.6760, 1.5255, -0.2469, -0.8515, 1.1868, 0.2585, 1.1479, -1.0624,
0.9016, -0.0555],
[ 1.6491, -0.1097, -0.6965, -0.7961, -0.1093, -0.3175, -0.7918, 0.1021,
1.4860, -1.2344],
[ 1.0438, 0.3725, 1.4540, 1.2188, 0.5695, 1.9102, 0.9539, -1.7334,
1.1806, 0.0480],
[-0.0024, -1.6298, 2.0577, 1.6975, 0.0825, 0.3063, -0.8376, -1.5045,
1.4113, 0.4588],
[-1.0440, -0.8490, -0.8787, -0.0418, -1.3182, 0.6033, -0.8973, 1.0968,
-0.6005, -0.3022]], requires_grad=True)), ('para_dict.right', Parameter containing:
tensor([[-2.5233, 0.7036, 0.8324, 1.6768, -1.2052, -0.0711, 0.5338, -0.4779,
0.0469, -0.0403],
[ 1.3852, -0.3172, -1.3008, 0.1408, -1.9336, -0.9055, -1.7066, -0.2712,
0.5037, 1.5622],
[ 0.1787, -1.3736, -0.9583, -1.8272, -0.6235, 0.7015, 0.3433, -0.4415,
1.0714, -0.7610],
[-0.4396, -0.1573, -0.0357, 0.1194, 1.0966, 1.4011, -1.3459, 1.3431,
0.0034, 2.3680],
[ 1.4473, -0.2111, -1.3596, 0.7426, -1.0766, 1.2246, -0.7292, 1.1326,
0.6913, -1.4395]], requires_grad=True))]
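As the output shows, the plain tensor attribute mytensor in MyOriginalNet appears nowhere in named_parameters(), and it would not be saved in state_dict() either. If a tensor should travel with the module (be saved and moved with .to()) but not be trained, register it as a buffer instead of a Parameter. A minimal sketch (the class name is illustrative only):
- python
import torch
from torch import nn

class MyBufferNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(3, 4)
        # saved in state_dict and moved with the module, but not a parameter
        self.register_buffer("my_tensor", torch.rand(4, 5))

    def forward(self, x):
        return self.linear1(x)

net = MyBufferNet()
print([name for name, _ in net.named_parameters()])  # only linear1.weight / linear1.bias
print(list(net.state_dict().keys()))                 # includes 'my_tensor'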