1.示例1
logits=[0.5 -1.0 2.0]
targets=[1 0 1]
# δ(x) = sigmoid(x) = 1 / (1 + e^(-x))
p=δ(logits)=[δ(0.5) δ(-1.0) δ(2.0)]=[0.6225 0.2689 0.8808]
loss1 = -[1 · log(0.6225) + 0 · log(1 - 0.6225)] ≈ 0.4741
loss2 = -[0 · log(0.2689) + 1 · log(1 - 0.2689)] ≈ 0.3133
loss3 = -[1 · log(0.8808) + 0 · log(1 - 0.8808)] ≈ 0.1269
total_loss = loss1 + loss2 + loss3 ≈ 0.4741 + 0.3133 + 0.1269 = 0.9143
2.分步计算演示
import torch
# Three samples laid out as a (3, 1) column: raw scores and their binary labels.
logits = torch.tensor([[0.5], [-1.0], [2.0]], dtype=torch.float32)
targets = torch.tensor([[1.0], [0.0], [1.0]], dtype=torch.float32)
# Step 1: map each logit to a probability with the sigmoid.
probs = logits.sigmoid()
# Step 2: binary cross-entropy per sample, -[y * log(p) + (1 - y) * log(1 - p)],
# written as two named terms so each half of the formula is visible.
positive_term = targets * probs.log()
negative_term = (1 - targets) * (1 - probs).log()
loss_per_sample = -(positive_term + negative_term)
# Step 3: add the per-sample losses into a single scalar.
total_loss = loss_per_sample.sum()
print(total_loss.item())
# 0.9142667055130005
3.BCEWithLogitsLoss实现
import torch
import torch.nn as nn
# Raw scores and binary labels for three samples, reshaped into (3, 1) columns.
logits = torch.tensor([0.5, -1.0, 2.0], dtype=torch.float32).unsqueeze(1)
targets = torch.tensor([1.0, 0.0, 1.0], dtype=torch.float32).unsqueeze(1)
# The criterion is fed the raw logits directly (no explicit sigmoid call here);
# reduction='sum' adds up the per-sample losses rather than averaging them.
loss_fn = nn.BCEWithLogitsLoss(reduction='sum')
# Evaluate the criterion on the three samples.
loss = loss_fn(logits, targets)
print(loss.item())
# 0.9142667055130005