Message-Embedding/loss.py at master · DolphyWind/Message-Embedding · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import torch
import torch.nn.functional as F


def triplet_loss(
    *,
    anchors: torch.Tensor,
    positives: torch.Tensor,
    negatives: torch.Tensor,
    margin: float,
    **kwargs,
) -> torch.Tensor:
    """Return the batch-mean margin triplet loss on unit-length embeddings.

    Every input is L2-normalized along ``dim=1`` first, so the distances
    below are Euclidean distances between unit vectors. Row ``i`` of each
    tensor forms one (anchor, positive, negative) triplet.

    Args:
        anchors: Anchor embeddings, one per row.
        positives: Embeddings that should be close to the matching anchor.
        negatives: Embeddings that should be far from the matching anchor.
        margin: Amount by which the anchor-negative distance must exceed
            the anchor-positive distance before a triplet stops
            contributing to the loss.
        **kwargs: Accepted and ignored, so every loss in this module can be
            called with the same keyword set.

    Returns:
        Scalar tensor: mean over rows of
        ``relu(d(anchor, positive) - d(anchor, negative) + margin)``.
    """
    unit_anchor, unit_positive, unit_negative = (
        F.normalize(batch, dim=1) for batch in (anchors, positives, negatives)
    )

    positive_dist = torch.linalg.vector_norm(unit_anchor - unit_positive, dim=1)
    negative_dist = torch.linalg.vector_norm(unit_anchor - unit_negative, dim=1)

    # Hinge: only triplets that violate the margin produce a non-zero term.
    violation = positive_dist - negative_dist + margin
    return F.relu(violation).mean()


def infonce_loss(
    *,
    anchors: torch.Tensor,
    positives: torch.Tensor,
    temperature: float,
    **kwargs,
) -> torch.Tensor:
    """Return the InfoNCE loss using in-batch negatives.

    Both inputs are L2-normalized along ``dim=1``, so each logit is a
    cosine similarity divided by ``temperature``. For anchor ``i`` the
    target class is column ``i``, i.e. ``positives[i]``; every other row of
    ``positives`` competes as a negative in the softmax.

    Args:
        anchors: Anchor embeddings, one per row.
        positives: Positive embeddings; row ``i`` pairs with anchor ``i``.
        temperature: Divisor applied to the cosine-similarity matrix.
        **kwargs: Accepted and ignored, so every loss in this module can be
            called with the same keyword set.

    Returns:
        Scalar tensor: cross-entropy of the similarity rows against the
        diagonal targets, averaged over anchors.
    """
    unit_anchors = F.normalize(anchors, dim=1)
    unit_positives = F.normalize(positives, dim=1)

    similarity = torch.matmul(unit_anchors, unit_positives.T) / temperature
    # The matching positive for anchor i sits in column i.
    labels = torch.arange(similarity.shape[0], device=similarity.device)
    return F.cross_entropy(similarity, labels)


def clip_loss(
    *,
    anchors: torch.Tensor,
    positives: torch.Tensor,
    temperature: float,
    **kwargs,
) -> torch.Tensor:
    """Return the symmetric (CLIP-style) InfoNCE loss.

    ``infonce_loss`` is evaluated twice, once in each retrieval direction
    (anchors against positives, then positives against anchors), and the
    two results are averaged.

    Args:
        anchors: Anchor embeddings, one per row.
        positives: Positive embeddings; row ``i`` pairs with anchor ``i``.
        temperature: Temperature forwarded to both ``infonce_loss`` calls.
        **kwargs: Accepted and ignored; not forwarded to ``infonce_loss``.

    Returns:
        Scalar tensor: mean of the two directional InfoNCE losses.
    """
    anchor_to_positive = infonce_loss(
        anchors=anchors,
        positives=positives,
        temperature=temperature,
    )
    # Same loss with the roles swapped.
    positive_to_anchor = infonce_loss(
        anchors=positives,
        positives=anchors,
        temperature=temperature,
    )
    return 0.5 * (anchor_to_positive + positive_to_anchor)


def multipositive_infonce_loss(
    *,
    anchors: torch.Tensor,
    positives: torch.Tensor,
    temperature: float,
    **kwargs,
) -> torch.Tensor:
    """Return the InfoNCE loss when each anchor has several positives.

    ``positives`` must hold ``k`` consecutive rows per anchor: rows
    ``i * k`` through ``i * k + k - 1`` are the positives of anchor ``i``.
    Both inputs are L2-normalized along ``dim=1``; each anchor's
    temperature-scaled cosine similarities to *all* rows of ``positives``
    are turned into log-probabilities, and the loss for that anchor is the
    negative mean log-probability of its own ``k`` positives. Rows that
    belong to other anchors act as negatives.

    Args:
        anchors: Anchor embeddings, one per row (``n`` rows).
        positives: Positive embeddings, ``n * k`` rows grouped by anchor.
        temperature: Divisor applied to the cosine-similarity matrix.
        **kwargs: Accepted and ignored, so every loss in this module can be
            called with the same keyword set.

    Returns:
        Scalar tensor: the per-anchor loss averaged over all anchors.

    Raises:
        ValueError: If ``anchors`` has no rows, or the number of rows in
            ``positives`` is not a positive multiple of the number of
            anchors. Without this check the loss would silently be NaN
            (``k == 0``) or leftover positives would be scored as negatives.
    """
    num_anchors = anchors.size(0)
    num_positives = positives.size(0)
    if num_anchors == 0:
        raise ValueError("anchors must contain at least one row")
    if num_positives == 0 or num_positives % num_anchors != 0:
        raise ValueError(
            "positives must contain a positive multiple of the number of "
            f"anchors; got {num_positives} positives for {num_anchors} anchors"
        )
    per_anchor = num_positives // num_anchors

    a = F.normalize(anchors, dim=1)
    p = F.normalize(positives, dim=1)

    logits = (a @ p.T) / temperature
    # Row-wise log-softmax over every candidate in the batch.
    log_probs = logits - torch.logsumexp(logits, dim=1, keepdim=True)

    # Column indices of each anchor's own positives, shape (n, k).
    first_col = torch.arange(num_anchors, device=logits.device) * per_anchor
    pos_idx = first_col[:, None] + torch.arange(per_anchor, device=logits.device)

    # Pick the positive columns directly instead of building an n x m mask.
    per_anchor_loss = -log_probs.gather(1, pos_idx).mean(dim=1)
    return per_anchor_loss.mean()