Skip to content

Sparse MIL

sawmil.smil.sMIL dataclass

sMIL(
    C: float = 1.0,
    kernel: KernelType = "linear",
    solver: str = "gurobi",
    *,
    normalizer: Literal[
        "none", "average", "featurespace"
    ] = "none",
    p: float = 1.0,
    use_intra_labels: bool = False,
    fast_linear: bool = True,
    scale_C: bool = True,
    tol: float = 1e-08,
    verbose: bool = False,
    solver_params: Optional[Mapping[str, Any]] = None,
)

Bases: NSK

Sparse MIL (Bunescu & Mooney, 2007) implemented on top of NSK.

Training set
  • Every negative instance becomes its own 1-instance bag (label -1).
  • Every positive bag stays a bag (label +1).
Dual tweaks
  • Linear term for positive bags: f_j = 2/|B_j| - 1
  • Box constraints: iC for negatives, bC for positives (scaled if scale_C)
Notes
  • By default we ignore intra-bag labels (uniform instance weights).
  • Uses the NSK bag kernel; a mean aggregator with normalizer="none" is a sensible default.
Source code in src/sawmil/nsk.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def __init__(
    self,
    C: float = 1.0,
    # If bag_kernel is None, we'll build one from this instance-kernel spec:
    kernel: KernelType = "linear",
    solver: str = 'gurobi',
    *,
    # Bag kernel settings:
    normalizer: Literal["none", "average", "featurespace"] = "none",
    p: float = 1.0,
    use_intra_labels: bool = False,
    fast_linear: bool = True,
    # Solver / SVM settings:
    scale_C: bool = True,
    tol: float = 1e-8,
    verbose: bool = False,
    solver_params: Optional[Mapping[str, Any]] = None
) -> None:
    # NOTE: the return annotation was previously "-> 'NSK'", which is incorrect
    # for __init__ (it must return None; returning anything else raises TypeError).
    """
    Initialize the NSK model.

    Args:
        C: Regularization parameter.
        kernel: Instance-kernel spec used to build the bag kernel (default: "linear").
        solver: QP solver backend to use (default: "gurobi").
        normalizer: Bag kernel normalization method (default: "none").
        p: Exponent parameter for the bag kernel (default: 1.0).
        use_intra_labels: Whether to use intra-bag labels (default: False).
        fast_linear: Whether to use the fast linear-kernel path (default: True).
        scale_C: Whether to scale C by the number of bags/instances (default: True).
        tol: Tolerance for stopping criteria and support-vector selection (default: 1e-8).
        verbose: Whether to print verbose output (default: False).
        solver_params: Additional parameters forwarded to the solver (default: None).
    """
    # parent SVM stores common attrs; kernel arg unused here
    super().__init__(C=C, kernel=kernel, tol=tol, verbose=verbose, solver=solver)
    self.scale_C = scale_C

    # How to build the bag kernel (if not provided)
    self.kernel = kernel
    # Bag kernel configuration
    self.normalizer = normalizer
    self.p = p
    self.use_intra_labels = use_intra_labels
    self.fast_linear = fast_linear
    # Build the bag-level kernel from the instance-kernel spec up front.
    self.bag_kernel = make_bag_kernel(inst_kernel=self.kernel, normalizer=self.normalizer,
                                      p=self.p, use_intra_labels=self.use_intra_labels, fast_linear=self.fast_linear)
    # Defensive copy so a caller's mapping can't be mutated later.
    self.solver_params = dict(solver_params or {})

    # Fitted state
    # training bags (ordering does not matter)
    self.bags_: Optional[List[Bag]] = None

fit

fit(
    bags: Sequence[Bag] | BagDataset | Sequence[ndarray],
    y: Optional[NDArray[float64]] = None,
) -> "sMIL"

Fit the model to the training data.

Source code in src/sawmil/smil.py
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
def fit(self, bags: Sequence[Bag] | BagDataset | Sequence[np.ndarray],
        y: Optional[npt.NDArray[np.float64]] = None) -> "sMIL":
    '''Fit the model to the training data.'''
    # 1) coerce inputs and build sMIL training set
    init_bags, _ = self._coerce_bags_and_labels(bags, y)
    train_bags, y_train, S_n, B_p = self._build_init_training(init_bags)
    if not train_bags:
        raise ValueError("No training data after sMIL transformation.")
    self.bags_ = train_bags

    # map to {-1,+1} and store classes
    classes = np.unique(y_train)
    if classes.size != 2:
        raise ValueError("Binary classification only.")
    self.classes_ = classes.astype(float)
    Y = np.where(y_train == classes[0], -1.0, 1.0)
    self.y_ = Y

    # 2) bag kernel Gram
    bk = self._ensure_bag_kernel()
    bk.fit(train_bags)
    K = bk(train_bags, train_bags)                     # (n, n)

    # 3) QP pieces
    H = (Y[:, None] * Y[None, :]) * K
    n = len(train_bags)
    f = -np.ones(n, dtype=float)
    sizes = np.array([b.n for b in train_bags[S_n:]], dtype=float)
    f[S_n:] = (2.0 / np.maximum(sizes, 1.0)) - 1.0

    Aeq = Y.reshape(1, -1)
    beq = np.array([0.0], dtype=float)

    # per-variable box constraints
    if self.scale_C:
        iC = float(self.C) / max(S_n, 1)
        bC = float(self.C) / max(B_p, 1)
    else:
        iC = float(self.C)
        bC = float(self.C)
    lb = np.zeros(n, dtype=float)
    ub = np.concatenate([np.full(S_n, iC), np.full(B_p, bC)]).astype(float)

    # 4) solve
    alpha, _ = quadprog(H, f, Aeq, beq, lb, ub,
                        verbose=self.verbose, solver=self.solver)
    self.alpha_ = alpha

    # 5) SVs + intercept (dual)
    sv_mask = alpha > self.tol
    self.support_ = np.flatnonzero(sv_mask).astype(int)
    self.support_vectors_ = [train_bags[i] for i in self.support_]
    self.dual_coef_ = (alpha[sv_mask] * Y[sv_mask]).reshape(1, -1)

    caps = np.concatenate(
        [np.full(S_n, iC), np.full(B_p, bC)]).astype(float)
    on_margin = (alpha > self.tol) & (alpha < (caps - self.tol))
    if not np.any(on_margin):
        on_margin = sv_mask
    b_vals = Y[on_margin] - (alpha * Y) @ K[:, on_margin]
    self.intercept_ = float(np.mean(b_vals)) if b_vals.size else 0.0

    # 6) optional primal recovery (linearizable case: Linear + WeightedMean + p==1)
    if isinstance(bk, WeightedMeanBagKernel) and isinstance(bk.inst_kernel, Linear) and abs(bk.p - 1.0) < 1e-12:
        # φ(B) consistent with your NSK (uniform weights, chosen normalizer)
        Z = np.stack([self._phi(b, normalizer=bk.normalizer)
                     for b in train_bags], axis=0)  # (n, d)
        self.coef_ = (self.alpha_ * self.y_) @ Z
        use = on_margin if np.any(on_margin) else sv_mask
        if np.any(use):
            self.intercept_ = float(
                np.mean(self.y_[use] - Z[use] @ self.coef_))
    else:
        self.coef_ = None  # keep dual intercept

    return self