Skip to content

Sparse Aware MIL (sAwMIL)

sawmil.sawmil.sAwMIL dataclass

sAwMIL(
    C: float = 1.0,
    kernel: KernelType = "Linear",
    sil_kernel: KernelType = "Linear",
    normalizer: str = "none",
    p: float = 1.0,
    scale_C: bool = True,
    tol: float = 1e-08,
    verbose: bool = False,
    solver: str = "gurobi",
    eta: float = 0.1,
    min_pos_ratio: float = 0.05,
    smil_: sMIL | None = None,
    sil_: SVM | None = None,
    classes_: NDArray[float64] | None = None,
    coef_: NDArray[float64] | None = None,
    intercept_: float | None = None,
    cutoff_: float | None = None,
    solver_params: Optional[dict] = None,
)

Bases: BaseEstimator, ClassifierMixin

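Sparse-Aware MIL (sAwMIL): a two-stage, SVM-based multiple-instance classifier. Stage 1 fits an sMIL model whose decisions are used to rank and filter instances; stage 2 trains an instance-level SVM on the selected instances.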

decision_function

decision_function(
    bags: Sequence[Bag] | BagDataset | Sequence[ndarray],
) -> npt.NDArray[np.float64]

Compute the decision function for the given bags.

Source code in src/sawmil/sawmil.py
193
194
195
196
197
198
199
200
201
202
203
204
def decision_function(self, bags: Sequence[Bag] | BagDataset | Sequence[np.ndarray]) -> npt.NDArray[np.float64]:
    '''Return one real-valued score per bag.

    A non-empty bag is scored by the largest decision value the fitted
    instance-level SVM (``self.sil_``) assigns to any of its instances.
    An empty bag (``n == 0``) is scored with the SVM intercept, or ``0.0``
    when that intercept is ``None`` (or otherwise falsy).

    Args:
        bags: Bags to score; passed through ``self._coerce_bags`` first.

    Returns:
        A float array with one entry per coerced bag, in input order.

    Raises:
        RuntimeError: If ``self.sil_`` is ``None`` (model not fitted).
    '''
    bag_list = self._coerce_bags(bags)
    instance_svm = self.sil_
    if instance_svm is None:
        raise RuntimeError("sAwMIL is not fitted.")

    out = np.empty(len(bag_list), dtype=float)
    for idx, bag in enumerate(bag_list):
        if bag.n != 0:
            # Bag score = best-scoring instance in the bag.
            out[idx] = float(np.max(instance_svm.decision_function(bag.X)))
            continue
        # No instances to score: fall back to the bias term alone.
        out[idx] = float(instance_svm.intercept_ or 0.0)
    return out

fit

fit(
    bags: Sequence[Bag] | BagDataset | Sequence[ndarray],
    y: Optional[NDArray[float64]] = None,
    intra_bag_mask: Optional[Sequence[ndarray]] = None,
) -> "sAwMIL"

Fit the model to the training data.

Source code in src/sawmil/sawmil.py
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
def fit(
    self,
    bags: Sequence[Bag] | BagDataset | Sequence[np.ndarray],
    y: Optional[npt.NDArray[np.float64]] = None,
    intra_bag_mask: Optional[Sequence[np.ndarray]] = None,
) -> "sAwMIL":
    '''Fit the two-stage model and return ``self``.

    Stage 1 runs ``self.__fit_mil__`` on the coerced bags and then
    ``self.__rank_and_filter__`` to obtain an instance-level training set.
    Stage 2 trains an ``SVM`` on that set and stores it as ``self.sil_``;
    its coefficients (flattened) and intercept are mirrored onto
    ``self.coef_`` and ``self.intercept_``.

    Args:
        bags: Training bags; forwarded to ``self._coerce_bags``.
        y: Optional labels, forwarded to ``self._coerce_bags``.
        intra_bag_mask: Optional per-bag masks, forwarded to
            ``self._coerce_bags``.

    Returns:
        The fitted estimator (``self``).

    Raises:
        ValueError: If coercion yields no bags.
    '''
    # Normalise the accepted input forms into a single list of bags.
    bag_list = self._coerce_bags(bags, y, intra_bag_mask)
    if not bag_list:
        raise ValueError("No bags provided.")

    # Stage 1: fit the bag-level model, then use it to pick the instances
    # (and their labels) that the instance-level SVM will be trained on.
    self.__fit_mil__(bag_list)
    X_inst, y_inst = self.__rank_and_filter__(bag_list)

    # Stage 2: instance-level SVM, configured from this estimator's settings.
    # NOTE(review): the estimator also declares `sil_kernel`, but this stage
    # is built with `self.kernel` — confirm which one is intended here.
    svm_settings = dict(
        C=self.C,
        kernel=self.kernel,
        solver=self.solver,
        tol=self.tol,
        verbose=self.verbose,
        solver_params=self.solver_params,
    )
    instance_svm = SVM(**svm_settings)
    instance_svm.fit(X_inst, y_inst)
    self.sil_ = instance_svm

    # Expose the learned parameters on the estimator itself; either may be
    # None on the underlying SVM, in which case None is propagated.
    weights = instance_svm.coef_
    self.coef_ = None if weights is None else weights.ravel()
    bias = instance_svm.intercept_
    self.intercept_ = None if bias is None else float(bias)
    return self

predict

predict(
    bags: Sequence[Bag] | BagDataset | Sequence[ndarray],
) -> npt.NDArray[np.float64]

Predict the labels for the given bags.

Source code in src/sawmil/sawmil.py
206
207
208
def predict(self, bags: Sequence[Bag] | BagDataset | Sequence[np.ndarray]) -> npt.NDArray[np.float64]:
    '''Predict a binary label for each bag.

    A bag is labelled ``1.0`` when its ``decision_function`` score is
    greater than or equal to ``0.0`` and ``0.0`` otherwise.

    Args:
        bags: Bags to classify; forwarded to ``self.decision_function``.

    Returns:
        A float array of ``0.0`` / ``1.0`` labels, one per bag.
    '''
    bag_scores = self.decision_function(bags)
    is_positive = bag_scores >= 0.0  # a score of exactly zero counts as positive
    return is_positive.astype(float)

score

score(bags, y_true) -> float

Compute the accuracy of the model on the given bags.

Source code in src/sawmil/sawmil.py
210
211
212
213
214
215
216
217
218
219
def score(self, bags, y_true) -> float:
    '''Return the fraction of bags whose predicted label equals the true label.

    The reference labels are taken from the bags themselves when they carry
    them: from ``bags.bags`` for a ``BagDataset``, or from each element's
    ``y`` when ``bags`` is a non-empty sequence whose first item is a
    ``Bag``. Only otherwise (for example raw arrays, or an empty sequence)
    is ``y_true`` used.

    Args:
        bags: Bags to evaluate; forwarded to ``self.predict``.
        y_true: Labels used only when they cannot be read from ``bags``.

    Returns:
        Mean of the element-wise equality between predictions and labels.
    '''
    predicted = self.predict(bags)

    # Pick the label source first, then convert once.
    if isinstance(bags, BagDataset):
        labels = [bag.y for bag in bags.bags]
    elif len(bags) and isinstance(bags[0], Bag):  # type: ignore[index]
        labels = [bag.y for bag in bags]
    else:
        labels = y_true

    expected = np.asarray(labels, dtype=float)
    matches = predicted == expected
    return float(matches.mean())