Sparse Aware MIL (sAwMIL)

sawmil.sawmil.sAwMIL `dataclass`

sAwMIL(
    C: float = 1.0,
    kernel: KernelType = "Linear",
    sil_kernel: KernelType = "Linear",
    normalizer: str = "none",
    p: float = 1.0,
    scale_C: bool = True,
    tol: float = 1e-08,
    verbose: bool = False,
    solver: str = "gurobi",
    eta: float = 0.1,
    min_pos_ratio: float = 0.05,
    smil_: sMIL | None = None,
    sil_: SVM | None = None,
    classes_: NDArray[float64] | None = None,
    coef_: NDArray[float64] | None = None,
    intercept_: float | None = None,
    cutoff_: float | None = None,
    solver_params: Optional[dict] = None,
)

Bases: BaseEstimator, ClassifierMixin

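Sparse Aware MIL (sAwMIL): an SVM-based multiple-instance classifier trained in two stages — an sMIL model (stage 1) followed by an instance-level SVM (stage 2).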

decision_function

decision_function(
    bags: Sequence[Bag] | BagDataset | Sequence[ndarray],
) -> npt.NDArray[np.float64]

Compute the decision function for the given bags.

Source code in src/sawmil/sawmil.py

def decision_function(self, bags: Sequence[Bag] | BagDataset | Sequence[np.ndarray]) -> npt.NDArray[np.float64]:
    '''Compute the decision function for the given bags.

    Each bag is scored with the fitted stage-2 instance SVM (``self.sil_``):
    the bag score is the largest instance score inside the bag. A bag with
    no instances (``b.n == 0``) receives the SVM intercept instead, or
    ``0.0`` when that intercept is ``None`` or zero.

    Args:
        bags: Bags to score; they are passed through ``self._coerce_bags``
            before scoring.

    Returns:
        A 1-D float array with one score per coerced bag, in order.

    Raises:
        RuntimeError: If ``self.sil_`` is ``None`` (model not fitted).
    '''
    # Coercion intentionally happens before the fitted-check, so input
    # errors surface first.
    coerced = self._coerce_bags(bags)
    instance_svm = self.sil_
    if instance_svm is None:
        raise RuntimeError("sAwMIL is not fitted.")

    def _bag_score(bag) -> float:
        # Empty bag: nothing to take a max over, fall back to the bias.
        if bag.n == 0:
            return float(instance_svm.intercept_ or 0.0)
        # Non-empty bag: the most positive instance decides the bag score.
        return float(np.max(instance_svm.decision_function(bag.X)))

    return np.array([_bag_score(bag) for bag in coerced], dtype=float)

fit

fit(
    bags: Sequence[Bag] | BagDataset | Sequence[ndarray],
    y: Optional[NDArray[float64]] = None,
    intra_bag_mask: Optional[Sequence[ndarray]] = None,
) -> "sAwMIL"

Fit the model to the training data.

Source code in src/sawmil/sawmil.py

def fit(
    self,
    bags: Sequence[Bag] | BagDataset | Sequence[np.ndarray],
    y: Optional[npt.NDArray[np.float64]] = None,
    intra_bag_mask: Optional[Sequence[np.ndarray]] = None,
) -> "sAwMIL":
    '''Fit the model to the training data.

    Training runs in two stages: ``__fit_mil__`` fits the stage-1 model on
    the coerced bags, ``__rank_and_filter__`` turns them into an
    instance-level training set, and an ``SVM`` is then fitted on that set
    and stored as ``self.sil_``.

    Args:
        bags: Training bags; forwarded to ``self._coerce_bags``.
        y: Optional labels, forwarded to ``self._coerce_bags``.
        intra_bag_mask: Optional per-bag masks, forwarded to
            ``self._coerce_bags``.

    Returns:
        ``self``, with ``sil_``, ``coef_`` and ``intercept_`` set.

    Raises:
        ValueError: If coercion yields no bags.
    '''
    # Normalise whatever the caller passed into the internal bag list.
    coerced = self._coerce_bags(bags, y, intra_bag_mask)
    if not coerced:
        raise ValueError("No bags provided.")

    # Stage 1: bag-level model, then derive the instance-level training set.
    self.__fit_mil__(coerced)
    instances, instance_labels = self.__rank_and_filter__(coerced)

    # Stage 2: instance-level SVM, configured from this estimator's settings.
    # NOTE(review): the constructor also exposes `sil_kernel`, but this stage
    # is built with `self.kernel` — confirm that is intended.
    stage2_kwargs = dict(
        C=self.C,
        kernel=self.kernel,
        solver=self.solver,
        tol=self.tol,
        verbose=self.verbose,
        solver_params=self.solver_params,
    )
    stage2 = SVM(**stage2_kwargs)
    stage2.fit(instances, instance_labels)
    self.sil_ = stage2

    # Mirror the stage-2 parameters on the estimator (None when unavailable).
    if stage2.coef_ is None:
        self.coef_ = None
    else:
        self.coef_ = stage2.coef_.ravel()

    if stage2.intercept_ is None:
        self.intercept_ = None
    else:
        self.intercept_ = float(stage2.intercept_)
    return self

predict

predict(
    bags: Sequence[Bag] | BagDataset | Sequence[ndarray],
) -> npt.NDArray[np.float64]

Predict the labels for the given bags.

Source code in src/sawmil/sawmil.py

def predict(self, bags: Sequence[Bag] | BagDataset | Sequence[np.ndarray]) -> npt.NDArray[np.float64]:
    '''Predict the labels for the given bags.

    A bag is labelled ``1.0`` when its decision score is at least zero and
    ``0.0`` otherwise.

    Args:
        bags: Bags to classify; forwarded to ``self.decision_function``.

    Returns:
        A float array of 0.0/1.0 labels, one per decision score.
    '''
    margins = self.decision_function(bags)
    # A score of exactly zero counts as positive.
    is_positive = margins >= 0.0
    return is_positive.astype(float)

score

score(bags, y_true) -> float

Compute the accuracy of the model on the given bags.

Source code in src/sawmil/sawmil.py

def score(self, bags, y_true) -> float:
    '''Compute the accuracy of the model on the given bags.

    Ground-truth labels are taken from the bags themselves when they carry
    them (a ``BagDataset``, or a non-empty sequence whose first element is
    a ``Bag``); in that case ``y_true`` is ignored. Otherwise ``y_true``
    supplies the labels.

    Args:
        bags: Bags to evaluate; forwarded to ``self.predict``.
        y_true: Labels used only when ``bags`` do not carry their own.

    Returns:
        Fraction of bags whose predicted label equals the true label.

    Raises:
        ValueError: If no labels are available, or if the number of labels
            does not match the number of predictions.
    '''
    y_pred = self.predict(bags)
    if isinstance(bags, BagDataset):
        labels = [b.y for b in bags.bags]
    elif len(bags) and isinstance(bags[0], Bag):  # type: ignore[index]
        labels = [b.y for b in bags]
    else:
        labels = y_true

    # np.asarray(None, dtype=float) would become NaN and silently score 0.0.
    if labels is None:
        raise ValueError("y_true is required when bags do not carry labels.")

    # Flatten so a column vector of shape (n, 1) cannot broadcast against the
    # 1-D predictions into an (n, n) comparison and skew the mean.
    y_true_arr = np.asarray(labels, dtype=float).ravel()
    if y_true_arr.shape != np.shape(y_pred):
        raise ValueError(
            f"Found {y_true_arr.shape[0]} labels for "
            f"{np.shape(y_pred)[0] if np.ndim(y_pred) else 1} predictions."
        )
    return float((y_pred == y_true_arr).mean())

Sparse Aware MIL (sAwMIL)

sawmil.sawmil.sAwMIL dataclass

decision_function

fit

predict

score

sawmil.sawmil.sAwMIL `dataclass`