Reference
Fuzzy C-means Model
Attributes:
Name | Type | Description |
---|---|---|
n_clusters |
int |
The number of clusters to form as well as the number |
max_iter |
int |
Maximum number of iterations of the fuzzy C-means |
m |
float |
Degree of fuzziness: \(m \in (1, \infty)\). |
error |
float |
Relative tolerance with regards to Frobenius norm of |
random_state |
Optional[int] |
Determines random number generation for |
trained |
bool |
Variable to store whether or not the model has been |
Returns:
Type | Description |
---|---|
FCM |
A FCM model. |
Exceptions:
Type | Description |
---|---|
ReferenceError |
If called without the model being trained |
Source code in fcmeans/main.py
class FCM(BaseModel):
r"""Fuzzy C-means Model
Attributes:
n_clusters (int): The number of clusters to form as well as the number
of centroids to generate by the fuzzy C-means.
max_iter (int): Maximum number of iterations of the fuzzy C-means
algorithm for a single run.
m (float): Degree of fuzziness: $m \in (1, \infty)$.
error (float): Relative tolerance with regards to Frobenius norm of
the difference
in the cluster centers of two consecutive iterations to declare
convergence.
random_state (Optional[int]): Determines random number generation for
centroid initialization.
Use an int to make the randomness deterministic.
trained (bool): Variable to store whether or not the model has been
trained.
Returns:
FCM: A FCM model.
Raises:
ReferenceError: If called without the model being trained
"""
class Config:
extra = Extra.allow
arbitrary_types_allowed = True
n_clusters: int = Field(5, ge=1, le=100)
max_iter: int = Field(150, ge=1, le=1000)
m: float = Field(2.0, ge=1.0)
error: float = Field(1e-5, ge=1e-9)
random_state: Optional[int] = None
trained: bool = Field(False, const=True)
@validate_arguments(config=dict(arbitrary_types_allowed=True))
def fit(self, X: NDArray) -> None:
"""Train the fuzzy-c-means model
Args:
X (NDArray): Training instances to cluster.
"""
self.rng = np.random.default_rng(self.random_state)
n_samples = X.shape[0]
self.u = self.rng.uniform(size=(n_samples, self.n_clusters))
self.u = self.u / np.tile(
self.u.sum(axis=1)[np.newaxis].T, self.n_clusters
)
for _ in range(self.max_iter):
u_old = self.u.copy()
self._centers = FCM._next_centers(X, self.u, self.m)
self.u = self.soft_predict(X)
# Stopping rule
if np.linalg.norm(self.u - u_old) < self.error:
break
self.trained = True
@validate_arguments(config=dict(arbitrary_types_allowed=True))
def soft_predict(self, X: NDArray) -> NDArray:
"""Soft predict of FCM
Args:
X (NDArray): New data to predict.
Returns:
NDArray: Fuzzy partition array, returned as an array with
n_samples rows and n_clusters columns.
"""
temp = FCM._dist(X, self._centers) ** (2 / (self.m - 1))
denominator_ = temp.reshape((X.shape[0], 1, -1)).repeat(
temp.shape[-1], axis=1
)
denominator_ = temp[:, :, np.newaxis] / denominator_
return 1 / denominator_.sum(2)
@validate_arguments(config=dict(arbitrary_types_allowed=True))
def predict(self, X: NDArray) -> NDArray:
"""Predict the closest cluster each sample in X belongs to.
Args:
X (NDArray): New data to predict.
Raises:
ReferenceError: If it called without the model being trained.
Returns:
NDArray: Index of the cluster each sample belongs to.
"""
if self._is_trained():
X = np.expand_dims(X, axis=0) if len(X.shape) == 1 else X
return self.soft_predict(X).argmax(axis=-1)
raise ReferenceError(
"You need to train the model. Run `.fit()` method to this."
)
def _is_trained(self) -> bool:
if self.trained:
return True
return False
@staticmethod
def _dist(A: NDArray, B: NDArray) -> NDArray:
"""Compute the euclidean distance two matrices"""
return np.sqrt(np.einsum("ijk->ij", (A[:, None, :] - B) ** 2))
@staticmethod
def _next_centers(X: NDArray, u: NDArray, m: float):
"""Update cluster centers"""
um = u**m
return (X.T @ um / np.sum(um, axis=0)).T
@property
def centers(self) -> NDArray:
if self._is_trained():
return self._centers
raise ReferenceError(
"You need to train the model. Run `.fit()` method to this."
)
@property
def partition_coefficient(self) -> float:
"""Partition coefficient
Equation 12a of
[this paper](https://doi.org/10.1016/0098-3004(84)90020-7).
"""
if self._is_trained():
return np.mean(self.u**2)
raise ReferenceError(
"You need to train the model. Run `.fit()` method to this."
)
@property
def partition_entropy_coefficient(self):
if self._is_trained():
return -np.mean(self.u * np.log2(self.u))
raise ReferenceError(
"You need to train the model. Run `.fit()` method to this."
)
partition_coefficient: float
property
readonly
Partition coefficient
Equation 12a of this paper.
fit(self, X)
Train the fuzzy-c-means model
Parameters:
Name | Type | Description | Default |
---|---|---|---|
X |
NDArray |
Training instances to cluster. |
required |
Source code in fcmeans/main.py
@validate_arguments(config=dict(arbitrary_types_allowed=True))
def fit(self, X: NDArray) -> None:
"""Train the fuzzy-c-means model
Args:
X (NDArray): Training instances to cluster.
"""
self.rng = np.random.default_rng(self.random_state)
n_samples = X.shape[0]
self.u = self.rng.uniform(size=(n_samples, self.n_clusters))
self.u = self.u / np.tile(
self.u.sum(axis=1)[np.newaxis].T, self.n_clusters
)
for _ in range(self.max_iter):
u_old = self.u.copy()
self._centers = FCM._next_centers(X, self.u, self.m)
self.u = self.soft_predict(X)
# Stopping rule
if np.linalg.norm(self.u - u_old) < self.error:
break
self.trained = True
predict(self, X)
Predict the closest cluster each sample in X belongs to.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
X |
NDArray |
New data to predict. |
required |
Exceptions:
Type | Description |
---|---|
ReferenceError |
If it called without the model being trained. |
Returns:
Type | Description |
---|---|
NDArray |
Index of the cluster each sample belongs to. |
Source code in fcmeans/main.py
@validate_arguments(config=dict(arbitrary_types_allowed=True))
def predict(self, X: NDArray) -> NDArray:
"""Predict the closest cluster each sample in X belongs to.
Args:
X (NDArray): New data to predict.
Raises:
ReferenceError: If it called without the model being trained.
Returns:
NDArray: Index of the cluster each sample belongs to.
"""
if self._is_trained():
X = np.expand_dims(X, axis=0) if len(X.shape) == 1 else X
return self.soft_predict(X).argmax(axis=-1)
raise ReferenceError(
"You need to train the model. Run `.fit()` method to this."
)
soft_predict(self, X)
Soft predict of FCM
Parameters:
Name | Type | Description | Default |
---|---|---|---|
X |
NDArray |
New data to predict. |
required |
Returns:
Type | Description |
---|---|
NDArray |
Fuzzy partition array, returned as an array with n_samples rows and n_clusters columns. |
Source code in fcmeans/main.py
@validate_arguments(config=dict(arbitrary_types_allowed=True))
def soft_predict(self, X: NDArray) -> NDArray:
"""Soft predict of FCM
Args:
X (NDArray): New data to predict.
Returns:
NDArray: Fuzzy partition array, returned as an array with
n_samples rows and n_clusters columns.
"""
temp = FCM._dist(X, self._centers) ** (2 / (self.m - 1))
denominator_ = temp.reshape((X.shape[0], 1, -1)).repeat(
temp.shape[-1], axis=1
)
denominator_ = temp[:, :, np.newaxis] / denominator_
return 1 / denominator_.sum(2)