import datasets
import evaluate
from sklearn.metrics import (
    adjusted_mutual_info_score,
    adjusted_rand_score,
    calinski_harabasz_score,
    completeness_score,
    davies_bouldin_score,
    fowlkes_mallows_score,
    homogeneity_score,
    silhouette_score,
)
from sklearn.metrics.cluster import contingency_matrix, pair_confusion_matrix

_CITATION = """
@article{scikit-learn,
    title={Scikit-learn: Machine Learning in {P}ython},
    author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
        and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
        and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
        Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
    journal={Journal of Machine Learning Research},
    volume={12},
    pages={2825--2830},
    year={2011}
}
"""

_DESCRIPTION = """\
This evaluator computes multiple clustering metrics to assess the quality of a clustering.
By default, the evaluator works in an unsupervised setting, evaluating the clustering from
the samples and the predictions alone.
However, it can also compute additional supervised metrics when ground-truth labels are
passed, which is not shown in this demo.
"""

_KWARGS_DESCRIPTION = """
Computes the quality of clustering results.
Args:
    samples: vector representations
    predictions: predicted cluster labels
    truth_labels (optional): truth labels to compute additional metrics
Returns:
    silhouette_score
    davies_bouldin_score
    calinski_harabasz_score
    adjusted_rand_score (only when truth_labels are passed)
    adjusted_mutual_info_score (only when truth_labels are passed)
    homogeneity_score (only when truth_labels are passed)
    completeness_score (only when truth_labels are passed)
    fowlkes_mallows_score (only when truth_labels are passed)
    contingency_matrix (only when truth_labels are passed)
    pair_confusion_matrix (only when truth_labels are passed)
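Example (illustrative sketch; instantiates the evaluator class directly):
    >>> metric = ClusteringEvaluator()
    >>> metric.compute(
    ...     samples=[[0.0, 0.1], [0.2, 0.0], [5.0, 5.1], [5.2, 4.9]],
    ...     predictions=[0, 0, 1, 1],
    ... )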
| """ | |
class ClusteringEvaluator(evaluate.Metric):
    def _info(self):
        return evaluate.MetricInfo(
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(
                {
                    "samples": datasets.Sequence(datasets.Value("float32")),
                    "predictions": datasets.Value("int64"),
                }
            ),
        )

    def _compute(self, samples, predictions, truth_labels=None):
        unsupervised_metrics = [
            silhouette_score,
            davies_bouldin_score,
            calinski_harabasz_score,
        ]
        supervised_metrics = [
            adjusted_rand_score,
            adjusted_mutual_info_score,
            homogeneity_score,
            completeness_score,
            fowlkes_mallows_score,
            contingency_matrix,
            pair_confusion_matrix,
        ]
        results = {}
        # Compute unsupervised metrics always
        for fn in unsupervised_metrics:
            results[fn.__name__] = float(fn(samples, predictions))
        # Compute supervised metrics if reference labels are passed
        if truth_labels is not None:
            for fn in supervised_metrics:
                score = fn(truth_labels, predictions)
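                # contingency_matrix and pair_confusion_matrix return ndarrays,
                # which cannot be cast to a scalar; those are kept as-is.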
                try:
                    score = float(score)
                except (AttributeError, TypeError):
                    pass
                results[fn.__name__] = score
        return results
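

# A minimal usage sketch (illustrative, not part of the metric itself). It
# instantiates the evaluator class directly rather than going through
# evaluate.load(), and uses scikit-learn's KMeans only to produce example
# inputs; the toy data and label values below are assumptions for the demo.
if __name__ == "__main__":
    import numpy as np
    from sklearn.cluster import KMeans

    # Toy data: three well-separated Gaussian blobs in 4 dimensions.
    rng = np.random.default_rng(0)
    X = np.concatenate(
        [rng.normal(loc=c, scale=0.3, size=(50, 4)) for c in (0.0, 3.0, 6.0)]
    )
    predicted = KMeans(n_clusters=3, n_init=10, random_state=0).fit_predict(X)

    metric = ClusteringEvaluator()
    # Unsupervised evaluation: samples and predicted cluster labels only.
    print(metric.compute(samples=X.tolist(), predictions=predicted.tolist()))
    # Passing truth_labels as well additionally yields the supervised metrics.
    truth = [i for i in range(3) for _ in range(50)]
    print(
        metric.compute(
            samples=X.tolist(), predictions=predicted.tolist(), truth_labels=truth
        )
    )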