当前位置: 首页 > article >正文

AutoML-sklearn and torch

一、auto-sklearn

1.1 环境依赖

  1. 额外安装swig 第三方库

  2. linux 支持, mac,windows不支持

1.2 示例代码

time_left_for_this_task 设定任务最大时间

per_run_time_limit 每个子任务最大训练时间

include 可以限制任务训练的模型

import autosklearn.classification
import sklearn.model_selection
from sklearn import datasets
import sklearn.metrics

if __name__ == "__main__":
    X, y = datasets.load_breast_cancer(return_X_y=True)

    X_train, X_test, y_train, y_test = \
        sklearn.model_selection.train_test_split(X, y, random_state=1)
    automl = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=120,
        per_run_time_limit=30,
        tmp_folder="/tmp/autosklearn_classification_example_tmp",
        include={
            'classifier': ["random_forest"],
            'feature_preprocessor': ["no_preprocessing"]
        }
    )
    automl.fit(X_train, y_train)
    y_hat = automl.predict(X_test)
    automl.get_models_with_weights()
    print("Accuracy score", sklearn.metrics.accuracy_score(y_test, y_hat))
    print(automl.leaderboard())
    models_with_weights = automl.get_models_with_weights()
    with open('../../preprocess/models_report.txt', 'w') as f:
        for model in models_with_weights:
            f.write(str(model) + '\n')

结果展示:

可以展示参数任务cost值排列顺序
在这里插入图片描述
以及训练参数配置:
在这里插入图片描述

1.3 模块扩展

在不支持的训练模块,可以扩展及自定义模型进行自动调参

代码示例:

继承AutoSklearnClassificationAlgorithm 并重写子方法

autosklearn.pipeline.components.classification.add_classifier(MLPClassifier) 将自定义模块注册至模块中

include 参数添加既可调用

"""
====================================================
Extending Auto-Sklearn with Classification Component
====================================================

The following example demonstrates how to create a new classification
component for using in auto-sklearn.
"""
from typing import Optional
from pprint import pprint

from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import (
    CategoricalHyperparameter,
    UniformIntegerHyperparameter,
    UniformFloatHyperparameter,
)

import sklearn.metrics

from autosklearn.askl_typing import FEAT_TYPE_TYPE
import autosklearn.classification
import autosklearn.pipeline.components.classification
from autosklearn.pipeline.components.base import AutoSklearnClassificationAlgorithm
from autosklearn.pipeline.constants import (
    DENSE,
    SIGNED_DATA,
    UNSIGNED_DATA,
    PREDICTIONS,
)

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split


############################################################################
# Create MLP classifier component for auto-sklearn
# ================================================


class MLPClassifier(AutoSklearnClassificationAlgorithm):
    def __init__(
            self,
            hidden_layer_depth,
            num_nodes_per_layer,
            activation,
            alpha,
            solver,
            random_state=None,
    ):
        self.hidden_layer_depth = hidden_layer_depth
        self.num_nodes_per_layer = num_nodes_per_layer
        self.activation = activation
        self.alpha = alpha
        self.solver = solver
        self.random_state = random_state

    def fit(self, X, y):
        self.num_nodes_per_layer = int(self.num_nodes_per_layer)
        self.hidden_layer_depth = int(self.hidden_layer_depth)
        self.alpha = float(self.alpha)

        from sklearn.neural_network import MLPClassifier

        hidden_layer_sizes = tuple(
            self.num_nodes_per_layer for i in range(self.hidden_layer_depth)
        )

        self.estimator = MLPClassifier(
            hidden_layer_sizes=hidden_layer_sizes,
            activation=self.activation,
            alpha=self.alpha,
            solver=self.solver,
            random_state=self.random_state,
        )
        self.estimator.fit(X, y)
        return self

    def predict(self, X):
        if self.estimator is None:
            raise NotImplementedError()
        return self.estimator.predict(X)

    def predict_proba(self, X):
        if self.estimator is None:
            raise NotImplementedError()
        return self.estimator.predict_proba(X)

    @staticmethod
    def get_properties(dataset_properties=None):
        return {
            "shortname": "MLP Classifier",
            "name": "MLP CLassifier",
            "handles_regression": False,
            "handles_classification": True,
            "handles_multiclass": True,
            "handles_multilabel": False,
            "handles_multioutput": False,
            "is_deterministic": False,
            # Both input and output must be tuple(iterable)
            "input": [DENSE, SIGNED_DATA, UNSIGNED_DATA],
            "output": [PREDICTIONS],
        }

    @staticmethod
    def get_hyperparameter_search_space(
            feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
    ):
        cs = ConfigurationSpace()
        hidden_layer_depth = UniformIntegerHyperparameter(
            name="hidden_layer_depth", lower=1, upper=3, default_value=1
        )
        num_nodes_per_layer = UniformIntegerHyperparameter(
            name="num_nodes_per_layer", lower=16, upper=216, default_value=32
        )
        activation = CategoricalHyperparameter(
            name="activation",
            choices=["identity", "logistic", "tanh", "relu"],
            default_value="relu",
        )
        alpha = UniformFloatHyperparameter(
            name="alpha", lower=0.0001, upper=1.0, default_value=0.0001
        )
        solver = CategoricalHyperparameter(
            name="solver", choices=["lbfgs", "sgd", "adam"], default_value="adam"
        )
        cs.add_hyperparameters(
            [
                hidden_layer_depth,
                num_nodes_per_layer,
                activation,
                alpha,
                solver,
            ]
        )
        return cs


# Add MLP classifier component to auto-sklearn.
autosklearn.pipeline.components.classification.add_classifier(MLPClassifier)
cs = MLPClassifier.get_hyperparameter_search_space()
print(cs)


############################################################################
# Data Loading
# ============
def get_local_csv():
    import pandas as pd
    import numpy as np
    df = pd.read_csv("/data/projects/example/auto_ml/Radiomics-2D/features.csv")
    label = pd.read_csv("/data/projects/example/auto_ml/Radiomics-2D/labels.csv")["label"]
    label = np.array([1 if l == "Positive" else 0 for l in label])
    return df.to_numpy(), label


# local
X, y = get_local_csv()

# breast cancer
# X, y = load_breast_cancer(return_X_y=True)

X_train, X_test, y_train, y_test = train_test_split(X, y)

############################################################################
# Fit MLP classifier to the data
# ==============================

clf = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    per_run_time_limit=30,
    include={"classifier": ["gradient_boosting", "adaboost", "MLPClassifier"],
             'feature_preprocessor': ["no_preprocessing"]},
)
clf.fit(X_train, y_train)

############################################################################
# Print test accuracy and statistics
# ==================================

y_pred = clf.predict(X_test)
print("accuracy: ", sklearn.metrics.accuracy_score(y_pred, y_test))
print(clf.sprint_statistics())
print(clf.leaderboard(detailed=False,top_k=30))
pprint(clf.show_models(), indent=4)

models_with_weights = clf.get_models_with_weights()
with open('./models_report.txt', 'w') as f:
    for model in models_with_weights:
        f.write(str(model) + '\n')

二、auto-pytorch

1. 1 环境依赖

额外安装brew install cmake

lightgbm 库依赖第三方库 pip install lightgbm

brew install libomp

pip install autoPyTorch

mac 允许不限制memory, M1 芯片对内容限制的操作目前还有bug

在这里插入图片描述

1.2 支持用法

支持大量的表格型数据,图片数据支持少,且不支持扩展
在这里插入图片描述
代码示例:

用法比较固定,没有更多的文档来作为参考,且无法扩展。

import numpy as np

import sklearn.model_selection

import torchvision.datasets

from autoPyTorch.pipeline.image_classification import ImageClassificationPipeline

# Get the training data for tabular classification
trainset = torchvision.datasets.FashionMNIST(root='../datasets/', train=True, download=True)
data = trainset.data.numpy()
data = np.expand_dims(data, axis=3)
# Create a proof of concept pipeline!
dataset_properties = dict()
pipeline = ImageClassificationPipeline(dataset_properties=dataset_properties)

# Train and test split
train_indices, val_indices = sklearn.model_selection.train_test_split(
    list(range(data.shape[0])),
    random_state=1,
    test_size=0.25,
)

# Configuration space
pipeline_cs = pipeline.get_hyperparameter_search_space()
print("Pipeline CS:\n", '_' * 40, f"\n{pipeline_cs}")
config = pipeline_cs.sample_configuration()
print("Pipeline Random Config:\n", '_' * 40, f"\n{config}")
pipeline.set_hyperparameters(config)

# Fit the pipeline
print("Fitting the pipeline...")

pipeline.fit(X=dict(X_train=data,
                    is_small_preprocess=True,
                    dataset_properties=dict(mean=np.array([np.mean(data[:, :, :, i]) for i in range(1)]),
                                            std=np.array([np.std(data[:, :, :, i]) for i in range(1)]),
                                            num_classes=10,
                                            num_features=data.shape[1] * data.shape[2],
                                            image_height=data.shape[1],
                                            image_width=data.shape[2],
                                            is_small_preprocess=True),
                    train_indices=train_indices,
                    val_indices=val_indices,
                    )
             )

# Showcase some components of the pipeline
print(pipeline)

http://www.kler.cn/a/4681.html

相关文章:

  • 【Rust自学】12.6. 使用TDD(测试驱动开发)开发库功能
  • 栈 (算法十二)
  • python-42-使用selenium-wire爬取微信公众号下的所有文章列表
  • 【HM-React】08. Layout模块
  • 三 BH1750 光感驱动调试1
  • nginx负载均衡-基于端口的负载均衡(一)
  • 学习HM微博项目第4天
  • 12、MySQL数据类型
  • 三个月从功能测试进阶到自动化测试,涨薪5k?你在想啥呢?
  • ICG-PEG-OH 结构式,吲哚菁绿-聚乙二醇-羟基的相关说明
  • 循环神经网络RNN基础
  • 数据结构与算法笔记--堆栈和队列的使用
  • NPM 组件包 epic-geo-ip 等恶意获取主机敏感信息(MPS-2023-8302)
  • P1659拉拉队排练 manacher经典题
  • PyInstaller库的使用及Koch曲线及推广,绘制康托尔集
  • 2023学生党适用的蓝牙耳机有哪些?四款学生党值得入手的蓝牙耳机推荐
  • 软件框架-实现使用@Component@Data@Configuration@Bean(配置类控制类实体类)等方法实现将配置文件从8080端口显示在网页上
  • QT中字符转换
  • 支付宝跳转
  • cron表达式 详解
  • vue3 编写 svg 通用组件 修改颜色和尺寸
  • Unity Animation -- Overview
  • Vue3 の 组合式函数
  • C语言通讯录应用程序:从设计到实现
  • 如何使用python删除一个文件?好用到上头.....
  • ElasticSearch系列 - SpringBoot整合ES:组合多个查询条件 bool 查询