当前位置: 首页 > article >正文

人工智能--网络可解释性框架

背景

1、神经网络是黑盒模型,如何提升其可解释性?如何像树模型那样查看各个特征的重要程度?

解决方案

SHAP

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import shap
from sklearn.model_selection import train_test_split

# Load the Adult dataset bundled with SHAP as a (features, labels) pair.
data = shap.datasets.adult()
x, y = data
y = y.astype(np.int32)

# Split the data, keeping 80% of the rows for training.
X_train, X_valid, y_train, y_valid = train_test_split(x, y, train_size=0.8, random_state=42)

# Build and train a small fully connected Keras binary classifier.
model = keras.Sequential([
    keras.layers.Dense(20, activation='relu', input_dim=X_train.shape[1]),
    keras.layers.Dense(10, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid))


def predict_positive(rows):
    """Return the model's sigmoid output for ``rows`` as a flat 1-D array.

    The final layer is ``Dense(1)``, so ``model.predict`` yields one column
    per row. Flattening it lets SHAP treat the model as single-output, so the
    summary plot receives a plain (rows, features) matrix.
    NOTE(review): the exact shape SHAP returns for a 2-D model output differs
    between shap releases; confirm against the installed version.
    """
    return model.predict(rows).flatten()


# Explain the model with SHAP.
# Draw each sample exactly once and reuse it, so the rows being plotted are
# guaranteed to be the rows the SHAP values were computed for.
background = shap.sample(X_train, 100)
explained_rows = shap.sample(X_valid, 100)
explainer = shap.KernelExplainer(predict_positive, background)
shap_values = explainer.shap_values(explained_rows)
shap.summary_plot(shap_values, explained_rows)

# Display the figure.
plt.show()

# 清理和简化代码
# 如果有更多的具体需求,或需要进一步的帮助,请随时告知!

Lime

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
import lime
import lime.lime_tabular

# Assumes x and y are already defined and suitably preprocessed.
X_train, X_valid, y_train, y_valid = train_test_split(
    x,
    y,
    train_size=0.7,
    random_state=42,
)

# Assemble the TensorFlow/Keras network one layer at a time:
# two ReLU hidden layers followed by a single sigmoid output unit.
model = keras.Sequential()
model.add(keras.layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
model.add(keras.layers.Dense(64, activation='relu'))
model.add(keras.layers.Dense(1, activation='sigmoid'))

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.fit(X_train, y_train, epochs=30, batch_size=2048)

# Prediction-probability function shaped for LIME.
def pred_proba(X):
    """Return two-column class probabilities for the rows in ``X``.

    The module-level ``model`` supplies the positive-class column; the
    first column is its complement (``1 - prediction``).
    """
    positive = model.predict(X)
    negative = 1 - positive
    # Put the two columns side by side: [class 0, class 1].
    return np.hstack((negative, positive))

# Initialise the LIME explainer from the training data.
explainer = lime.lime_tabular.LimeTabularExplainer(
    training_data=np.array(X_train),
    feature_names=['feature_' + str(i) for i in range(X_train.shape[1])],  # synthetic positional feature names
    class_names=['class_0', 'class_1'],
    mode='classification'
)

# Pick one validation instance to explain.
idx = 4  # adjust to the data at hand
explanation = explainer.explain_instance(
    # Pass a plain 1-D NumPy array rather than a pandas Series: LIME indexes
    # the row by integer position, which is deprecated (and eventually an
    # error) on a Series whose index holds column labels.
    data_row=X_valid.iloc[idx].to_numpy(),
    predict_fn=pred_proba,
    num_features=8,
    num_samples=1000,
    labels=[1]  # explain the positive class
)

# Render the explanation in the notebook.
explanation.show_in_notebook(show_table=True)

特征提取:中间层激活可视化(Activation Maximization)

from keras import layers as L
from tf_keras_vis.activation_maximization import ActivationMaximization

def loss(output):
    """Return the diagonal entries ``output[k, k]`` for k = 0..7 as a tuple."""
    return tuple(output[k, k] for k in range(8))

def model_modifier(model):
    """Replace the activation of ``model``'s last layer with a linear one.

    The model is modified in place and nothing is returned.
    """
    # The file imports TensorFlow only under the alias ``tf``; the bare name
    # ``tensorflow`` is never bound and would raise NameError here.
    model.layers[-1].activation = tf.keras.activations.linear
# Load the mouse protein expression table, dropping the 'Unnamed: 0' column.
df = pd.read_csv('../../input/mpempe/mouse-protein-expression.csv').drop('Unnamed: 0', axis=1)

from sklearn.model_selection import train_test_split as tts
mpe_x = df.drop('class', axis=1)
mpe_y = df['class']
X_train, X_valid, y_train, y_valid = tts(mpe_x, mpe_y, train_size = 0.8, random_state = 42)

# Classifier: 80 inputs -> three Dense(32) -> two Dense(16) -> 8-way softmax.
inp = L.Input((80,))
x = L.Dense(32, activation='relu')(inp)
for nodes in [32, 32]:
    x = L.Dense(nodes, activation='relu')(x)
for nodes in [16, 16]:
    x = L.Dense(nodes, activation='relu')(x)
x = L.Dense(8, activation='softmax')(x)
model = keras.models.Model(inputs=inp, outputs=x)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Labels are shifted down by one before training
# (presumably they are stored 1-based; confirm against the CSV).
model.fit(X_train, y_train-1, epochs=50, validation_data=(X_valid, y_valid-1))

# Wrap the classifier so it accepts (80, 1, 1)-shaped inputs, which are
# reshaped back to flat 80-vectors before reaching the trained model.
inp = L.Input((80,1,1))
reshape = L.Reshape((80,))(inp)
modelOut = model(reshape)
act = keras.models.Model(inputs=inp, outputs=modelOut)

# The visualizer does not depend on the loop variable, so build it once.
visualize_activation = ActivationMaximization(act, model_modifier)

for i in range(20):

    # Generate a fresh random seed input for each round. TensorFlow is
    # imported under the alias ``tf``; the bare name ``tensorflow`` is unbound.
    seed_input = tf.random.uniform((10, 80, 1, 1), 0, 1)

    # Generate activations and convert them into float32 NumPy images.
    activations = visualize_activation(loss, seed_input=seed_input, steps=256)
    images = [tf.cast(activation, tf.float32).numpy() for activation in activations]

    # Create the figure before selecting the colormap, so that set_cmap acts
    # on this figure instead of implicitly opening an extra empty one.
    plt.figure(figsize=(9,12), dpi=400)
    plt.set_cmap('gray')
    # Use a distinct loop variable so the outer counter ``i`` is not clobbered.
    for target, image in enumerate(images):
        plt.subplot(4, 3, target+1)
        visualization = image.reshape(8, 10)
        plt.imshow(visualization)
        plt.title(f'Target: {target}')
        plt.axis('off')
    plt.show()
    plt.close()

参考文献


http://www.kler.cn/a/300330.html

相关文章:

  • ESP32下FreeRTOS实时操作系统使用
  • JavaScript语言的多线程编程
  • apisix的authz-casbin
  • Dockerfile另一种使用普通用户启动的方式
  • STM32之CubeMX图形化工具开发介绍(十七)
  • 基于VSCode+CMake+debootstrap搭建Ubuntu交叉编译开发环境
  • AI大模型日报#0908:OpenAI计划年底推出GPT Next、Roblox官宣AI秒生3D物体模型
  • AI电商,如何提高设计效率?
  • qt下两种方式读取opencv 图片各个通道的值
  • YOLOv8改进 | 模块缝合 | C2f 融合RVB + EMA注意力机制【二次融合 + 结构图】
  • 论文阅读:3D Gaussian Splatting for Real-Time Radiance Field Rendering
  • 【Unity】HybridCLR测试笔记
  • 数据结构代码集训day16(适合考研、自学、期末和专升本)
  • ASP.NET Core 入门教学二十三 模型绑定和验证
  • 高并发内存池项目(3)——项目框架介绍与实现线程池
  • 【2024】Benchmarking Foundation Models with Language-Model-as-an-Examiner
  • 【佳学基因检测】在织梦网站中, 创建或修改目录:/var/www/html/cp 失败! DedeTag Engine Create File False
  • Adobe After Effects下载_AE绿色中文版下载,AE2023软件下...
  • JavaScript 中的 `var`, `let`, `const` 详解
  • --数据库--
  • Kubernetes中Pod和Node标签的添加、修改与删除
  • 如何用python打开csv文件路径
  • Jenkins 构建后操作(Send build artifacts over SSH)
  • 入侵检测与防御系统:网络安全的前沿阵地
  • 原生 input 中的 “type=file“ 上传文件
  • CMU 10423 Generative AI:HW1(理论部分)