docker torcherve打包mar包并部署模型
使用Docker打包深度网络模型mar包到服务端
参考链接:《Docker torchserve 部署模型流程——以WSL部署YOLO-FaceV2为例》(CSDN博客)
1、docker拉取环境镜像命令
docker images
出现此提示为没有权限去执行命令,此时修改命令为
sudo docker images
出现类似如下界面即为成功
**注意:**如果没有所需镜像,那么就需要去网上的镜像网站拉取镜像。命令为
docker pull registry.cn-shanghai.aliyuncs.com/jhinno/torch-serve:v6.1
docker pull 镜像
2、启动docker并进行打包
2、1启动docker服务
systemctl start docker
docker run --rm -it -p 8080:8080 -p 8081:8081 -v /home/lxz-2/lyw/class_eval/model_store:/home/model-server/model-store registry.cn-shanghai.aliyuncs.com/jhinno/torch-serve:v6.1
--rm 表示容器退出后自动删除该容器(并非"清空内存"),不会保留容器内的临时数据。
-it 表示交互式容器,会有一个伪终端,可以理解为docker容器的终端,我个人认为docker容器就是一个虚拟机,能更加方便人们去运行代码。
-p端口映射, -p 8080:8080 表示将本地8080端口映射到虚拟机8080端口
-v路径映射: -v /home/lxz-2/lyw/class_eval/model_store:/home/model-server/model-store将 /home/lxz-2/lyw/class_eval/model_store映射/home/model-server/model-store
registry.cn-shanghai.aliyuncs.com/jhinno/torch-serve:v6.1 :所使用的环境
输入完上述命令后进入docker容器的伪终端,此时所使用的环境为 registry.cn-shanghai.aliyuncs.com/jhinno/torch-serve:v6.1
2、2 编写handler文件
handler文件,是加载模型、预处理、推理、后处理的过程的函数,可以理解为跑通模型从前到后的步骤:比如加载模型,加载好后需要对数据进行预处理,预处理后进行推理,将得到的结果进行后处理,应用到实际中,最后返回。
load_model、initialize第一个是下载模型用的,第二个是初始化,例如是不是用GPU啊等参数设置的,preprocess,此时这个函数的传参data就是用户在调用接口传递数据,preprocess返回的data会进入到inference的data,同时inference返回的data会进入到postprocess,最后通过接口返回给用户
import datetime
import os
import pickle
import sys
import zipfile
import numpy as np
import logging
logger = logging.getLogger(__name__)
import base64
import torch
import io
from PIL import Image
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from ts.torch_handler.base_handler import BaseHandler
import ast
class FaceDetectHandler(BaseHandler):
    """TorchServe handler for the course-scoring regression model.

    Pipeline: ``initialize()`` resolves the device and loads the checkpoint,
    ``preprocess()`` turns the request body into tensors, ``inference()``
    runs the model, and ``postprocess()`` unwraps the scalar prediction.
    A missing-parameter request is signalled by passing the sentinel string
    "参数不足" through the pipeline.
    """

    def __init__(self):
        super().__init__()
        # Scalers for standardization / min-max scaling.
        # NOTE(review): neither scaler is fitted or used anywhere in this
        # class — kept for interface compatibility; confirm before removing.
        self.scaler = StandardScaler()
        self.minmax_scaler = MinMaxScaler()
        # Number of input features the model consumes.
        self.args = 14
        # Feature order the model was trained with; preprocess() reads the
        # request body in exactly this order.
        self.order = [
            'ATTENDANCE_RATE', 'HEAD_UP_RATE', 'FRONT_RATE', 'FRONT_SEAT_RATE',
            'TVOTELOG', 'TPREAMPANSWERLOG', 'TDISCUSSLOG', 'TSURVEYLOG',
            'TCHOOSESOMEONELOG', 'TWORKLOG', 'VOTELOG', 'PREAMPANSWERLOG',
            'WORKLOG', 'CHOOSESOMEONELOG',
        ]
        # Count-type feature names (not referenced in this class; presumably
        # used by callers or a future version — verify).
        self.count = ['VOTELOG', 'WORKLOG', 'PREAMPANSWERLOG',
                      'CHOOSESOMEONELOG', 'JOBFINISH']

    def load_model(self, model_path):
        """Load MyModel weights from *model_path* onto ``self.device``."""
        # model.py is packaged next to this handler inside the .mar archive.
        from model import MyModel
        print('loading model...')
        model = MyModel(1, 4, 1)
        states = torch.load(model_path, map_location=self.device)
        # strict=False tolerates extra/missing keys in the checkpoint.
        model.load_state_dict(states, strict=False)
        model = model.to(device=self.device)
        self.model = model.eval()

    def initialize(self, context):
        """TorchServe entry point: pick the device, resolve the serialized
        weight file from the manifest, and load the model.

        Raises:
            RuntimeError: if the manifest has no ``serializedFile`` entry
                (the original code would have passed None to torch.load).
        """
        properties = context.system_properties
        logger.info(f"Cuda available: {torch.cuda.is_available()}")
        logger.info(f"GPU available: {torch.cuda.device_count()}")
        use_cuda = torch.cuda.is_available() and torch.cuda.device_count() > 0
        self.map_location = 'cuda' if use_cuda else 'cpu'
        gpu_id = properties.get('gpu_id')
        self.device = torch.device(
            f'{self.map_location}:{gpu_id}' if use_cuda else 'cpu')
        self.manifest = context.manifest
        model_dir = properties.get('model_dir')
        logger.info("==================model_dir==========================="
                    " %s loaded successfully", model_dir)
        self.model_pt_path = None
        self.model_path = None
        # Locate the serialized checkpoint declared in the manifest.
        if "serializedFile" in self.manifest["model"]:
            serialized_file = self.manifest["model"]["serializedFile"]
            self.model_pt_path = os.path.join(model_dir, serialized_file)
        # Bug fix: fail fast with a clear error instead of calling
        # load_model(None) when the manifest entry is missing.
        if self.model_pt_path is None:
            raise RuntimeError("serializedFile missing from model manifest")
        self.load_model(self.model_pt_path)
        logger.info("Model file %s loaded successfully", self.model_pt_path)

    def preprocess(self, data):
        """Build model inputs from the first request's JSON body.

        Args:
            data: TorchServe request batch; ``data[0]['body']`` is expected
                to be a dict containing every feature in ``self.order``.

        Returns:
            dict with ``values_tensor`` of shape (1, args, 1) and
            ``MASK_tensor`` of shape (1, args), or the string "参数不足"
            when required features are missing.
        """
        logger.info("preprocess ing~~~~~")
        logger.info(data)
        data_dict = data[0]['body']
        # Robustness fix: the original only checked the four *_RATE keys but
        # then indexed every feature in self.order, so any other missing key
        # raised an unhandled KeyError. Validate the full feature list.
        if any(feature not in data_dict for feature in self.order):
            return "参数不足"
        values = [data_dict[feature] for feature in self.order]
        # Mask flags which features carry a non-zero signal.
        mask = np.array([1 if v != 0 else 0 for v in values],
                        dtype=np.float32).reshape(1, self.args)
        MASK_tensor = torch.tensor(mask, dtype=torch.float32).to(self.device)
        values_array = np.array(values, dtype=np.float32).reshape(1, self.args, 1)
        values_tensor = torch.tensor(values_array,
                                     dtype=torch.float32).to(self.device)
        logger.info(values_tensor)
        return {"values_tensor": values_tensor, "MASK_tensor": MASK_tensor}

    def inference(self, data):
        """Run the model on the tensors produced by preprocess().

        Passes the "参数不足" sentinel through unchanged so postprocess()
        can report the error to the caller.
        """
        logger.info("inference ing~~~~~")
        logger.info(data)
        if data == "参数不足":
            return "参数不足"
        model = self.model
        logger.info("model loaded")
        prediction = model(data['values_tensor'], data['MASK_tensor'])
        logger.info(prediction)
        return prediction

    def postprocess(self, data):
        """Wrap the scalar prediction (or the error sentinel) for the client."""
        logger.info("postprocess ing~~~~~")
        logger.info(data)
        if data == "参数不足":
            return [{"output": "参数不足"}]
        # .item() assumes the model returns a single-element tensor.
        result = data.item()
        return [{"output": result}]
可以本地编写个测试文件,看看是否能跑通
# Append to the end of the handler file: a local smoke test that exercises
# the full preprocess -> inference -> postprocess pipeline without TorchServe.
if __name__ == "__main__":
    import logging
    logging.basicConfig(level=logging.INFO)

    # Minimal stand-in for the TorchServe context object.
    class MockContext:
        def __init__(self):
            self.system_properties = {
                "model_dir": "./model",  # directory holding the model files
                "gpu_id": 0,             # only used when CUDA is available
            }
            self.manifest = {
                "model": {
                    "serializedFile": "model.pth",  # weight file name
                    "modelFile": "model.py",        # model definition file
                }
            }

    handler = FaceDetectHandler()
    context = MockContext()

    # Make sure the model directory exists.
    os.makedirs(context.system_properties["model_dir"], exist_ok=True)
    weight_path = os.path.join(
        context.system_properties["model_dir"],
        context.manifest["model"]["serializedFile"],
    )
    # Bug fix: the original called torch.save(handler.model.state_dict(), ...)
    # here, but handler.model is only created inside initialize()/load_model(),
    # so that branch always crashed with AttributeError. Require a real
    # checkpoint instead of fabricating one.
    if not os.path.exists(weight_path):
        sys.exit(f"缺少权重文件: {weight_path},请先放置模型权重再运行测试")

    # Initialize the handler (resolves device and loads the model).
    handler.initialize(context)

    # Test payload containing every required field.
    test_data = [{
        "body": {
            "ATTENDANCE_RATE": 0.85,
            "HEAD_UP_RATE": 0.6,
            "FRONT_RATE": 0.75,
            "FRONT_SEAT_RATE": 0.5,
            "TVOTELOG": 1,
            "TPREAMPANSWERLOG": 2,
            "TDISCUSSLOG": 0,
            "TSURVEYLOG": 1,
            "TCHOOSESOMEONELOG": 0,
            "TWORKLOG": 3,
            "VOTELOG": 1,
            "PREAMPANSWERLOG": 2,
            "WORKLOG": 3,
            "CHOOSESOMEONELOG": 0,
            "LISTEN_CLASS": 0.9,
            "WRITE_NOTE": 0.3,
            "PLAY_PHONE": 0.1,
            "LIT_TABLE": 0.2
        }
    }]

    # Full processing pipeline.
    try:
        preprocessed = handler.preprocess(test_data)
        print("预处理结果:", preprocessed)
        if preprocessed != "参数不足":
            inference_result = handler.inference(preprocessed)
            print("推理结果:", inference_result)
            postprocessed = handler.postprocess(inference_result)
            print("最终输出:", postprocessed)
        else:
            print("错误:输入参数不完整")
    except Exception as e:
        print(f"处理出错: {str(e)}")
2.3打包
torch-model-archiver --model-name ml_regression_AICourseScoring_V2.2 --version 2.2 --model-file model.py --serialized-file test_model_loss0.2894_r20.9787_20250327_184525.pth --handler Myhandler.py
需要将参数文件、模型文件、handler文件同时放到一个文件夹下。
--model-name ml_regression_AICourseScoring_V2.2 模型的名字
--version 2.2 版本
--model-file model.py 模型文件
--serialized-file test_model_loss0.2894_r20.9787_20250327_184525.pth 参数文件
--handler Myhandler.py handler文件
映射后的文件夹下的文件
执行完上述命令后会在当前文件夹下生成mar包,
我本地使用的是学校中台,只需上传mar包即可,本地测试参考
参考链接:《Docker torchserve 部署模型流程——以WSL部署YOLO-FaceV2为例》(CSDN博客)
第四点