当前位置: 首页 > article >正文

TensorRT:自定义插件学习与实践 002:实现GELU

  • 代码链接:https://github.com/codesteller/trt-custom-plugin

TensorRT版本的选择

  • 教程代码对应的版本TensorRT-6.0.1.8,我尝试使用TensorRT-7.2.3.4也能通过编译
set_ifndef(TRT_LIB /usr/local/TensorRT-7.2.3.4/lib)
set_ifndef(TRT_INCLUDE /usr/local/TensorRT-7.2.3.4/include)
  • 但是使用更高版本可能报错
set_ifndef(TRT_LIB /usr/local/TensorRT-8.0.0.3/lib)
set_ifndef(TRT_INCLUDE /usr/local/TensorRT-8.0.0.3/include)
  • error: looser throw specifier for ‘virtual int GeluPlugin::getNbOutputs() const’ // https://forums.developer.nvidia.com/t/custom-plugin-fails-error-looser-throw-specifier-for-virtual/186885

  • 需要按照新版本的格式进行修改,一般需要加上noexcept关键字 : int getNbOutputs() const noexcept override;

plugin 在python端的使用

trt.PluginField

def getAddScalarPlugin(scalar):
    """Locate the custom GELU plugin creator and build a plugin from it.

    Iterates the global TensorRT plugin registry; when the creator named
    "CustomGeluPlugin" is found, a plugin instance is created with a
    typeId/bias field collection and returned.  Returns None when the
    creator is not registered.
    """
    for creator in trt.get_plugin_registry().plugin_creator_list:
        print(creator.name)
        if creator.name != "CustomGeluPlugin":  # e.g. "LReLU_TRT" is another registered creator
            continue
        # NOTE(review): create_plugin(name, None) crashed with a segfault
        # (core dumped) — always pass a real PluginFieldCollection.
        fields = [
            trt.PluginField("typeId", np.int32(0), trt.PluginFieldType.INT32),
            trt.PluginField("bias", np.int32(scalar), trt.PluginFieldType.INT32),
        ]
        return creator.create_plugin(creator.name, trt.PluginFieldCollection(fields))
    return None
  • 关于参数列表 parameterList.append(trt.PluginField("typeId", np.int32(0), trt.PluginFieldType.INT32))的设置,可在原代码的PluginField进行设置

在这里插入图片描述

  • python文档:https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Plugin/IPluginCreator.html#tensorrt.PluginFieldCollection

python环境安装

install cuda-python

  • pip install cuda-python -i https://mirrors.aliyun.com/pypi/simple/

        Both CUDA-Python and pyCUDA allow you to write GPU kernels using CUDA C++. The kernel is presented as a string to the python code to compile and run. The key difference is that the host-side code in one case is coming from the community (Andreas K and others) whereas in the CUDA Python case it is coming from NVIDIA.(https://pypi.org/project/cuda-python/ ,https://nvidia.github.io/cuda-python/install.html https://blog.csdn.net/hjxu2016/article/details/122868139)

install right version tensorrt for python

  • https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html
  • cd /usr/local/TensorRT-7.2.3.4/python/
  • pip install tensorrt-7.2.3.4-cp37-none-linux_x86_64.whl
    在这里插入图片描述
  • from .tensorrt import * ImportError: libnvinfer.so.7: cannot open shared object file: No such file or directory
  • LD_LIBRARY_PATH=/usr/local/TensorRT-7.2.3.4/lib/ /home/pdd/anaconda3/envs/yolocopy/bin/python3.7 /home/pdd/MPI/AddScalarPlugin/cmake-build-debug/testAddScalarPlugin.py
  • from .tensorrt import * ImportError: libcudnn.so.8: cannot open shared object file: No such file or directory
  • LD_LIBRARY_PATH=/usr/local/TensorRT-7.2.3.4/lib/:/usr/local/cuda-11.1/targets/x86_64-linux/lib /home/pdd/anaconda3/envs/yolocopy/bin/python3.7 /home/pdd/MPI/AddScalarPlugin/cmake-build-debug/testAddScalarPlugin.py

CG

  • 用到的CMakeLists.txt
#
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

cmake_minimum_required(VERSION 3.8 FATAL_ERROR)

# Pin the CUDA compiler BEFORE project() enables the CUDA language.
# (Setting a plain CMake variable named CUDACXX would be a no-op: CUDACXX
# is an environment variable consulted by CMake, and CMAKE_CUDA_COMPILER
# already pins the compiler explicitly.)
set(CMAKE_CUDA_COMPILER /usr/local/cuda-11.1/bin/nvcc)

project(gelu LANGUAGES CXX CUDA)

# Warning flags for the host C++ compiler; -Wno-deprecated-declarations
# silences warnings from TensorRT's deprecated API surface.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-long-long -pedantic -Wno-deprecated-declarations")

# Sets variable to a value if variable is unset.
# NOTE: implemented as a macro (not a function) so the set() takes effect in
# the caller's scope. ${var} is the variable NAME; ${${var}} dereferences to
# its current value. The if(NOT ${var}) test also treats an existing but
# false-y value ("", OFF, 0, NOTFOUND) as "unset" and overwrites it.
macro(set_ifndef var val)
    if (NOT ${var})
        set(${var} ${val})
    endif()
    # Always report the effective value, whether defaulted or user-provided.
    message(STATUS "Configurable variable ${var} set to ${${var}}")
endmacro()

# -------- CONFIGURATION --------
# Legacy FindCUDA module, kept for compatibility; the CUDA language itself
# is already enabled by project(... LANGUAGES CUDA).
find_package(CUDA REQUIRED)

# Default TensorRT / CUDA locations. Override without editing this file:
#   cmake -DTRT_LIB=/path/to/TensorRT/lib -DTRT_INCLUDE=/path/to/TensorRT/include
set_ifndef(TRT_LIB /usr/local/TensorRT-8.6.0.12/lib)
set_ifndef(TRT_INCLUDE /usr/local/TensorRT-8.6.0.12/include)
set_ifndef(CUDA_ROOT /usr/local/cuda-11.1)

# Find dependencies:
message("\nThe following variables are derived from the values of the previous variables unless provided explicitly:\n")

# TensorRT's nvinfer lib
find_library(_NVINFER_LIB nvinfer HINTS ${TRT_LIB} PATH_SUFFIXES lib lib64)
set_ifndef(NVINFER_LIB ${_NVINFER_LIB})

# cuBLAS
find_library(_CUBLAS_LIB cublas HINTS ${CUDA_ROOT} PATH_SUFFIXES lib lib64)
set_ifndef(CUBLAS_LIB ${_CUBLAS_LIB})

# CUDA include dir
find_path(_CUDA_INC_DIR cuda_runtime_api.h HINTS ${CUDA_ROOT} PATH_SUFFIXES include)
set_ifndef(CUDA_INC_DIR ${_CUDA_INC_DIR})

# -------- BUILDING --------
# Directory-scoped include paths (legacy style; target-scoped
# target_include_directories on the plugin target is preferable).
# The previous duplicated include_directories() call was collapsed into one.
include_directories(
    ${CUDA_ROOT}/include
    ${TRT_INCLUDE}
    ${CMAKE_SOURCE_DIR}/include/
    ${CMAKE_SOURCE_DIR}/
)

# The plugin is built as a MODULE library: it is loaded at runtime
# (dlopen / ctypes) rather than linked against at build time.
add_library(GeluPlugin MODULE
    ${CMAKE_SOURCE_DIR}/geluPlugin.cu
    ${CMAKE_SOURCE_DIR}/geluPlugin.cpp
    # Headers listed so IDEs show them; they are not compiled.
    ${CMAKE_SOURCE_DIR}/geluKernel.h
    ${CMAKE_SOURCE_DIR}/geluPlugin.h
)

# Use C++11 for host code (propagated to consumers via PUBLIC).
target_compile_features(GeluPlugin PUBLIC cxx_std_11)

# Target-scoped include paths, so the target builds even without the
# directory-scoped include_directories() above.
target_include_directories(GeluPlugin PRIVATE
    ${TRT_INCLUDE}
    ${CUDA_ROOT}/include
    ${CMAKE_SOURCE_DIR}/include
    ${CMAKE_SOURCE_DIR}
)

# Link TensorRT's nvinfer lib and cuBLAS.
target_link_libraries(GeluPlugin PRIVATE ${NVINFER_LIB} ${CUBLAS_LIB})

set_target_properties(GeluPlugin PROPERTIES
    CUDA_SEPARABLE_COMPILATION ON
)


http://www.kler.cn/a/17098.html

相关文章:

  • 【go从零单排】Timer、Epoch 时间函数
  • ML 系列: 第 24 节 — 离散概率分布(泊松分布)
  • 拦截器实现http请求访问本地图片
  • qt QVideoWidget详解
  • BERT配置详解1:构建强大的自然语言处理模型
  • git commit 校验
  • MyBatis详细笔记
  • Java I/O
  • 【前端面经】JS-深浅拷贝
  • 4. 嵌入式基础
  • 唱作音乐人朱卫明新歌全网首发,当初恋遇到《龙仙街》
  • 留守儿童爱心网站
  • JSP+SQL基于JSP的学生信息管理系统(源代码+论文+答辩PPT)
  • 操作系统原理 —— 调度的概念、层次(十一)
  • Git 使用教程:最详细、最正宗手把手教学(万字长文)
  • springboot实用配置
  • Effective 建造者模式、私有化、依赖注入
  • 五一欢乐赛!题解
  • 希尔排序(C++)
  • 安卓开发_广播机制_广播的最佳实践:实现强制下线功能
  • PyQt5桌面应用开发(5):对话框
  • Java 基础进阶篇(二)—— static 静态关键字与单例模式
  • kafka 学习,笔记
  • Spring Boot参考指南-Spring Boot安装(Maven安装、Gradle安装)
  • Docker compose 常用指令
  • c++ 11标准模板(STL) std::vector (二)