精华内容
下载资源
问答
  • 海思 3559 SVP NNIE demo解读

    千次阅读 2020-10-18 17:44:58
    海思3559 SVP NNIE demo解读 图片处理——yolo3为例 1. 先看函数void SAMPLE_SVP_NNIE_Yolov3(void) - 部分1 HI_CHAR *pcSrcFile = "./data/nnie_image/rgb_planar/dog_bike_car_416x416.bgr"; HI_CHAR *pcModelName...

    海思3559 SVP NNIE demo解读

    图片处理——yolo3为例

    1. 先看函数void SAMPLE_SVP_NNIE_Yolov3(void)

    - 部分1

    HI_CHAR *pcSrcFile = "./data/nnie_image/rgb_planar/dog_bike_car_416x416.bgr";
    HI_CHAR *pcModelName = "./data/nnie_model/detection/inst_yolov3_cycle.wk";
    HI_U32 u32PicNum = 1;
    HI_FLOAT f32PrintResultThresh = 0.0f;
    HI_S32 s32Ret = HI_SUCCESS;
    SAMPLE_SVP_NNIE_CFG_S   stNnieCfg = {0};
    SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S stInputDataIdx = {0};
    SAMPLE_SVP_NNIE_PROCESS_SEG_INDEX_S stProcSegIdx = {0};
    

    结构体SAMPLE_SVP_NNIE_CFG_S

    /* Sample-level NNIE configuration that the caller fills in before parameter init. */
    typedef struct hiSAMPLE_SVP_NNIE_CFG_S
    {
    	HI_CHAR *pszPic; //path of the input picture
    	HI_U32 u32MaxInputNum; //max number of input images in each batch
    	HI_U32 u32MaxRoiNum; //max number of ROIs
    	HI_U64 au64StepVirAddr[SAMPLE_SVP_NNIE_EACH_SEG_STEP_ADDR_NUM * SVP_NNIE_MAX_NET_SEG_NUM]; //virtual addr of LSTM's or RNN's step buffer
    	SVP_NNIE_ID_E aenNnieCoreId[SVP_NNIE_MAX_NET_SEG_NUM]; //NNIE core selected for each segment
    } SAMPLE_SVP_NNIE_CFG_S;
    

    主要涉及NNIE的设置

    枚举SVP_NNIE_ID_E:

    /* Identifier of an NNIE core; used to choose the core a segment runs on. */
    typedef enum hiSVP_NNIE_ID_E {
        SVP_NNIE_ID_0 = 0x0,
        SVP_NNIE_ID_1 = 0x1,
    
        SVP_NNIE_ID_BUTT /* end marker, one past the last listed core id */
    } SVP_NNIE_ID_E;
    

    主要定义了NNIE的核的枚举

    结构体SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S

    /* Addresses one node of the network by segment index and node index. */
    typedef struct hiSAMPLE_SVP_NNIE_DATA_INDEX_S
    {
    	HI_U32 u32SegIdx; /* index of the network segment */
    	HI_U32 u32NodeIdx; /* index of the node; presumably relative to that segment - TODO confirm */
    } SAMPLE_SVP_NNIE_DATA_INDEX_S;
    /* Same layout, named for its use as an input-data selector. */
    typedef SAMPLE_SVP_NNIE_DATA_INDEX_S SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S;
    

    SegIdx是指段(但是Yolo不需要分段,因此仅有一段)

    NodeIdx是指节点在该段内的索引(下标),而不是节点的数量

    结构体SAMPLE_SVP_NNIE_PROCESS_SEG_INDEX_S

    /* Alias of SAMPLE_SVP_NNIE_DATA_INDEX_S, named for selecting the segment to process. */
    typedef SAMPLE_SVP_NNIE_DATA_INDEX_S SAMPLE_SVP_NNIE_PROCESS_SEG_INDEX_S;
    

    同上

    - 部分2

    /*Set configuration parameter*/
    f32PrintResultThresh = 0.8f;
    stNnieCfg.pszPic= pcSrcFile;
    stNnieCfg.u32MaxInputNum = u32PicNum; //max input image num in each batch
    stNnieCfg.u32MaxRoiNum = 0;
    stNnieCfg.aenNnieCoreId[0] = SVP_NNIE_ID_0;//set NNIE core
    

    主要设置了SAMPLE_SVP_NNIE_CFG_S结构体的相关内容

    - 部分3

    SAMPLE_COMM_SVP_CheckSysInit();
    

    主要进行初始化

    /*
     * Make sure the SVP/MPP system has been initialised, doing it on first use.
     * s_bSampleSvpInit records whether initialisation has already been done.
     * If SAMPLE_COMM_SVP_SysInit() reports failure the process is terminated
     * with exit(-1); otherwise a debug trace is emitted on every call.
     */
    HI_VOID SAMPLE_COMM_SVP_CheckSysInit(HI_VOID)
    {
        /* Fast path: a previous call already initialised the system. */
        if (HI_FALSE != s_bSampleSvpInit)
        {
            SAMPLE_SVP_TRACE(SAMPLE_SVP_ERR_LEVEL_DEBUG, "Svp mpi init ok!\n");
            return;
        }

        /* First call: any non-zero result from the init routine is fatal. */
        if (0 != SAMPLE_COMM_SVP_SysInit())
        {
            SAMPLE_SVP_TRACE(SAMPLE_SVP_ERR_LEVEL_ERROR, "Svp mpi init failed!\n");
            exit(-1);
        }

        s_bSampleSvpInit = HI_TRUE;
        SAMPLE_SVP_TRACE(SAMPLE_SVP_ERR_LEVEL_DEBUG, "Svp mpi init ok!\n");
    }
    
    /*
     * (Re)initialise the MPP system and its video-buffer (VB) pool for the SVP samples.
     *
     * Any previous SYS/VB state is torn down first, then the VB pool is configured
     * and initialised, and finally the MPP system is initialised.
     *
     * Returns HI_SUCCESS on success, otherwise the error code of the MPI call that
     * failed. If HI_MPI_SYS_Init fails, the VB pool that was just initialised is
     * released again so a failed call does not leave it allocated.
     */
    static HI_S32 SAMPLE_COMM_SVP_SysInit(HI_VOID)
    {
    	HI_S32 s32Ret = HI_FAILURE;
    	VB_CONFIG_S struVbConf;

    	/* Start from a clean state; the results of these calls are not checked. */
    	HI_MPI_SYS_Exit();
    	HI_MPI_VB_Exit();

    	/* Zero the whole configuration before filling in the fields used below. */
    	memset(&struVbConf, 0, sizeof(VB_CONFIG_S));

    	/* NOTE(review): the pool count is 2 but only pool index 1 gets a size;
    	 * pool 0 stays zeroed by the memset above - confirm this is intended. */
    	struVbConf.u32MaxPoolCnt = 2;
    	struVbConf.astCommPool[1].u64BlkSize = 768 * 576 * 2;
    	struVbConf.astCommPool[1].u32BlkCnt = 1;

    	/* Set the MPP video buffer pool attributes.
    	 * HI_S32 HI_MPI_VB_SetConfig(const VB_CONFIG_S *pstVbConfig);
    	 * pstVbConfig: pointer to the (static) video buffer pool attributes, input. */
    	s32Ret = HI_MPI_VB_SetConfig((const VB_CONFIG_S *)&struVbConf);
    	SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error(%#x):HI_MPI_VB_SetConf failed!\n", s32Ret);

    	/* Initialise the MPP video buffer pool. */
    	s32Ret = HI_MPI_VB_Init();
    	SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error(%#x):HI_MPI_VB_Init failed!\n", s32Ret);

    	/* Initialise the MPP system. */
    	s32Ret = HI_MPI_SYS_Init();
    	if (HI_SUCCESS != s32Ret)
    	{
    		/* Roll back the VB pool initialised above before reporting the error. */
    		HI_MPI_VB_Exit();
    	}
    	SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error(%#x):HI_MPI_SYS_Init failed!\n", s32Ret);

    	return s32Ret;
    }
    

    结构体VB_CONFIG_S

    /* Video buffer (VB) pool attributes, passed to HI_MPI_VB_SetConfig. */
    typedef struct hiVB_CONFIG_S {
        HI_U32 u32MaxPoolCnt; /* presumably the number of pools the system may hold - TODO confirm */
        VB_POOL_CONFIG_S astCommPool[VB_MAX_COMM_POOLS]; /* per-pool settings for the common pools */
    } VB_CONFIG_S;
    

    结构体VB_POOL_CONFIG_S

    /* Settings of a single video buffer pool. */
    typedef struct hiVB_POOL_CONFIG_S {
        HI_U64 u64BlkSize; /* size of one block; presumably in bytes - TODO confirm */
        HI_U32 u32BlkCnt; /* number of blocks in the pool */
        VB_REMAP_MODE_E enRemapMode; /* remap mode of the pool memory */
        HI_CHAR acMmzName[MAX_MMZ_NAME_LEN]; /* presumably the MMZ zone the pool is taken from - TODO confirm */
    } VB_POOL_CONFIG_S;
    

    - 部分4

    static SAMPLE_SVP_NNIE_MODEL_S s_stYolov3Model = {0};
    s32Ret = SAMPLE_COMM_SVP_NNIE_LoadModel(pcModelName,&s_stYolov3Model);
    

    向NNIE核载入模型

    结构体SAMPLE_SVP_NNIE_MODEL_S:

    /* A loaded NNIE model: the parsed description plus the raw file buffer it was parsed from. */
    typedef struct hiSAMPLE_SVP_NNIE_MODEL_S
    {
    	SVP_NNIE_MODEL_S stModel; //parsed network model, filled by HI_MPI_SVP_NNIE_LoadModel
    	SVP_MEM_INFO_S stModelBuf; //store Model file
    } SAMPLE_SVP_NNIE_MODEL_S;
    

    结构体SVP_NNIE_MODEL_S

    /* Parsed NNIE network model: run mode, buffer requirements and per-segment layout. */
    typedef struct hiSVP_NNIE_MODEL_S {
        SVP_NNIE_RUN_MODE_E enRunMode;//enum: run mode of the network model
    
        HI_U32 u32TmpBufSize; /* temp buffer size */
        HI_U32 u32NetSegNum; /* number of valid entries in astSeg */
        SVP_NNIE_SEG_S astSeg[SVP_NNIE_MAX_NET_SEG_NUM]; /* per-segment description */
        SVP_NNIE_ROIPOOL_INFO_S astRoiInfo[SVP_NNIE_MAX_ROI_LAYER_NUM]; /* ROIPooling info */
    
        SVP_MEM_INFO_S stBase; /* NOTE(review): not used in the visible code - meaning to be confirmed */
    } SVP_NNIE_MODEL_S;
    

    主要存了模型用于NNIE核的一些属性

    结构体SVP_MEM_INFO_S

    /* Mem information: one memory region described by its physical address,
     * virtual address and size. */
    typedef struct hiSVP_MEM_INFO_S {
        HI_U64  u64PhyAddr; /* RW;The physical address of the memory */
        HI_U64  u64VirAddr; /* RW;The virtual address of the memory */
        HI_U32  u32Size;    /* RW;The size of memory */
    } SVP_MEM_INFO_S;
    

    主要存了内存分配的信息

    /*
     * Read a compiled NNIE model file into MMZ memory and parse it.
     *
     * pszModelFile : path of the model (.wk) file; must not be NULL.
     * pstNnieModel : receives the file buffer (stModelBuf) and the parsed network
     *                description (stModel); must not be NULL.
     *
     * Returns HI_SUCCESS on success. Returns HI_INVALID_VALUE when an argument is
     * NULL or the file cannot be opened, and HI_FAILURE for every later error.
     * On success the MMZ buffer stays allocated and is referenced by
     * pstNnieModel->stModelBuf; it is not freed here. On failure after the
     * allocation, the buffer is released and stModelBuf.u32Size is reset to 0.
     */
    HI_S32 SAMPLE_COMM_SVP_NNIE_LoadModel(HI_CHAR *pszModelFile,
    									  SAMPLE_SVP_NNIE_MODEL_S *pstNnieModel)
    {
    	HI_S32 s32Ret = HI_INVALID_VALUE;
    	HI_U64 u64PhyAddr = 0;
    	HI_U8 *pu8VirAddr = NULL;
    	HI_SL slFileSize = 0;
    	FILE *fp = NULL;

    	/*check: both pointers are dereferenced or handed to fopen below*/
    	SAMPLE_SVP_CHECK_EXPR_RET((NULL == pszModelFile || NULL == pstNnieModel), s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
    							  "Error,pszModelFile and pstNnieModel can't be NULL!\n");

    	/*Get model file size*/
    	fp = fopen(pszModelFile, "rb");
    	SAMPLE_SVP_CHECK_EXPR_RET(NULL == fp, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error, open model file failed!\n");
    	s32Ret = fseek(fp, 0L, SEEK_END);
    	SAMPLE_SVP_CHECK_EXPR_GOTO(-1 == s32Ret, FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error, fseek failed!\n");
    	slFileSize = ftell(fp);
    	SAMPLE_SVP_CHECK_EXPR_GOTO(slFileSize <= 0, FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error, ftell failed!\n");
    	/*the allocation size and stModelBuf.u32Size are HI_U32: refuse a file whose
    	size would be truncated, otherwise fread below would overrun the buffer*/
    	SAMPLE_SVP_CHECK_EXPR_GOTO((HI_SL)(HI_U32)slFileSize != slFileSize, FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
    							   "Error, model file is too large!\n");
    	s32Ret = fseek(fp, 0L, SEEK_SET);
    	SAMPLE_SVP_CHECK_EXPR_GOTO(-1 == s32Ret, FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error, fseek failed!\n");

    	/*malloc model file mem*/
    	/*the allocation yields both a virtual address (inside this process) and a
    	physical address for the same memory*/
    	s32Ret = SAMPLE_COMM_SVP_MallocMem("SAMPLE_NNIE_MODEL", NULL, (HI_U64 *)&u64PhyAddr, (void **)&pu8VirAddr, (HI_U32)slFileSize);
    	SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,
    							   "Error(%#x),Malloc memory failed!\n", s32Ret);

    	pstNnieModel->stModelBuf.u32Size = (HI_U32)slFileSize;
    	pstNnieModel->stModelBuf.u64PhyAddr = u64PhyAddr;
    	pstNnieModel->stModelBuf.u64VirAddr = (HI_U64)pu8VirAddr;

    	/*one item of slFileSize bytes is requested, so any result other than 1 is a short read*/
    	s32Ret = fread(pu8VirAddr, slFileSize, 1, fp);
    	SAMPLE_SVP_CHECK_EXPR_GOTO(1 != s32Ret, FAIL_1, SAMPLE_SVP_ERR_LEVEL_ERROR,
    							   "Error,read model file failed!\n");

    	/*load model*/
    	/*parse the network model out of the buffer filled above:
    	pstModelBuf - buffer holding the wk file produced by the NNIE compiler; the
    	              caller allocates and fills it beforehand, must not be NULL. Input.
    	pstModel    - network model structure. Output.*/
    	s32Ret = HI_MPI_SVP_NNIE_LoadModel(&pstNnieModel->stModelBuf, &pstNnieModel->stModel);
    	SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, FAIL_1, SAMPLE_SVP_ERR_LEVEL_ERROR,
    							   "Error,HI_MPI_SVP_NNIE_LoadModel failed!\n");

    	fclose(fp);
    	return HI_SUCCESS;
    FAIL_1:
    	SAMPLE_SVP_MMZ_FREE(pstNnieModel->stModelBuf.u64PhyAddr, pstNnieModel->stModelBuf.u64VirAddr);
    	pstNnieModel->stModelBuf.u32Size = 0;
    FAIL_0:
    	if (NULL != fp)
    	{
    		fclose(fp);
    	}

    	return HI_FAILURE;
    }
    
    /*
     * Allocate an MMZ memory block; thin wrapper around HI_MPI_SYS_MmzAlloc.
     * MMZ is the memory HiSilicon uses for media data, see
     * https://www.cnblogs.com/wlzy/p/9733110.html
     *
     * pszMmb      : name string of the memory block (Mmb). Input.
     * pszZone     : name string of the MMZ zone. Input.
     * pu64PhyAddr : receives the physical address. Output.
     * ppvVirAddr  : receives the virtual address. Output.
     * u32Size     : size of the memory block. Input.
     *
     * Returns whatever HI_MPI_SYS_MmzAlloc returns.
     */
    HI_S32 SAMPLE_COMM_SVP_MallocMem(HI_CHAR *pszMmb, HI_CHAR *pszZone, HI_U64 *pu64PhyAddr, HI_VOID **ppvVirAddr, HI_U32 u32Size)
    {
        /* Note the argument order: the MPI call takes the output addresses first. */
        return HI_MPI_SYS_MmzAlloc(pu64PhyAddr, ppvVirAddr, pszMmb, pszZone, u32Size);
    }
    

    - 部分5

    static SAMPLE_SVP_NNIE_PARAM_S s_stYolov3NnieParam = {0};
    s_stYolov3NnieParam.pstModel = &s_stYolov3Model.stModel;
    s32Ret = SAMPLE_SVP_NNIE_Yolov3_ParamInit(&stNnieCfg,&s_stYolov3NnieParam,&s_stYolov3SoftwareParam);
    

    该部分主要进行模型的设置

    结构体SAMPLE_SVP_NNIE_PARAM_S:

    /* Everything needed to run NNIE forward passes for one model: the model itself,
     * the work buffers, and the per-segment blobs and forward-control structures. */
    typedef struct hiSAMPLE_SVP_NNIE_PARAM_S
    {
    	SVP_NNIE_MODEL_S *pstModel; //parsed model; must be set before parameter init
    	HI_U32 u32TmpBufSize;
    	HI_U32 au32TaskBufSize[SVP_NNIE_MAX_NET_SEG_NUM]; //task buffer size of each segment
    	SVP_MEM_INFO_S stTaskBuf; //task buffer region covering all segments
    	SVP_MEM_INFO_S stTmpBuf; //temp buffer region
    	SVP_MEM_INFO_S stStepBuf;										 //store Lstm step info
    	SAMPLE_SVP_NNIE_SEG_DATA_S astSegData[SVP_NNIE_MAX_NET_SEG_NUM]; //each seg's input and output blob
    	SVP_NNIE_FORWARD_CTRL_S astForwardCtrl[SVP_NNIE_MAX_NET_SEG_NUM]; //filled for CNN and recurrent segments
    	SVP_NNIE_FORWARD_WITHBBOX_CTRL_S astForwardWithBboxCtrl[SVP_NNIE_MAX_NET_SEG_NUM]; //filled for ROI segments
    } SAMPLE_SVP_NNIE_PARAM_S;
    

    结构体SAMPLE_SVP_NNIE_SEG_DATA_S:

    /*each seg input and output memory*/
    typedef struct hiSAMPLE_SVP_NNIE_SEG_DATA_S
    {
    	SVP_SRC_BLOB_S astSrc[SVP_NNIE_MAX_INPUT_NUM]; /*input (source) blobs of the segment*/
    	SVP_DST_BLOB_S astDst[SVP_NNIE_MAX_OUTPUT_NUM]; /*output (destination) blobs of the segment*/
    } SAMPLE_SVP_NNIE_SEG_DATA_S;
    

    结构体SVP_BLOB_S:

    /* Source and destination blobs share the same layout. */
    typedef SVP_BLOB_S  SVP_SRC_BLOB_S;
    typedef SVP_BLOB_S  SVP_DST_BLOB_S;
    /****************************** Blob struct ******************************
    In Caffe, the blob contain shape info as the following order:
    Image\FeatureMap:               N       C       H       W
    FC(normal vector):              N       C
    RNN\LSTM(Recurrent) vector:     T       N       D
    
    The relationship of the following blob struct with Caffe blob is as follows:
    Image\FeatureMap:               Num    Chn    Height   Width
    FC(VEC_S32):                    Num    Width
    RNN\LSTM(SEQ_S32) vector:       Step   Num     Dim
    The stride, which measuring unit is byte, is always aligned by the width or
    dim direction.
    **************************************************************************/
    typedef struct hiSVP_BLOB_S {
        SVP_BLOB_TYPE_E enType;     /* Blob type */
        HI_U32 u32Stride;           /* Stride, a line bytes num */
    
        HI_U64 u64VirAddr;          /* virtual addr */
        HI_U64 u64PhyAddr;          /* physical addr */
    
        HI_U32 u32Num;             /* N: frame num or sequence num, correspond to caffe blob's n */
        union {
            struct {
                HI_U32 u32Width;    /* W: frame width, correspond to caffe blob's w */
                HI_U32 u32Height;   /* H: frame height, correspond to caffe blob's h */
                HI_U32 u32Chn;      /* C: frame channel, correspond to caffe blob's c */
            } stWhc;
            struct {
                HI_U32 u32Dim;          /* D: vector dimension */
                HI_U64 u64VirAddrStep;  /* T: virtual address of time steps array in each sequence */
            } stSeq;
        } unShape;
    } SVP_BLOB_S;
    
    /*
     * Initialise the YOLOv3 sample parameters in two stages: first the NNIE
     * (hardware) parameters, then the software post-processing parameters.
     *
     * Returns HI_SUCCESS when both stages succeed. If either stage fails,
     * SAMPLE_SVP_NNIE_Yolov3_Deinit is called to undo the work; the result is then
     * HI_FAILURE, or the Deinit error code if Deinit itself fails.
     */
    static HI_S32 SAMPLE_SVP_NNIE_Yolov3_ParamInit(SAMPLE_SVP_NNIE_CFG_S* pstCfg,
        SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S* pstSoftWareParam)
    {
        HI_S32 s32Status;

        /* Stage 1: hardware parameters. */
        s32Status = SAMPLE_COMM_SVP_NNIE_ParamInit(pstCfg, pstNnieParam);
        SAMPLE_SVP_CHECK_EXPR_GOTO(s32Status != HI_SUCCESS, ROLLBACK, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error(%#x),SAMPLE_COMM_SVP_NNIE_ParamInit failed!\n", s32Status);

        /* Stage 2: software parameters. */
        s32Status = SAMPLE_SVP_NNIE_Yolov3_SoftwareInit(pstCfg, pstNnieParam, pstSoftWareParam);
        SAMPLE_SVP_CHECK_EXPR_GOTO(s32Status != HI_SUCCESS, ROLLBACK, SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error(%#x),SAMPLE_SVP_NNIE_Yolov3_SoftwareInit failed!\n", s32Status);

        /* Both stages passed, so the status is HI_SUCCESS here. */
        return HI_SUCCESS;

    ROLLBACK:
        s32Status = SAMPLE_SVP_NNIE_Yolov3_Deinit(pstNnieParam, pstSoftWareParam, NULL);
        SAMPLE_SVP_CHECK_EXPR_RET(s32Status != HI_SUCCESS, s32Status, SAMPLE_SVP_ERR_LEVEL_ERROR,
                "Error(%#x),SAMPLE_SVP_NNIE_Yolov3_Deinit failed!\n", s32Status);
        return HI_FAILURE;
    }
    

    函数SAMPLE_COMM_SVP_NNIE_ParamInit

    /*
     * Validate the arguments and initialise the NNIE forward parameters.
     *
     * Returns HI_ERR_SVP_NNIE_ILLEGAL_PARAM when pstNnieCfg, pstNnieParam or
     * pstNnieParam->pstModel is NULL. Returns HI_SUCCESS when
     * SAMPLE_SVP_NNIE_ParamInit succeeds. If it fails,
     * SAMPLE_COMM_SVP_NNIE_ParamDeinit is called and the result is HI_FAILURE,
     * or the Deinit error code if Deinit itself fails.
     */
    HI_S32 SAMPLE_COMM_SVP_NNIE_ParamInit(SAMPLE_SVP_NNIE_CFG_S *pstNnieCfg,
    									  SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam)
    {
        HI_S32 s32Status;

        /* Argument checks: both structures are required, and so is the model. */
        SAMPLE_SVP_CHECK_EXPR_RET(!(NULL != pstNnieCfg && NULL != pstNnieParam), HI_ERR_SVP_NNIE_ILLEGAL_PARAM,
                                  SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,pstNnieCfg and pstNnieParam can't be NULL!\n");
        SAMPLE_SVP_CHECK_EXPR_RET(NULL == pstNnieParam->pstModel, HI_ERR_SVP_NNIE_ILLEGAL_PARAM,
                                  SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,pstNnieParam->pstModel can't be NULL!\n");

        /* The actual initialisation work. */
        s32Status = SAMPLE_SVP_NNIE_ParamInit(pstNnieCfg, pstNnieParam);
        SAMPLE_SVP_CHECK_EXPR_GOTO(s32Status != HI_SUCCESS, CLEANUP, SAMPLE_SVP_ERR_LEVEL_ERROR,
                                   "Error, SAMPLE_SVP_NNIE_ParamInit failed!\n");

        /* Initialisation passed, so the status is HI_SUCCESS here. */
        return HI_SUCCESS;

    CLEANUP:
        s32Status = SAMPLE_COMM_SVP_NNIE_ParamDeinit(pstNnieParam);
        SAMPLE_SVP_CHECK_EXPR_RET(s32Status != HI_SUCCESS, s32Status, SAMPLE_SVP_ERR_LEVEL_ERROR,
                                  "Error, SAMPLE_COMM_SVP_NNIE_ParamDeinit failed!\n");
        return HI_FAILURE;
    }
    
    /*****************************************************************************
    *   Prototype    : SAMPLE_SVP_NNIE_ParamInit
    *   Description  : Fill info of NNIE Forward parameters.
    *                  One cached MMZ block is allocated and carved up, in order, into:
    *                    1. the task buffers of all segments,
    *                    2. the temp buffer,
    *                    3. the source blobs of segment 0,
    *                    4. the destination blobs of every segment.
    *   Input        : SAMPLE_SVP_NNIE_CFG_S   *pstNnieCfg    NNIE configure parameter
    * 		            SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam	 NNIE parameters;
    *                                          pstModel must already be set
    *   Output       : pstNnieParam buffers, blob addresses and forward ctrl info
    *   Return Value : HI_S32,HI_SUCCESS:Success,Other:failure
    *   Note         : The block is not freed here; its base address is kept in
    *                  pstNnieParam->stTaskBuf.
    *****************************************************************************/
    static HI_S32 SAMPLE_SVP_NNIE_ParamInit(SAMPLE_SVP_NNIE_CFG_S *pstNnieCfg,
    										SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam)
    {
    	HI_U32 i = 0, j = 0;
    	HI_U32 u32TotalSize = 0;
    	HI_U32 u32TotalTaskBufSize = 0;
    	HI_U32 u32TmpBufSize = 0;
    	HI_S32 s32Ret = HI_SUCCESS;
    	HI_U32 u32Offset = 0;
    	HI_U64 u64PhyAddr = 0;
    	HI_U8 *pu8VirAddr = NULL;
    	SAMPLE_SVP_NNIE_BLOB_SIZE_S astBlobSize[SVP_NNIE_MAX_NET_SEG_NUM] = {0};

    	/*fill forward info*/
    	s32Ret = SAMPLE_SVP_NNIE_FillForwardInfo(pstNnieCfg, pstNnieParam);
    	SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
    							  "Error,SAMPLE_SVP_NNIE_FillForwardCtrl failed!\n");

    	/*Get taskInfo and Blob mem size*/
    	s32Ret = SAMPLE_SVP_NNIE_GetTaskAndBlobBufSize(pstNnieCfg, pstNnieParam, &u32TotalTaskBufSize,
    												   &u32TmpBufSize, astBlobSize, &u32TotalSize);
    	SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
    							  "Error,SAMPLE_SVP_NNIE_GetTaskAndBlobBufSize failed!\n");

    	/*Malloc mem*/
    	s32Ret = SAMPLE_COMM_SVP_MallocCached("SAMPLE_NNIE_TASK", NULL, (HI_U64 *)&u64PhyAddr, (void **)&pu8VirAddr, u32TotalSize);
    	SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
    							  "Error,Malloc memory failed!\n");
    	/*zero the new block, then flush so the zeroes reach physical memory*/
    	memset(pu8VirAddr, 0, u32TotalSize);

    	SAMPLE_COMM_SVP_FlushCache(u64PhyAddr, (void *)pu8VirAddr, u32TotalSize);

    	/*fill taskinfo mem addr*/
    	pstNnieParam->stTaskBuf.u32Size = u32TotalTaskBufSize;
    	pstNnieParam->stTaskBuf.u64PhyAddr = u64PhyAddr;
    	pstNnieParam->stTaskBuf.u64VirAddr = (HI_U64)pu8VirAddr;

    	/*fill Tmp mem addr*/
    	pstNnieParam->stTmpBuf.u32Size = u32TmpBufSize;
    	pstNnieParam->stTmpBuf.u64PhyAddr = u64PhyAddr + u32TotalTaskBufSize;
    	pstNnieParam->stTmpBuf.u64VirAddr = (HI_U64)pu8VirAddr + u32TotalTaskBufSize;

    	/*fill forward ctrl addr: every segment gets the shared tmp buffer and its
    	own slice of the task buffer, located by the running u32Offset*/
    	for (i = 0; i < pstNnieParam->pstModel->u32NetSegNum; i++)
    	{
    		if (SVP_NNIE_NET_TYPE_ROI == pstNnieParam->pstModel->astSeg[i].enNetType)
    		{
    			pstNnieParam->astForwardWithBboxCtrl[i].stTmpBuf = pstNnieParam->stTmpBuf;
    			pstNnieParam->astForwardWithBboxCtrl[i].stTskBuf.u64PhyAddr = pstNnieParam->stTaskBuf.u64PhyAddr + u32Offset;
    			pstNnieParam->astForwardWithBboxCtrl[i].stTskBuf.u64VirAddr = pstNnieParam->stTaskBuf.u64VirAddr + u32Offset;
    			pstNnieParam->astForwardWithBboxCtrl[i].stTskBuf.u32Size = pstNnieParam->au32TaskBufSize[i];
    		}
    		else if (SVP_NNIE_NET_TYPE_CNN == pstNnieParam->pstModel->astSeg[i].enNetType ||
    				 SVP_NNIE_NET_TYPE_RECURRENT == pstNnieParam->pstModel->astSeg[i].enNetType)
    		{

    			pstNnieParam->astForwardCtrl[i].stTmpBuf = pstNnieParam->stTmpBuf;
    			pstNnieParam->astForwardCtrl[i].stTskBuf.u64PhyAddr = pstNnieParam->stTaskBuf.u64PhyAddr + u32Offset;
    			pstNnieParam->astForwardCtrl[i].stTskBuf.u64VirAddr = pstNnieParam->stTaskBuf.u64VirAddr + u32Offset;
    			pstNnieParam->astForwardCtrl[i].stTskBuf.u32Size = pstNnieParam->au32TaskBufSize[i];
    		}
    		u32Offset += pstNnieParam->au32TaskBufSize[i];
    	}

    	/*fill each blob's mem addr: the blobs start right after the task and tmp
    	buffers. The cursor is advanced past each blob inside the loop, so a
    	segment with zero src or dst nodes never indexes element [-1].*/
    	u64PhyAddr = u64PhyAddr + u32TotalTaskBufSize + u32TmpBufSize;
    	pu8VirAddr = pu8VirAddr + u32TotalTaskBufSize + u32TmpBufSize;
    	for (i = 0; i < pstNnieParam->pstModel->u32NetSegNum; i++)
    	{
    		/*first seg has src blobs, other seg's src blobs from the output blobs of
    		those segs before it or from software output results*/
    		if (0 == i)
    		{
    			for (j = 0; j < pstNnieParam->pstModel->astSeg[i].u16SrcNum; j++)
    			{
    				pstNnieParam->astSegData[i].astSrc[j].u64PhyAddr = u64PhyAddr;
    				pstNnieParam->astSegData[i].astSrc[j].u64VirAddr = (HI_U64)pu8VirAddr;
    				u64PhyAddr += astBlobSize[i].au32SrcSize[j];
    				pu8VirAddr += astBlobSize[i].au32SrcSize[j];
    			}
    		}

    		/*fill the mem addrs of each seg's output blobs*/
    		for (j = 0; j < pstNnieParam->pstModel->astSeg[i].u16DstNum; j++)
    		{
    			pstNnieParam->astSegData[i].astDst[j].u64PhyAddr = u64PhyAddr;
    			pstNnieParam->astSegData[i].astDst[j].u64VirAddr = (HI_U64)pu8VirAddr;
    			u64PhyAddr += astBlobSize[i].au32DstSize[j];
    			pu8VirAddr += astBlobSize[i].au32DstSize[j];
    		}
    	}
    	return s32Ret;
    }
    

    函数SAMPLE_SVP_NNIE_FillForwardInfo

    /*
     * Fill the per-segment forward-control structures and the shape information of
     * every source and destination blob from the parsed model and the sample config.
     *
     * For each segment: the core id comes from pstNnieCfg->aenNnieCoreId, the
     * src/dst counts and blob shapes come from pstNnieParam->pstModel, and the blob
     * frame count comes from u32MaxInputNum (multiplied by u32MaxRoiNum for the
     * destination blobs of ROI segments). Always returns HI_SUCCESS.
     *
     * NOTE(review): the visible caller SAMPLE_SVP_NNIE_ParamInit calls this before
     * it assigns stTmpBuf/stTaskBuf and before au32TaskBufSize is computed, and it
     * assigns stTmpBuf/stTskBuf of the ctrl structures again afterwards - so the
     * buffer fields written here look like placeholders. Confirm before relying on them.
     */
    static HI_S32 SAMPLE_SVP_NNIE_FillForwardInfo(SAMPLE_SVP_NNIE_CFG_S *pstNnieCfg,
    											  SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam)
    {
    	HI_U32 i = 0, j = 0;
    	HI_U32 u32Offset = 0;
    	HI_U32 u32Num = 0;
    
    	for (i = 0; i < pstNnieParam->pstModel->u32NetSegNum; i++)
    	{
    		/*fill forwardCtrl info*/
            //pick the control structure that matches the segment's network type
    		if (SVP_NNIE_NET_TYPE_ROI == pstNnieParam->pstModel->astSeg[i].enNetType)
    		{
    			pstNnieParam->astForwardWithBboxCtrl[i].enNnieId = pstNnieCfg->aenNnieCoreId[i];
    			pstNnieParam->astForwardWithBboxCtrl[i].u32SrcNum = pstNnieParam->pstModel->astSeg[i].u16SrcNum;
    			pstNnieParam->astForwardWithBboxCtrl[i].u32DstNum = pstNnieParam->pstModel->astSeg[i].u16DstNum;
    			pstNnieParam->astForwardWithBboxCtrl[i].u32ProposalNum = 1;
    			pstNnieParam->astForwardWithBboxCtrl[i].u32NetSegId = i;
    			pstNnieParam->astForwardWithBboxCtrl[i].stTmpBuf = pstNnieParam->stTmpBuf;
    			pstNnieParam->astForwardWithBboxCtrl[i].stTskBuf.u64PhyAddr = pstNnieParam->stTaskBuf.u64PhyAddr + u32Offset;
    			pstNnieParam->astForwardWithBboxCtrl[i].stTskBuf.u64VirAddr = pstNnieParam->stTaskBuf.u64VirAddr + u32Offset;
    			pstNnieParam->astForwardWithBboxCtrl[i].stTskBuf.u32Size = pstNnieParam->au32TaskBufSize[i];
    		}
    		else if (SVP_NNIE_NET_TYPE_CNN == pstNnieParam->pstModel->astSeg[i].enNetType ||
    				 SVP_NNIE_NET_TYPE_RECURRENT == pstNnieParam->pstModel->astSeg[i].enNetType)
    		{
    
    			pstNnieParam->astForwardCtrl[i].enNnieId = pstNnieCfg->aenNnieCoreId[i];
    			pstNnieParam->astForwardCtrl[i].u32SrcNum = pstNnieParam->pstModel->astSeg[i].u16SrcNum;
    			pstNnieParam->astForwardCtrl[i].u32DstNum = pstNnieParam->pstModel->astSeg[i].u16DstNum;
    			pstNnieParam->astForwardCtrl[i].u32NetSegId = i;
    			pstNnieParam->astForwardCtrl[i].stTmpBuf = pstNnieParam->stTmpBuf;
    			pstNnieParam->astForwardCtrl[i].stTskBuf.u64PhyAddr = pstNnieParam->stTaskBuf.u64PhyAddr + u32Offset;
    			pstNnieParam->astForwardCtrl[i].stTskBuf.u64VirAddr = pstNnieParam->stTaskBuf.u64VirAddr + u32Offset;
    			pstNnieParam->astForwardCtrl[i].stTskBuf.u32Size = pstNnieParam->au32TaskBufSize[i];
    		}
    		/*advance to the next segment's slice of the task buffer*/
    		u32Offset += pstNnieParam->au32TaskBufSize[i];
    
    		/*fill src blob info*/
    		for (j = 0; j < pstNnieParam->pstModel->astSeg[i].u16SrcNum; j++)
    		{
    			/*Recurrent blob*/
    			if (SVP_BLOB_TYPE_SEQ_S32 == pstNnieParam->pstModel->astSeg[i].astSrcNode[j].enType)
    			{
    				pstNnieParam->astSegData[i].astSrc[j].enType = pstNnieParam->pstModel->astSeg[i].astSrcNode[j].enType;
    				pstNnieParam->astSegData[i].astSrc[j].unShape.stSeq.u32Dim = pstNnieParam->pstModel->astSeg[i].astSrcNode[j].unShape.u32Dim;
    				pstNnieParam->astSegData[i].astSrc[j].u32Num = pstNnieCfg->u32MaxInputNum;
    				/*src step array: first step address slot of this segment*/
    				pstNnieParam->astSegData[i].astSrc[j].unShape.stSeq.u64VirAddrStep = pstNnieCfg->au64StepVirAddr[i * SAMPLE_SVP_NNIE_EACH_SEG_STEP_ADDR_NUM];
    			}
    			else
    			{
    				/*non-sequence blob: copy channel/height/width from the model node*/
    				pstNnieParam->astSegData[i].astSrc[j].enType = pstNnieParam->pstModel->astSeg[i].astSrcNode[j].enType;
    				pstNnieParam->astSegData[i].astSrc[j].unShape.stWhc.u32Chn = pstNnieParam->pstModel->astSeg[i].astSrcNode[j].unShape.stWhc.u32Chn;
    				pstNnieParam->astSegData[i].astSrc[j].unShape.stWhc.u32Height = pstNnieParam->pstModel->astSeg[i].astSrcNode[j].unShape.stWhc.u32Height;
    				pstNnieParam->astSegData[i].astSrc[j].unShape.stWhc.u32Width = pstNnieParam->pstModel->astSeg[i].astSrcNode[j].unShape.stWhc.u32Width;
    				pstNnieParam->astSegData[i].astSrc[j].u32Num = pstNnieCfg->u32MaxInputNum;
    			}
    		}
    
    		/*fill dst blob info*/
    		/*an ROI segment's dst blobs hold u32MaxRoiNum entries per input image*/
    		if (SVP_NNIE_NET_TYPE_ROI == pstNnieParam->pstModel->astSeg[i].enNetType)
    		{
    			u32Num = pstNnieCfg->u32MaxRoiNum * pstNnieCfg->u32MaxInputNum;
    		}
    		else
    		{
    			u32Num = pstNnieCfg->u32MaxInputNum;
    		}
    
    		for (j = 0; j < pstNnieParam->pstModel->astSeg[i].u16DstNum; j++)
    		{
    			if (SVP_BLOB_TYPE_SEQ_S32 == pstNnieParam->pstModel->astSeg[i].astDstNode[j].enType)
    			{
    				pstNnieParam->astSegData[i].astDst[j].enType = pstNnieParam->pstModel->astSeg[i].astDstNode[j].enType;
    				pstNnieParam->astSegData[i].astDst[j].unShape.stSeq.u32Dim =
    					pstNnieParam->pstModel->astSeg[i].astDstNode[j].unShape.u32Dim;
    				pstNnieParam->astSegData[i].astDst[j].u32Num = u32Num;
    				/*dst step array: second step address slot of this segment*/
    				pstNnieParam->astSegData[i].astDst[j].unShape.stSeq.u64VirAddrStep =
    					pstNnieCfg->au64StepVirAddr[i * SAMPLE_SVP_NNIE_EACH_SEG_STEP_ADDR_NUM + 1];
    			}
    			else
    			{
    				pstNnieParam->astSegData[i].astDst[j].enType = pstNnieParam->pstModel->astSeg[i].astDstNode[j].enType;
    				pstNnieParam->astSegData[i].astDst[j].unShape.stWhc.u32Chn = pstNnieParam->pstModel->astSeg[i].astDstNode[j].unShape.stWhc.u32Chn;
    				pstNnieParam->astSegData[i].astDst[j].unShape.stWhc.u32Height = pstNnieParam->pstModel->astSeg[i].astDstNode[j].unShape.stWhc.u32Height;
    				pstNnieParam->astSegData[i].astDst[j].unShape.stWhc.u32Width = pstNnieParam->pstModel->astSeg[i].astDstNode[j].unShape.stWhc.u32Width;
    				pstNnieParam->astSegData[i].astDst[j].u32Num = u32Num;
    			}
    		}
    	}
    	return HI_SUCCESS;
    }
    

    根据 pstNnieParam->pstModel填充pstNnieParam->astSegData段的输入和输出blob,固定的逻辑

    函数SAMPLE_SVP_NNIE_GetTaskAndBlobBufSize

    /*****************************************************************************
    *   Prototype    : SAMPLE_SVP_NNIE_GetTaskAndBlobBufSize
    *   Description  : Get taskinfo and blob memory size
    *   Input        : SAMPLE_SVP_NNIE_CFG_S   *pstNnieCfg       NNIE configure parameter
    *                  SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam     NNIE parameter
    *   Output       : HI_U32                  *pu32TotalTaskBufSize Sum of all segs' task buf size
    *                  HI_U32                  *pu32TmpBufSize    Tmp buffer size
    *                  SAMPLE_SVP_NNIE_BLOB_SIZE_S  astBlobSize[] each seg input and output blob mem size
    *                  HI_U32                  *pu32TotalSize     Total mem size
    *                  pstNnieParam->au32TaskBufSize[] and each blob's u32Stride are also written
    *   Return Value : HI_S32,HI_SUCCESS:Success,Other:failure
    *   Note         : *pu32TotalSize is only added to, never reset, in this function;
    *                  the visible caller passes a variable initialised to 0.
    *                  u32TotalStep is not reset between segments, so it keeps
    *                  accumulating across recurrent segments.
    *                  NOTE(review): confirm both are intended.
    *   History:
    *
    *       1.  Date         : 2017-11-20
    *           Author       :
    *           Modification : Create
    *
    *****************************************************************************/
    static HI_S32 SAMPLE_SVP_NNIE_GetTaskAndBlobBufSize(SAMPLE_SVP_NNIE_CFG_S *pstNnieCfg,
    													SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, HI_U32 *pu32TotalTaskBufSize, HI_U32 *pu32TmpBufSize,
    													SAMPLE_SVP_NNIE_BLOB_SIZE_S astBlobSize[], HI_U32 *pu32TotalSize)
    {
    	HI_S32 s32Ret = HI_SUCCESS;
    	HI_U32 i = 0, j = 0;
    	HI_U32 u32TotalStep = 0;
    
    	/*Get each seg's task buf size*/
    	s32Ret = HI_MPI_SVP_NNIE_GetTskBufSize(pstNnieCfg->u32MaxInputNum, pstNnieCfg->u32MaxRoiNum,
    										   pstNnieParam->pstModel, pstNnieParam->au32TaskBufSize, pstNnieParam->pstModel->u32NetSegNum);
    	SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,
    							  "Error,HI_MPI_SVP_NNIE_GetTaskSize failed!\n");
    
    	/*Get total task buf size*/
        //sum up the task buffer size of every segment
    	*pu32TotalTaskBufSize = 0;
    	for (i = 0; i < pstNnieParam->pstModel->u32NetSegNum; i++)
    	{
    		*pu32TotalTaskBufSize += pstNnieParam->au32TaskBufSize[i];
    	}
    
    	/*Get tmp buf size*/
    	*pu32TmpBufSize = pstNnieParam->pstModel->u32TmpBufSize;
    	*pu32TotalSize += *pu32TotalTaskBufSize + *pu32TmpBufSize;
    
    	/*calculate Blob mem size*/
    	for (i = 0; i < pstNnieParam->pstModel->u32NetSegNum; i++)
    	{
    		if (SVP_NNIE_NET_TYPE_RECURRENT == pstNnieParam->pstModel->astSeg[i].enNetType)
    		{
    			/*add up the step count of every sequence; u64VirAddrStep is read as an
    			array of HI_S32 with one entry per sequence of the first src blob*/
    			for (j = 0; j < pstNnieParam->astSegData[i].astSrc[0].u32Num; j++)
    			{
    				u32TotalStep += *((HI_S32 *)pstNnieParam->astSegData[i].astSrc[0].unShape.stSeq.u64VirAddrStep + j);
    			}
    		}
    		/*the first seg's Src Blob mem size, other seg's src blobs from the output blobs of
    		those segs before it or from software output results*/
    		if (i == 0)
    		{
    			SAMPLE_SVP_NNIE_GetBlobMemSize(&(pstNnieParam->pstModel->astSeg[i].astSrcNode[0]),
    										   pstNnieParam->pstModel->astSeg[i].u16SrcNum, u32TotalStep, &(pstNnieParam->astSegData[i].astSrc[0]),
    										   SAMPLE_SVP_NNIE_ALIGN_16, pu32TotalSize, &(astBlobSize[i].au32SrcSize[0]));
    		}
    
    		/*Get each seg's Dst Blob mem size*/
    		SAMPLE_SVP_NNIE_GetBlobMemSize(&(pstNnieParam->pstModel->astSeg[i].astDstNode[0]),
    									   pstNnieParam->pstModel->astSeg[i].u16DstNum, u32TotalStep, &(pstNnieParam->astSegData[i].astDst[0]),
    									   SAMPLE_SVP_NNIE_ALIGN_16, pu32TotalSize, &(astBlobSize[i].au32DstSize[0]));
    	}
    	return s32Ret;
    }
    
    /*****************************************************************************
    *   Prototype    : SAMPLE_SVP_NNIE_GetBlobMemSize
    *   Description  : Get blob mem size. For every node, work out the aligned line
    *                  stride and the byte size of its blob, store the stride in the
    *                  blob, store the size in au32BlobSize[], and add the size to
    *                  *pu32TotalSize.
    *   Input        : SVP_NNIE_NODE_S astNnieNode[]   NNIE Node
    *                  HI_U32          u32NodeNum      Node num
    *                  HI_U32          u32TotalStep    Total step count, used for SEQ_S32 nodes
    *                  SVP_BLOB_S      astBlob[]       blob struct (u32Num is read, u32Stride is written)
    *                  HI_U32          u32Align        stride align type
    *   Output       : HI_U32          *pu32TotalSize  Total size (accumulated, not reset)
    *                  HI_U32          au32BlobSize[]  blob size
    *   Return Value : VOID
    *****************************************************************************/
    static void SAMPLE_SVP_NNIE_GetBlobMemSize(SVP_NNIE_NODE_S astNnieNode[], HI_U32 u32NodeNum,
    										   HI_U32 u32TotalStep, SVP_BLOB_S astBlob[], HI_U32 u32Align, HI_U32 *pu32TotalSize, HI_U32 au32BlobSize[])
    {
        HI_U32 u32Idx;

        for (u32Idx = 0; u32Idx < u32NodeNum; u32Idx++)
        {
            HI_U32 u32ElemBytes = sizeof(HI_U8);
            HI_U32 u32RowBytes = 0;
            HI_U32 u32LineStride = 0;
            HI_U32 u32BlobBytes = 0;

            /* The three S32 blob types use 4-byte elements, every other type 1 byte. */
            if (SVP_BLOB_TYPE_S32 == astNnieNode[u32Idx].enType ||
                SVP_BLOB_TYPE_VEC_S32 == astNnieNode[u32Idx].enType ||
                SVP_BLOB_TYPE_SEQ_S32 == astNnieNode[u32Idx].enType)
            {
                u32ElemBytes = sizeof(HI_U32);
            }

            /* One line is u32Dim elements for a sequence blob, u32Width elements otherwise. */
            if (SVP_BLOB_TYPE_SEQ_S32 == astNnieNode[u32Idx].enType)
            {
                u32RowBytes = astNnieNode[u32Idx].unShape.u32Dim * u32ElemBytes;
            }
            else
            {
                u32RowBytes = astNnieNode[u32Idx].unShape.stWhc.u32Width * u32ElemBytes;
            }

            /* Round the line up to the requested alignment; anything but 16 means 32. */
            if (SAMPLE_SVP_NNIE_ALIGN_16 == u32Align)
            {
                u32LineStride = SAMPLE_SVP_NNIE_ALIGN16(u32RowBytes);
            }
            else
            {
                u32LineStride = SAMPLE_SVP_NNIE_ALIGN32(u32RowBytes);
            }

            /* Sequence blob: one line per step. Other blobs: num * stride * height * chn. */
            if (SVP_BLOB_TYPE_SEQ_S32 == astNnieNode[u32Idx].enType)
            {
                u32BlobBytes = u32TotalStep * u32LineStride;
            }
            else
            {
                u32BlobBytes = astBlob[u32Idx].u32Num * u32LineStride * astNnieNode[u32Idx].unShape.stWhc.u32Height *
                               astNnieNode[u32Idx].unShape.stWhc.u32Chn;
            }

            au32BlobSize[u32Idx] = u32BlobBytes;
            *pu32TotalSize += au32BlobSize[u32Idx];
            astBlob[u32Idx].u32Stride = u32LineStride;
        }
    }
    

    结构体SVP_NNIE_NODE_S:

    /* Description of one input or output node of a network segment. */
    typedef struct hiSVP_NNIE_NODE_S {
        SVP_BLOB_TYPE_E enType; /* blob type; the sample code reads u32Dim for SEQ_S32 and stWhc otherwise */
        union {
            struct {
                HI_U32 u32Width;
                HI_U32 u32Height;
                HI_U32 u32Chn;
            } stWhc; /* width / height / channel shape */
            HI_U32 u32Dim; /* vector dimension of a sequence node */
        } unShape;
        HI_U32 u32NodeId;
        HI_CHAR szName[SVP_NNIE_NODE_NAME_LEN]; /* Report layer bottom name or data layer bottom name */
    } SVP_NNIE_NODE_S;
    

    函数SAMPLE_COMM_SVP_MallocCached

    /*
     * Allocate cache-enabled MMZ memory from user space; thin wrapper around
     * HI_S32 HI_MPI_SYS_MmzAlloc_Cached(HI_U64* pu64PhyAddr, HI_VOID** ppVirAddr,
     *         const HI_CHAR* pstrMmb, const HI_CHAR* pstrZone, HI_U32 u32Len);
     *
     * pszMmb      : name string of the memory block (Mmb). Input.
     * pszZone     : name string of the MMZ zone. Input.
     * pu64PhyAddr : receives the physical address. Output.
     * ppvVirAddr  : receives the virtual address. Output.
     * u32Size     : size of the memory block. Input.
     *
     * Difference from HI_MPI_SYS_MmzAlloc: memory from this call is cacheable,
     * which makes CPU reads and writes of frequently used memory more efficient
     * (for example the heavy read/write traffic around IVE operators). The CPU
     * works on the cached copy while hardware such as IVE only sees physical
     * memory, so memory shared between CPU and hardware must be synchronised
     * with HI_MPI_SYS_MmzFlushCache.
     *
     * Returns whatever HI_MPI_SYS_MmzAlloc_Cached returns.
     */
    HI_S32 SAMPLE_COMM_SVP_MallocCached(HI_CHAR *pszMmb, HI_CHAR *pszZone, HI_U64 *pu64PhyAddr, HI_VOID **ppvVirAddr, HI_U32 u32Size)
    {
        /* Note the argument order: the MPI call takes the output addresses first. */
        return HI_MPI_SYS_MmzAlloc_Cached(pu64PhyAddr, ppvVirAddr, pszMmb, pszZone, u32Size);
    }
    

    函数SAMPLE_COMM_SVP_FlushCache

    /*
     * Flush cached memory.
     *
     * Thin wrapper around
     *   HI_S32 HI_MPI_SYS_MmzFlushCache(HI_U64 u64PhyAddr, HI_VOID* pVirAddr, HI_U32 u32Size);
     * which writes the cache contents back to memory and invalidates the cache.
     *
     *   u64PhyAddr [IN] start physical address of the data to operate on
     *   pvVirAddr  [IN] start virtual address of the data; must not be NULL
     *   u32Size    [IN] size of the data to operate on
     *
     * Returns the value returned by HI_MPI_SYS_MmzFlushCache.
     */
    HI_S32 SAMPLE_COMM_SVP_FlushCache(HI_U64 u64PhyAddr, HI_VOID *pvVirAddr, HI_U32 u32Size)
    {
        return HI_MPI_SYS_MmzFlushCache(u64PhyAddr, pvVirAddr, u32Size);
    }
    

    函数SAMPLE_SVP_NNIE_Yolov3_SoftwareInit

    /*
     * Fill in the YOLOv3 post-processing ("software") parameters and allocate the
     * buffers that post-processing writes to.
     *
     * One cached MMZ block is allocated, zeroed, flushed and split in this order:
     *   [get-result tmp buffer][dst ROI blob][dst score blob][class ROI-count blob]
     *
     *   pstCfg           not referenced by this function
     *   pstNnieParam     [IN]  supplies the input image size (segment 0, source 0)
     *                          and the model whose segment-0 output count is checked
     *   pstSoftWareParam [OUT] receives the constants and the buffer descriptors
     *
     * Returns HI_SUCCESS on success, HI_FAILURE (via SAMPLE_SVP_CHECK_EXPR_RET) when
     * segment 0 does not report SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM outputs, or
     * the allocation error code.
     */
    static HI_S32 SAMPLE_SVP_NNIE_Yolov3_SoftwareInit(SAMPLE_SVP_NNIE_CFG_S* pstCfg,
        SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S* pstSoftWareParam)
    {
        /* grid count per output scale, used for both height and width */
        static const HI_U32 s_au32GridNum[3] = {13, 26, 52};
        /* bias values per output scale, stored as (even index, odd index) pairs */
        static const HI_FLOAT s_af32BiasTable[3][6] =
        {
            {116, 90, 156, 198, 373, 326},
            {30, 61, 62, 45, 59, 119},
            {10, 13, 16, 30, 33, 23}
        };
        HI_S32 s32Ret = HI_SUCCESS;
        HI_U32 u32Scale = 0;
        HI_U32 u32BiasIdx = 0;
        HI_U32 u32ClassNum = 0;
        HI_U32 u32TmpBufTotalSize = 0;
        HI_U32 u32DstRoiSize = 0;
        HI_U32 u32DstScoreSize = 0;
        HI_U32 u32ClassRoiNumSize = 0;
        HI_U32 u32TotalSize = 0;
        HI_U64 u64PhyAddr = 0;
        HI_U8* pu8VirAddr = NULL;
        HI_U64 u64BlobPhyAddr = 0;
        HI_U8* pu8BlobVirAddr = NULL;

        /* network input size and detector constants */
        pstSoftWareParam->u32OriImHeight = pstNnieParam->astSegData[0].astSrc[0].unShape.stWhc.u32Height;
        pstSoftWareParam->u32OriImWidth = pstNnieParam->astSegData[0].astSrc[0].unShape.stWhc.u32Width;
        pstSoftWareParam->u32BboxNumEachGrid = 3;
        pstSoftWareParam->u32ClassNum = 80;
        for (u32Scale = 0; u32Scale < 3; u32Scale++)
        {
            pstSoftWareParam->au32GridNumHeight[u32Scale] = s_au32GridNum[u32Scale];
        }
        for (u32Scale = 0; u32Scale < 3; u32Scale++)
        {
            pstSoftWareParam->au32GridNumWidth[u32Scale] = s_au32GridNum[u32Scale];
        }
        pstSoftWareParam->u32NmsThresh = (HI_U32)(0.3f*SAMPLE_SVP_NNIE_QUANT_BASE);
        pstSoftWareParam->u32ConfThresh = (HI_U32)(0.5f*SAMPLE_SVP_NNIE_QUANT_BASE);
        pstSoftWareParam->u32MaxRoiNum = 10;
        for (u32Scale = 0; u32Scale < 3; u32Scale++)
        {
            for (u32BiasIdx = 0; u32BiasIdx < 6; u32BiasIdx++)
            {
                pstSoftWareParam->af32Bias[u32Scale][u32BiasIdx] = s_af32BiasTable[u32Scale][u32BiasIdx];
            }
        }

        /*Malloc assist buffer memory*/
        /* result blobs are sized for one extra class slot in front of the real classes */
        u32ClassNum = pstSoftWareParam->u32ClassNum+1;

        SAMPLE_SVP_CHECK_EXPR_RET(SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM != pstNnieParam->pstModel->astSeg[0].u16DstNum,
            HI_FAILURE,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,pstNnieParam->pstModel->astSeg[0].u16DstNum(%d) should be %d!\n",
            pstNnieParam->pstModel->astSeg[0].u16DstNum,SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM);

        u32TmpBufTotalSize = SAMPLE_SVP_NNIE_Yolov3_GetResultTmpBuf(pstNnieParam,pstSoftWareParam);
        u32DstRoiSize = SAMPLE_SVP_NNIE_ALIGN16(u32ClassNum*pstSoftWareParam->u32MaxRoiNum*sizeof(HI_U32)*SAMPLE_SVP_NNIE_COORDI_NUM);
        u32DstScoreSize = SAMPLE_SVP_NNIE_ALIGN16(u32ClassNum*pstSoftWareParam->u32MaxRoiNum*sizeof(HI_U32));
        u32ClassRoiNumSize = SAMPLE_SVP_NNIE_ALIGN16(u32ClassNum*sizeof(HI_U32));
        u32TotalSize = u32TmpBufTotalSize+u32DstRoiSize+u32DstScoreSize+u32ClassRoiNumSize;

        s32Ret = SAMPLE_COMM_SVP_MallocCached("SAMPLE_YOLOV3_INIT",NULL,(HI_U64*)&u64PhyAddr,
            (void**)&pu8VirAddr,u32TotalSize);
        SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret,s32Ret,SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error,Malloc memory failed!\n");
        memset(pu8VirAddr,0, u32TotalSize);
        SAMPLE_COMM_SVP_FlushCache(u64PhyAddr,(void*)pu8VirAddr,u32TotalSize);

        /*set each tmp buffer addr*/
        pstSoftWareParam->stGetResultTmpBuf.u64PhyAddr = u64PhyAddr;
        pstSoftWareParam->stGetResultTmpBuf.u64VirAddr = (HI_U64)(pu8VirAddr);

        /*set result blob*/
        /* ROI blob: placed right after the tmp buffer */
        u64BlobPhyAddr = u64PhyAddr+u32TmpBufTotalSize;
        pu8BlobVirAddr = pu8VirAddr+u32TmpBufTotalSize;
        pstSoftWareParam->stDstRoi.enType = SVP_BLOB_TYPE_S32;
        pstSoftWareParam->stDstRoi.u64PhyAddr = u64BlobPhyAddr;
        pstSoftWareParam->stDstRoi.u64VirAddr = (HI_U64)(pu8BlobVirAddr);
        pstSoftWareParam->stDstRoi.u32Stride = u32DstRoiSize;
        pstSoftWareParam->stDstRoi.u32Num = 1;
        pstSoftWareParam->stDstRoi.unShape.stWhc.u32Chn = 1;
        pstSoftWareParam->stDstRoi.unShape.stWhc.u32Height = 1;
        pstSoftWareParam->stDstRoi.unShape.stWhc.u32Width = u32ClassNum*
            pstSoftWareParam->u32MaxRoiNum*SAMPLE_SVP_NNIE_COORDI_NUM;

        /* score blob: placed after the ROI blob */
        u64BlobPhyAddr += u32DstRoiSize;
        pu8BlobVirAddr += u32DstRoiSize;
        pstSoftWareParam->stDstScore.enType = SVP_BLOB_TYPE_S32;
        pstSoftWareParam->stDstScore.u64PhyAddr = u64BlobPhyAddr;
        pstSoftWareParam->stDstScore.u64VirAddr = (HI_U64)(pu8BlobVirAddr);
        pstSoftWareParam->stDstScore.u32Stride = u32DstScoreSize;
        pstSoftWareParam->stDstScore.u32Num = 1;
        pstSoftWareParam->stDstScore.unShape.stWhc.u32Chn = 1;
        pstSoftWareParam->stDstScore.unShape.stWhc.u32Height = 1;
        pstSoftWareParam->stDstScore.unShape.stWhc.u32Width = u32ClassNum*pstSoftWareParam->u32MaxRoiNum;

        /* per-class ROI count blob: placed after the score blob */
        u64BlobPhyAddr += u32DstScoreSize;
        pu8BlobVirAddr += u32DstScoreSize;
        pstSoftWareParam->stClassRoiNum.enType = SVP_BLOB_TYPE_S32;
        pstSoftWareParam->stClassRoiNum.u64PhyAddr = u64BlobPhyAddr;
        pstSoftWareParam->stClassRoiNum.u64VirAddr = (HI_U64)(pu8BlobVirAddr);
        pstSoftWareParam->stClassRoiNum.u32Stride = u32ClassRoiNumSize;
        pstSoftWareParam->stClassRoiNum.u32Num = 1;
        pstSoftWareParam->stClassRoiNum.unShape.stWhc.u32Chn = 1;
        pstSoftWareParam->stClassRoiNum.unShape.stWhc.u32Height = 1;
        pstSoftWareParam->stClassRoiNum.unShape.stWhc.u32Width = u32ClassNum;

        return s32Ret;
    }
    

    主要涉及yolo3的一些个性化设置

    结构体SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S

    /*
     * Yolov3 software parameter.
     * Filled by SAMPLE_SVP_NNIE_Yolov3_SoftwareInit and consumed by
     * SAMPLE_SVP_NNIE_Yolov3_GetResult.
     */
    typedef struct hiSAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S
    {
    	/* height / width of the network input blob (segment 0, source 0) */
    	HI_U32 u32OriImHeight;
    	HI_U32 u32OriImWidth;
    	/* number of bounding boxes decoded per grid cell (the sample sets 3) */
    	HI_U32 u32BboxNumEachGrid;
    	/* number of classes (the sample sets 80) */
    	HI_U32 u32ClassNum;
    	/* grid counts of the three report blobs (the sample sets 13, 26, 52) */
    	HI_U32 au32GridNumHeight[3];
    	HI_U32 au32GridNumWidth[3];
    	/* thresholds scaled by SAMPLE_SVP_NNIE_QUANT_BASE (the sample uses 0.3 and 0.5) */
    	HI_U32 u32NmsThresh;
    	HI_U32 u32ConfThresh;
    	/* upper bound on the ROIs written out per class */
    	HI_U32 u32MaxRoiNum;
    	/* per report blob: entry [2*k] scales box k's width, entry [2*k+1] its height */
    	HI_FLOAT af32Bias[3][6];
    	/* scratch buffer handed to the get-result routine */
    	SVP_MEM_INFO_S stGetResultTmpBuf;
    	/* outputs: per-class ROI count, ROI coordinates, ROI scores */
    	SVP_DST_BLOB_S stClassRoiNum;
    	SVP_DST_BLOB_S stDstRoi;
    	SVP_DST_BLOB_S stDstScore;
    } SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S;
    

    - 部分6

    /*Fill src data: read the picture named in stNnieCfg into source node 0 of segment 0*/
    stInputDataIdx.u32SegIdx = 0;
    stInputDataIdx.u32NodeIdx = 0;
    s32Ret = SAMPLE_SVP_NNIE_FillSrcData(&stNnieCfg,&s_stYolov3NnieParam,&stInputDataIdx);
    
    static HI_S32 SAMPLE_SVP_NNIE_FillSrcData(SAMPLE_SVP_NNIE_CFG_S* pstNnieCfg,
        SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S* pstInputDataIdx)
    {
        FILE* fp = NULL;
        HI_U32 i =0, j = 0, n = 0;
        HI_U32 u32Height = 0, u32Width = 0, u32Chn = 0, u32Stride = 0, u32Dim = 0;
        HI_U32 u32VarSize = 0;
        HI_S32 s32Ret = HI_SUCCESS;
        HI_U8*pu8PicAddr = NULL;
        HI_U32*pu32StepAddr = NULL;
        HI_U32 u32SegIdx = pstInputDataIdx->u32SegIdx;
        HI_U32 u32NodeIdx = pstInputDataIdx->u32NodeIdx;
        HI_U32 u32TotalStepNum = 0;
    
        /*open file*/
        if (NULL != pstNnieCfg->pszPic)
        {
            fp = fopen(pstNnieCfg->pszPic,"rb");
            SAMPLE_SVP_CHECK_EXPR_RET(NULL == fp,HI_INVALID_VALUE,SAMPLE_SVP_ERR_LEVEL_ERROR,
                "Error, open file failed!\n");
        }
    
        /*get data size*/
        if(SVP_BLOB_TYPE_U8 <= pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].enType &&
            SVP_BLOB_TYPE_YVU422SP >= pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].enType)
        {
            u32VarSize = sizeof(HI_U8);
        }
        else
        {
            u32VarSize = sizeof(HI_U32);
        }
    
        /*fill src data*/
        if(SVP_BLOB_TYPE_SEQ_S32 == pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].enType)
        {
            u32Dim = pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].unShape.stSeq.u32Dim;
            u32Stride = pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Stride;
            pu32StepAddr = (HI_U32*)(pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].unShape.stSeq.u64VirAddrStep);
            pu8PicAddr = (HI_U8*)(pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u64VirAddr);
            for(n = 0; n < pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Num; n++)
            {
                for(i = 0;i < *(pu32StepAddr+n); i++)
                {
                    s32Ret = fread(pu8PicAddr,u32Dim*u32VarSize,1,fp);
                    SAMPLE_SVP_CHECK_EXPR_GOTO(1 != s32Ret,FAIL,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,Read image file failed!\n");
                    pu8PicAddr += u32Stride;
                }
                u32TotalStepNum += *(pu32StepAddr+n);
            }
            SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u64PhyAddr,
                (HI_VOID *) pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u64VirAddr,
                u32TotalStepNum*u32Stride);
        }
        else
        {
            u32Height = pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].unShape.stWhc.u32Height;
            u32Width = pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].unShape.stWhc.u32Width;
            u32Chn = pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].unShape.stWhc.u32Chn;
            u32Stride = pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Stride;
            pu8PicAddr = (HI_U8*)(pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u64VirAddr);
            if(SVP_BLOB_TYPE_YVU420SP== pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].enType)
            {
                for(n = 0; n < pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Num; n++)
                {
                    for(i = 0; i < u32Chn*u32Height/2; i++)
                    {
                        s32Ret = fread(pu8PicAddr,u32Width*u32VarSize,1,fp);
                        SAMPLE_SVP_CHECK_EXPR_GOTO(1 != s32Ret,FAIL,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,Read image file failed!\n");
                        pu8PicAddr += u32Stride;
                    }
                }
            }
            else if(SVP_BLOB_TYPE_YVU422SP== pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].enType)
            {
                for(n = 0; n < pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Num; n++)
                {
                    for(i = 0; i < u32Height*2; i++)
                    {
                        s32Ret = fread(pu8PicAddr,u32Width*u32VarSize,1,fp);
                        SAMPLE_SVP_CHECK_EXPR_GOTO(1 != s32Ret,FAIL,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,Read image file failed!\n");
                        pu8PicAddr += u32Stride;
                    }
                }
            }
            else
            {
                for(n = 0; n < pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Num; n++)
                {
                    for(i = 0;i < u32Chn; i++)
                    {
                        for(j = 0; j < u32Height; j++)
                        {
                            s32Ret = fread(pu8PicAddr,u32Width*u32VarSize,1,fp);
                            SAMPLE_SVP_CHECK_EXPR_GOTO(1 != s32Ret,FAIL,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,Read image file failed!\n");
                            pu8PicAddr += u32Stride;
                        }
                    }
                }
            }
            SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u64PhyAddr,
                (HI_VOID *) pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u64VirAddr,
                pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Num*u32Chn*u32Height*u32Stride);
        }
    
        fclose(fp);
        return HI_SUCCESS;
    FAIL:
    
        fclose(fp);
        return HI_FAILURE;
    }
    
    

    主要完成了读取图片内容,根据pstInputDataIdx放入pstNnieParam里面。

    - 部分7

    /* Run the forward pass on segment 0; HI_TRUE is passed as bInstant */
    stProcSegIdx.u32SegIdx = 0;
    s32Ret = SAMPLE_SVP_NNIE_Forward(&s_stYolov3NnieParam,&stInputDataIdx,&stProcSegIdx,HI_TRUE);
    
    tatic HI_S32 SAMPLE_SVP_NNIE_Forward(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,
        SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S* pstInputDataIdx,
        SAMPLE_SVP_NNIE_PROCESS_SEG_INDEX_S* pstProcSegIdx,HI_BOOL bInstant)
    {
        HI_S32 s32Ret = HI_SUCCESS;
        HI_U32 i = 0, j = 0;
        HI_BOOL bFinish = HI_FALSE;
        SVP_NNIE_HANDLE hSvpNnieHandle = 0;
        HI_U32 u32TotalStepNum = 0;
    
        SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].stTskBuf.u64PhyAddr,
            (HI_VOID *) pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].stTskBuf.u64VirAddr,
            pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].stTskBuf.u32Size);
    
        /*set input blob according to node name*/
        if(pstInputDataIdx->u32SegIdx != pstProcSegIdx->u32SegIdx)
        {
            for(i = 0; i < pstNnieParam->pstModel->astSeg[pstProcSegIdx->u32SegIdx].u16SrcNum; i++)
            {
                for(j = 0; j < pstNnieParam->pstModel->astSeg[pstInputDataIdx->u32SegIdx].u16DstNum; j++)
                {
                    if(0 == strncmp(pstNnieParam->pstModel->astSeg[pstInputDataIdx->u32SegIdx].astDstNode[j].szName,
                        pstNnieParam->pstModel->astSeg[pstProcSegIdx->u32SegIdx].astSrcNode[i].szName,
                        SVP_NNIE_NODE_NAME_LEN))
                    {
                        pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astSrc[i] =
                            pstNnieParam->astSegData[pstInputDataIdx->u32SegIdx].astDst[j];
                        break;
                    }
                }
                SAMPLE_SVP_CHECK_EXPR_RET((j == pstNnieParam->pstModel->astSeg[pstInputDataIdx->u32SegIdx].u16DstNum),
                    HI_FAILURE,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,can't find %d-th seg's %d-th src blob!\n",
                    pstProcSegIdx->u32SegIdx,i);
            }
        }
    
        /*NNIE_Forward*/
        //多节点输入输出的CNN类型网络预测
       	//HI_S32 HI_MPI_SVP_NNIE_Forward(SVP_NNIE_HANDLE *phSvpNnieHandle, const SVP_SRC_BLOB_S astSrc[],const SVP_NNIE_MODEL_S 	*pstModel, const SVP_DST_BLOB_S astDst[],const SVP_NNIE_FORWARD_CTRL_S *pstForwardCtrl,HI_BOOL bInstant);
        //phSvpNnieHandle handle指针。输出
    	//astSrc[] 多个节点输入,节点的顺序跟网络描述中的顺序要求一致,支持多帧同时输入。输入
    	//pstModel 网络模型结构体。输入
    	//astDst[] 网络段的多个节点输出,包含用户标记需要上报输出的中间层结果,以及网络段的最终结果。输出
    	//pstForwardCtrl 控制结构体。输入
    	//bInstant 及时返回结果标志。输入
    

    avatar

        s32Ret = HI_MPI_SVP_NNIE_Forward(&hSvpNnieHandle,
            pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astSrc,
            pstNnieParam->pstModel, pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst,
            &pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx], bInstant);
        SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret,s32Ret,SAMPLE_SVP_ERR_LEVEL_ERROR,
            "Error,HI_MPI_SVP_NNIE_Forward failed!\n");
    
        if(bInstant)
        {
            /*Wait NNIE finish*/
            //查询任务是否完成。
    		//HI_S32 HI_MPI_SVP_NNIE_Query(SVP_NNIE_ID_E enNnieId,SVP_NNIE_HANDLE svpNnieHandle,HI_BOOL *pbFinish,HI_BOOL bBlock);
            //enNnieId 任务所运行的NNIE 核指示标志输入
    		//svpNnieHandle handle。输入
    		//pbFinish 是否完成标志。输出
    		//bBlock 是否阻塞查询。输入
            while(HI_ERR_SVP_NNIE_QUERY_TIMEOUT == (s32Ret = HI_MPI_SVP_NNIE_Query(pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].enNnieId,
                hSvpNnieHandle, &bFinish, HI_TRUE)))
            {
                usleep(100);
                SAMPLE_SVP_TRACE(SAMPLE_SVP_ERR_LEVEL_INFO,
                    "HI_MPI_SVP_NNIE_Query Query timeout!\n");
            }
        }
    
        bFinish = HI_FALSE;
        for(i = 0; i < pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].u32DstNum; i++)
        {
            if(SVP_BLOB_TYPE_SEQ_S32 == pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].enType)
            {
                for(j = 0; j < pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Num; j++)
                {
                    u32TotalStepNum += *((HI_U32*)(pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].unShape.stSeq.u64VirAddrStep)+j);
                }
                SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64PhyAddr,
                    (HI_VOID *) pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64VirAddr,
                    u32TotalStepNum*pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Stride);
    
            }
            else
            {
    
                SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64PhyAddr,
                    (HI_VOID *) pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64VirAddr,
                    pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Num*
                    pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].unShape.stWhc.u32Chn*
                    pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].unShape.stWhc.u32Height*
                    pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Stride);
            }
        }
    
        return s32Ret;
    }
    

    主要进行模型推理,并将推理结果写入对应段的输出blob(astDst)中

    - 部分8

    /* Post-process the segment-0 outputs into ROIs, scores and per-class ROI counts */
    s32Ret = SAMPLE_SVP_NNIE_Yolov3_GetResult(&s_stYolov3NnieParam,&s_stYolov3SoftwareParam);
    
    *****************************************************************************
    * Prototype :   SAMPLE_SVP_NNIE_Yolov3_GetResult
    * Description : this function is used to Get Yolov3 result
    * Input :      SAMPLE_SVP_NNIE_PARAM_S*               pstNnieParam     [IN]  the pointer to YOLOV3 NNIE parameter
    *              SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S*   pstSoftwareParam [IN]  the pointer to YOLOV3 software parameter
    *
    *
    *
    *
    * Output :
    * Return Value : HI_SUCCESS: Success;Error codes: Failure.
    * Spec :
    * Calls :
    * Called By :
    * History:
    *
    * 1. Date : 2017-11-10
    * Author :
    * Modification : Create
    *
    *****************************************************************************/
    HI_S32 SAMPLE_SVP_NNIE_Yolov3_GetResult(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam,
        SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S* pstSoftwareParam)
    {
        HI_U32 i = 0;
        HI_S32 *aps32InputBlob[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM] = {0};
        HI_U32 au32Stride[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM] = {0};
    
        for(i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++)
        {
            aps32InputBlob[i] = (HI_S32*)pstNnieParam->astSegData[0].astDst[i].u64VirAddr;
            au32Stride[i] = pstNnieParam->astSegData[0].astDst[i].u32Stride;
        }
        return SVP_NNIE_Yolov3_GetResult(aps32InputBlob,pstSoftwareParam->au32GridNumWidth,
            pstSoftwareParam->au32GridNumHeight,au32Stride,pstSoftwareParam->u32BboxNumEachGrid,
            pstSoftwareParam->u32ClassNum,pstSoftwareParam->u32OriImWidth,
            pstSoftwareParam->u32OriImWidth,pstSoftwareParam->u32MaxRoiNum,pstSoftwareParam->u32NmsThresh,
            pstSoftwareParam->u32ConfThresh,pstSoftwareParam->af32Bias,
            (HI_S32*)pstSoftwareParam->stGetResultTmpBuf.u64VirAddr,
            (HI_S32*)pstSoftwareParam->stDstScore.u64VirAddr,
            (HI_S32*)pstSoftwareParam->stDstRoi.u64VirAddr,
            (HI_S32*)pstSoftwareParam->stClassRoiNum.u64VirAddr);
    }
    

    主要完成aps32InputBlob和au32Stride的填充后调用SVP_NNIE_Yolov3_GetResult

    /*****************************************************************************
    *   Prototype    : SVP_NNIE_Yolov3_GetResult
    *   Description  : Yolov3 GetResult function.
    *                  For every report blob: permute the data to
    *                  (grid row, grid column, channel) order and convert it to
    *                  float, decode the boxes, and keep those whose score exceeds
    *                  u32ConfThresh. Then sort the kept boxes, run non-maximum
    *                  suppression and write the surviving boxes class by class.
    * Input :      HI_S32    **pps32InputData     [IN]  pointer to the input data
    *              HI_U32    au32GridNumWidth[]   [IN]  Grid num in width direction
    *              HI_U32    au32GridNumHeight[]  [IN]  Grid num in height direction
    *              HI_U32    au32Stride[]         [IN]  stride of input data
    *              HI_U32    u32EachGridBbox      [IN]  Bbox num of each gird
    *              HI_U32    u32ClassNum          [IN]  class num
    *              HI_U32    u32SrcWidth          [IN]  input image width
    *              HI_U32    u32SrcHeight         [IN]  input image height
    *              HI_U32    u32MaxRoiNum         [IN]  Max output roi num (per class)
    *              HI_U32    u32NmsThresh         [IN]  NMS thresh
    *              HI_U32    u32ConfThresh        [IN]  conf thresh
    *              HI_FLOAT  af32Bias[][]         [IN]  bias
    *              HI_S32*   ps32TmpBuf           [IN]  assist buffer
    *              HI_S32    *ps32DstScore        [OUT] dst score
    *              HI_S32    *ps32DstRoi          [OUT] dst roi, 4 values per box
    *              HI_S32    *ps32ClassRoiNum     [OUT] class roi num, entries 1..u32ClassNum
    *                                                   are written; entry 0 is left untouched
    *
    *   Output       :
    *   Return Value : HI_S32: HI_SUCCESS.
    *   Spec         :
    *   Calls        :
    *   Called By    :
    *   History:
    *
    *       1.  Date         : 2017-11-14
    *           Author       :
    *           Modification : Create
    *
    *****************************************************************************/
    static HI_S32 SVP_NNIE_Yolov3_GetResult(HI_S32 **pps32InputData,HI_U32 au32GridNumWidth[],
        HI_U32 au32GridNumHeight[],HI_U32 au32Stride[],HI_U32 u32EachGridBbox,HI_U32 u32ClassNum,HI_U32 u32SrcWidth,
        HI_U32 u32SrcHeight,HI_U32 u32MaxRoiNum,HI_U32 u32NmsThresh,HI_U32 u32ConfThresh,
        HI_FLOAT af32Bias[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM][SAMPLE_SVP_NNIE_YOLOV3_EACH_GRID_BIAS_NUM],
        HI_S32* ps32TmpBuf,HI_S32 *ps32DstScore, HI_S32 *ps32DstRoi, HI_S32 *ps32ClassRoiNum)
    {
        HI_S32 *ps32InputBlob = NULL;
        HI_FLOAT *pf32Permute = NULL;
        SAMPLE_SVP_NNIE_YOLOV3_BBOX_S *pstBbox = NULL;
        HI_S32 *ps32AssistBuf = NULL;
        HI_U32 u32TotalBboxNum = 0;
        HI_U32 u32ChnOffset = 0;
        HI_U32 u32HeightOffset = 0;
        HI_U32 u32BboxNum = 0;
        HI_U32 u32GridXIdx;
        HI_U32 u32GridYIdx;
        HI_U32 u32Offset;
        HI_FLOAT f32StartX;
        HI_FLOAT f32StartY;
        HI_FLOAT f32Width;
        HI_FLOAT f32Height;
        HI_FLOAT f32ObjScore;
        HI_U32 u32MaxValueIndex = 0;
        HI_FLOAT f32MaxScore;
        HI_S32 s32ClassScore;
        HI_U32 u32ClassRoiNum;
        HI_U32 i = 0, j = 0, k = 0, c = 0, h = 0, w = 0;
        HI_U32 u32BlobSize = 0;
        HI_U32 u32MaxBlobSize = 0;

        //find the largest blob size (in bytes)
        for(i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++)
        {
            u32BlobSize = au32GridNumWidth[i]*au32GridNumHeight[i]*sizeof(HI_U32)*
                SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM*u32EachGridBbox;
            if(u32MaxBlobSize < u32BlobSize)
            {
                u32MaxBlobSize = u32BlobSize;
            }
        }

        //count the bboxes of all blobs
        for(i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++)
        {
            u32TotalBboxNum += au32GridNumWidth[i]*au32GridNumHeight[i]*u32EachGridBbox;
        }

        //get each tmpbuf addr: [permute area][bbox array][sort assist area]
        pf32Permute = (HI_FLOAT*)ps32TmpBuf;
        pstBbox = (SAMPLE_SVP_NNIE_YOLOV3_BBOX_S*)(pf32Permute+u32MaxBlobSize/sizeof(HI_S32));
        ps32AssistBuf = (HI_S32*)(pstBbox+u32TotalBboxNum);

        for(i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++)
        {
            //permute: channel-major input -> (h, w, c) order, dequantized to float
            u32Offset = 0;
            ps32InputBlob = pps32InputData[i];
            u32ChnOffset = au32GridNumHeight[i]*au32Stride[i]/sizeof(HI_S32);
            u32HeightOffset = au32Stride[i]/sizeof(HI_S32);
            for (h = 0; h < au32GridNumHeight[i]; h++)
            {
                for (w = 0; w < au32GridNumWidth[i]; w++)
                {
                    for (c = 0; c < SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM*u32EachGridBbox; c++)
                    {
                        pf32Permute[u32Offset++] = (HI_FLOAT)(ps32InputBlob[c*u32ChnOffset+h*u32HeightOffset+w]) / SAMPLE_SVP_NNIE_QUANT_BASE;
                    }
                }
            }

            //decode bbox and calculate score
            for(j = 0; j < au32GridNumWidth[i]*au32GridNumHeight[i]; j++)
            {
                u32GridXIdx = j % au32GridNumWidth[i];
                u32GridYIdx = j / au32GridNumWidth[i];
                for (k = 0; k < u32EachGridBbox; k++)
                {
                    u32MaxValueIndex = 0;
                    u32Offset = (j * u32EachGridBbox + k) * SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM;
                    //decode bbox: centre and size as fractions of the image
                    f32StartX = ((HI_FLOAT)u32GridXIdx + SAMPLE_SVP_NNIE_SIGMOID(pf32Permute[u32Offset + 0])) / au32GridNumWidth[i];
                    f32StartY = ((HI_FLOAT)u32GridYIdx + SAMPLE_SVP_NNIE_SIGMOID(pf32Permute[u32Offset + 1])) / au32GridNumHeight[i];
                    f32Width = (HI_FLOAT)(exp(pf32Permute[u32Offset + 2]) * af32Bias[i][2*k]) / u32SrcWidth;
                    f32Height = (HI_FLOAT)(exp(pf32Permute[u32Offset + 3]) * af32Bias[i][2*k + 1]) / u32SrcHeight;

                    //calculate score: objectness (entry 4) times the best class score (entries 5..)
                    (void)SVP_NNIE_Sigmoid(&pf32Permute[u32Offset + 4], (u32ClassNum+1));
                    f32ObjScore = pf32Permute[u32Offset + 4];
                    f32MaxScore = SVP_NNIE_GetMaxVal(&pf32Permute[u32Offset + 5], u32ClassNum, &u32MaxValueIndex);
                    s32ClassScore = (HI_S32)(f32MaxScore * f32ObjScore*SAMPLE_SVP_NNIE_QUANT_BASE);

                    //filter low score roi (compare as signed so a negative score can never pass)
                    if (s32ClassScore > (HI_S32)u32ConfThresh)
                    {
                        pstBbox[u32BboxNum].f32Xmin= (HI_FLOAT)(f32StartX - f32Width * 0.5f);
                        pstBbox[u32BboxNum].f32Ymin= (HI_FLOAT)(f32StartY - f32Height * 0.5f);
                        pstBbox[u32BboxNum].f32Xmax= (HI_FLOAT)(f32StartX + f32Width * 0.5f);
                        pstBbox[u32BboxNum].f32Ymax= (HI_FLOAT)(f32StartY + f32Height * 0.5f);
                        pstBbox[u32BboxNum].s32ClsScore = s32ClassScore;
                        pstBbox[u32BboxNum].u32Mask= 0;
                        //class indices are 1-based: 1..u32ClassNum
                        pstBbox[u32BboxNum].u32ClassIdx = (HI_U32)(u32MaxValueIndex+1);
                        u32BboxNum++;
                    }
                }
            }
        }

        //quick sort; skipped for 0 or 1 boxes, where there is nothing to order and
        //u32BboxNum - 1 would wrap around for an empty list
        if (u32BboxNum > 1)
        {
            (void)SVP_NNIE_Yolo_NonRecursiveArgQuickSort((HI_S32*)pstBbox, 0, u32BboxNum - 1,
                sizeof(SAMPLE_SVP_NNIE_YOLOV3_BBOX_S)/sizeof(HI_U32),4,(SAMPLE_SVP_NNIE_STACK_S*)ps32AssistBuf);
        }

        //Yolov3 and Yolov2 have the same Nms operation
        (void)SVP_NNIE_Yolov2_NonMaxSuppression(pstBbox, u32BboxNum, u32NmsThresh, sizeof(SAMPLE_SVP_NNIE_YOLOV3_BBOX_S)/sizeof(HI_U32));

        //Get result; class indices run from 1 to u32ClassNum inclusive, so the
        //last class must be visited as well
        for (i = 1; i <= u32ClassNum; i++)
        {
            u32ClassRoiNum = 0;
            for(j = 0; j < u32BboxNum; j++)
            {
                if ((0 == pstBbox[j].u32Mask) && (i == pstBbox[j].u32ClassIdx) && (u32ClassRoiNum < u32MaxRoiNum))
                {
                    *(ps32DstRoi++) = SAMPLE_SVP_NNIE_MAX((HI_S32)(pstBbox[j].f32Xmin*u32SrcWidth), 0);
                    *(ps32DstRoi++) = SAMPLE_SVP_NNIE_MAX((HI_S32)(pstBbox[j].f32Ymin*u32SrcHeight), 0);
                    *(ps32DstRoi++) = SAMPLE_SVP_NNIE_MIN((HI_S32)(pstBbox[j].f32Xmax*u32SrcWidth), u32SrcWidth);
                    *(ps32DstRoi++) = SAMPLE_SVP_NNIE_MIN((HI_S32)(pstBbox[j].f32Ymax*u32SrcHeight), u32SrcHeight);
                    *(ps32DstScore++) = pstBbox[j].s32ClsScore;
                    u32ClassRoiNum++;
                }
            }
            *(ps32ClassRoiNum+i) = u32ClassRoiNum;
        }

        return HI_SUCCESS;
    }
    

    结构体SAMPLE_SVP_NNIE_YOLOV3_BBOX_S:

    /*
     * One candidate bounding box produced by the YOLO post-processing.
     * SVP_NNIE_Yolov3_GetResult stores the corners as fractions of the image
     * size and multiplies them by the source width/height when writing results.
     * s32ClsScore is the fifth member (index 4), which is the key index that
     * SVP_NNIE_Yolov3_GetResult passes to the quick sort.
     */
    typedef struct hiSAMPLE_SVP_NNIE_YOLOV2_BBOX
    {
        HI_FLOAT f32Xmin;
        HI_FLOAT f32Xmax;
        HI_FLOAT f32Ymin;
        HI_FLOAT f32Ymax;
        HI_S32 s32ClsScore;   /* best class score * objectness score * SAMPLE_SVP_NNIE_QUANT_BASE */
        HI_U32 u32ClassIdx;   /* 1-based class index */
        HI_U32 u32Mask;       /* set to 0 on creation; only boxes whose mask is still 0 are output */
    }SAMPLE_SVP_NNIE_YOLOV2_BBOX_S;

    /* YOLOv3 uses the YOLOv2 box layout; the alias must follow the definition above */
    typedef SAMPLE_SVP_NNIE_YOLOV2_BBOX_S SAMPLE_SVP_NNIE_YOLOV3_BBOX_S;
    

    - 部分9

    /* Print the detected boxes whose score reaches f32PrintResultThresh */
    (void)SAMPLE_SVP_NNIE_Detection_PrintResult(&s_stYolov3SoftwareParam.stDstScore,&s_stYolov3SoftwareParam.stDstRoi, &s_stYolov3SoftwareParam.stClassRoiNum,f32PrintResultThresh);
    

    主要完成打印结果

    /*
     * Print the detection results class by class.
     *
     *   pstDstScore          [IN] scores, stored back to back for all classes
     *   pstDstRoi            [IN] boxes, SAMPLE_SVP_NNIE_COORDI_NUM values per box,
     *                             in the same order as the scores
     *   pstClassRoiNum       [IN] number of boxes of each class; its width is the
     *                             number of class slots
     *   f32PrintResultThresh [IN] minimum score (after dividing by
     *                             SAMPLE_SVP_NNIE_QUANT_BASE) for a box to be printed
     *
     * Class slot 0 is never printed; its count only shifts the position of the
     * following classes. Within one class, printing stops at the first box that
     * falls below the threshold.
     *
     * Always returns HI_SUCCESS.
     */
    static HI_S32 SAMPLE_SVP_NNIE_Detection_PrintResult(SVP_BLOB_S *pstDstScore,
        SVP_BLOB_S *pstDstRoi, SVP_BLOB_S *pstClassRoiNum, HI_FLOAT f32PrintResultThresh)
    {
        HI_S32* ps32ScoreBase = (HI_S32*)pstDstScore->u64VirAddr;
        HI_S32* ps32RoiBase = (HI_S32*)pstDstRoi->u64VirAddr;
        HI_S32* ps32CountPerClass = (HI_S32*)pstClassRoiNum->u64VirAddr;
        HI_U32 u32ClassSlots = pstClassRoiNum->unShape.stWhc.u32Width;
        HI_U32 u32Cls = 0;
        HI_U32 u32Box = 0;
        HI_U32 u32FirstScore = 0;
        HI_U32 u32FirstCoord = 0;
        HI_U32 u32Coord = 0;
        HI_FLOAT f32Score = 0.0f;
        /* number of boxes that belong to the classes already visited */
        HI_U32 u32BoxesBefore = 0;

        u32BoxesBefore += ps32CountPerClass[0];
        for (u32Cls = 1; u32Cls < u32ClassSlots; u32Cls++)
        {
            u32FirstScore = u32BoxesBefore;
            u32FirstCoord = u32BoxesBefore * SAMPLE_SVP_NNIE_COORDI_NUM;

            /*if the confidence score greater than result threshold, the result will be printed*/
            if((HI_FLOAT)ps32ScoreBase[u32FirstScore] / SAMPLE_SVP_NNIE_QUANT_BASE >=
                f32PrintResultThresh && ps32CountPerClass[u32Cls]!=0)
            {
                SAMPLE_SVP_TRACE_INFO("==== The %dth class box info====\n", u32Cls);
            }

            u32Box = 0;
            while (u32Box < (HI_U32)ps32CountPerClass[u32Cls])
            {
                f32Score = (HI_FLOAT)ps32ScoreBase[u32FirstScore + u32Box] / SAMPLE_SVP_NNIE_QUANT_BASE;
                if (f32Score < f32PrintResultThresh)
                {
                    break;
                }
                u32Coord = u32FirstCoord + u32Box*SAMPLE_SVP_NNIE_COORDI_NUM;
                SAMPLE_SVP_TRACE_INFO("%d %d %d %d %f\n", ps32RoiBase[u32Coord], ps32RoiBase[u32Coord + 1],
                    ps32RoiBase[u32Coord + 2], ps32RoiBase[u32Coord + 3], f32Score);
                u32Box++;
            }

            u32BoxesBefore += ps32CountPerClass[u32Cls];
        }
        return HI_SUCCESS;
    }
    
    展开全文
  • easyToUseHisiSVP 为了方便自己使用海思svp而建立的仓库 整理信息 海思ive函数耗时记录,在耗时数据.md里。 测试用的代码,在testcode。代码会陆续整合。
  • 对于海思HI3516/HI3519/3559等开发都有相同的函数
  • SVP(Smart Vision Platform)是海思媒体处理芯片智能视觉异构加速平台。该平台包含了 CPU、DSP、NNIE(Neural Network Inference Engine)等多个硬件处理单元和运行在这些 硬件上 SDK 开发环境,以及配套的工具链开发...

    1. SVP简介

    SVP(Smart Vision Platform)是海思媒体处理芯片智能视觉异构加速平台。该平台包含了 CPU、DSP、NNIE(Neural Network Inference Engine)等多个硬件处理单元和运行在这些 硬件上 SDK 开发环境,以及配套的工具链开发环境。

    2. 开发框架

    SVP 开发框架如图所示。目前 SVP 中包含的硬件处理单元有 CPU、vision DSP、NNIE,其中某些硬件可能有多核。 不同的硬件有不同的配套工具链,用户的应用程序需要结合这些工具的使用来开发。

    3. 硬件资源

    4. 软件开发

    SVP 是海思媒体处理芯片的智能加速平台,因此需要结合海思 MPP 平台一起来进行软件开发。用户可以根据 SVP 的软硬件特性开发出能最大化利用 SVP 硬件资源的视觉处理应用。

    5. 开发环境

    展开全文

空空如也

空空如也

1 2 3 4 5 ... 11
收藏数 217
精华内容 86
关键字:

svp海思