/* * Copyright 2020-2022 NVIDIA Corporation. All rights reserved. * * NOTICE TO LICENSEE: * * This source code and/or documentation ("Licensed Deliverables") are * subject to NVIDIA intellectual property rights under U.S. and * international Copyright laws. * * These Licensed Deliverables contained herein is PROPRIETARY and * CONFIDENTIAL to NVIDIA and is being provided under the terms and * conditions of a form of NVIDIA software license agreement by and * between NVIDIA and Licensee ("License Agreement") or electronically * accepted by Licensee. Notwithstanding any terms or conditions to * the contrary in the License Agreement, reproduction or disclosure * of the Licensed Deliverables to any third party without the express * written consent of NVIDIA is prohibited. * * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THESE LICENSED DELIVERABLES. * * U.S. Government End Users. These Licensed Deliverables are a * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT * 1995), consisting of "commercial computer software" and "commercial * computer software documentation" as such terms are used in 48 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government * only as a commercial end item. Consistent with 48 C.F.R.12.212 and * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all * U.S. Government End Users acquire the Licensed Deliverables with * only those rights set forth herein. * * Any use of the Licensed Deliverables in individual and commercial * software must include, in the user documentation and internal * comments to the code, the above Disclaimer and U.S. Government End * Users Notice. */ #if !defined(_CUPTI_PCSAMPLING_H_) #define _CUPTI_PCSAMPLING_H_ #include #include #include #include "cupti_result.h" #ifndef CUPTIAPI #ifdef _WIN32 #define CUPTIAPI __stdcall #else #define CUPTIAPI #endif #endif #define ACTIVITY_RECORD_ALIGNMENT 8 #if defined(_WIN32) // Windows 32- and 64-bit #define START_PACKED_ALIGNMENT __pragma(pack(push,1)) // exact fit - no padding #define PACKED_ALIGNMENT __declspec(align(ACTIVITY_RECORD_ALIGNMENT)) #define END_PACKED_ALIGNMENT __pragma(pack(pop)) #elif defined(__GNUC__) // GCC #define START_PACKED_ALIGNMENT #define PACKED_ALIGNMENT __attribute__ ((__packed__)) __attribute__ ((aligned (ACTIVITY_RECORD_ALIGNMENT))) #define END_PACKED_ALIGNMENT #else // all other compilers #define START_PACKED_ALIGNMENT #define PACKED_ALIGNMENT #define END_PACKED_ALIGNMENT #endif #if defined(__cplusplus) extern "C" { #endif #if defined(__GNUC__) && defined(CUPTI_LIB) #pragma GCC visibility push(default) #endif /** * \defgroup CUPTI_PCSAMPLING_API CUPTI PC Sampling API * Functions, types, and enums that implement the CUPTI PC Sampling API. * @{ */ #ifndef CUPTI_PCSAMPLING_STRUCT_SIZE #define CUPTI_PCSAMPLING_STRUCT_SIZE(type_, lastfield_) (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_)) #endif #ifndef CUPTI_STALL_REASON_STRING_SIZE #define CUPTI_STALL_REASON_STRING_SIZE 128 #endif /** * \brief PC Sampling collection mode */ typedef enum { /** * INVALID Value */ CUPTI_PC_SAMPLING_COLLECTION_MODE_INVALID = 0, /** * Continuous mode. Kernels are not serialized in this mode. */ CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS = 1, /** * Serialized mode. Kernels are serialized in this mode. */ CUPTI_PC_SAMPLING_COLLECTION_MODE_KERNEL_SERIALIZED = 2, } CUpti_PCSamplingCollectionMode; /** * \brief PC Sampling stall reasons */ typedef struct PACKED_ALIGNMENT { /** * [r] Collected stall reason index */ uint32_t pcSamplingStallReasonIndex; /** * [r] Number of times the PC was sampled with the stallReason. */ uint32_t samples; } CUpti_PCSamplingStallReason; /** * \brief PC Sampling data */ typedef struct PACKED_ALIGNMENT { /** * [w] Size of the data structure. * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are * available in the structure. Used to preserve backward compatibility. */ size_t size; /** * [r] Unique cubin id */ uint64_t cubinCrc; /** * [r] PC offset */ uint64_t pcOffset; /** * The function's unique symbol index in the module. */ uint32_t functionIndex; /** * Padding */ uint32_t pad; /** * [r] The function name. This name string might be shared across all the records * including records from activity APIs representing the same function, and so it should not be * modified or freed until post processing of all the records is done. Once done, it is user’s responsibility to * free the memory using free() function. */ char* functionName; /** * [r] Collected stall reason count */ size_t stallReasonCount; /** * [r] Stall reason id * Total samples */ CUpti_PCSamplingStallReason *stallReason; } CUpti_PCSamplingPCData; /** * \brief PC Sampling output data format */ typedef enum { CUPTI_PC_SAMPLING_OUTPUT_DATA_FORMAT_INVALID = 0, /** * HW buffer data will be parsed during collection of data */ CUPTI_PC_SAMPLING_OUTPUT_DATA_FORMAT_PARSED = 1, } CUpti_PCSamplingOutputDataFormat; /** * \brief Collected PC Sampling data * */ typedef struct PACKED_ALIGNMENT { /** * [w] Size of the data structure. * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are * available in the structure. Used to preserve backward compatibility. */ size_t size; /** * [w] Number of PCs to be collected */ size_t collectNumPcs; /** * [r] Number of samples collected across all PCs. * It includes samples for user modules, samples for non-user kernels and dropped samples. * It includes counts for all non selected stall reasons. * CUPTI does not provide PC records for non-user kernels. * CUPTI does not provide PC records for instructions for which all selected stall reason metrics counts are zero. */ uint64_t totalSamples; /** * [r] Number of samples that were dropped by hardware due to backpressure/overflow. */ uint64_t droppedSamples; /** * [r] Number of PCs collected */ size_t totalNumPcs; /** * [r] Number of PCs available for collection */ size_t remainingNumPcs; /** * [r] Unique identifier for each range. * Data collected across multiple ranges in multiple buffers can be identified using range id. */ uint64_t rangeId; /** * [r] Profiled PC data * This data struct should have enough memory to collect number of PCs mentioned in \brief collectNumPcs */ CUpti_PCSamplingPCData *pPcData; /** * [r] Number of samples collected across all non user kernels PCs. * It includes samples for non-user kernels. * It includes counts for all non selected stall reasons as well. * CUPTI does not provide PC records for non-user kernels. */ uint64_t nonUsrKernelsTotalSamples; } CUpti_PCSamplingData; /** * \brief PC Sampling configuration attributes * * PC Sampling configuration attribute types. These attributes can be read * using \ref cuptiPCSamplingGetConfigurationAttribute and can be written * using \ref cuptiPCSamplingSetConfigurationAttribute. Attributes marked * [r] can only be read using \ref cuptiPCSamplingGetConfigurationAttribute * [w] can only be written using \ref cuptiPCSamplingSetConfigurationAttribute * [rw] can be read using \ref cuptiPCSamplingGetConfigurationAttribute and * written using \ref cuptiPCSamplingSetConfigurationAttribute */ typedef enum { CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_INVALID = 0, /** * [rw] Sampling period for PC Sampling. * DEFAULT - CUPTI defined value based on number of SMs * Valid values for the sampling * periods are between 5 to 31 both inclusive. This will set the * sampling period to (2^samplingPeriod) cycles. * For e.g. for sampling period = 5 to 31, cycles = 32, 64, 128,..., 2^31 * Value is a uint32_t */ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_PERIOD = 1, /** * [w] Number of stall reasons to collect. * DEFAULT - All stall reasons will be collected * Value is a size_t * [w] Stall reasons to collect * DEFAULT - All stall reasons will be collected * Input value should be a pointer pointing to array of stall reason indexes * containing all the stall reason indexes to collect. */ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_STALL_REASON = 2, /** * [rw] Size of SW buffer for raw PC counter data downloaded from HW buffer * DEFAULT - 1 MB, which can accommodate approximately 5500 PCs * with all stall reasons * Approximately it takes 16 Bytes (and some fixed size memory) * to accommodate one PC with one stall reason * For e.g. 1 PC with 1 stall reason = 32 Bytes * 1 PC with 2 stall reason = 48 Bytes * 1 PC with 4 stall reason = 96 Bytes * Value is a size_t */ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SCRATCH_BUFFER_SIZE = 3, /** * [rw] Size of HW buffer in bytes * DEFAULT - 512 MB * If sampling period is too less, HW buffer can overflow * and drop PC data * Value is a size_t */ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_HARDWARE_BUFFER_SIZE = 4, /** * [rw] PC Sampling collection mode * DEFAULT - CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS * Input value should be of type \ref CUpti_PCSamplingCollectionMode. */ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_COLLECTION_MODE = 5, /** * [rw] Control over PC Sampling data collection range * Default - 0 * 1 - Allows user to start and stop PC Sampling using APIs - * \ref cuptiPCSamplingStart() - Start PC Sampling * \ref cuptiPCSamplingStop() - Stop PC Sampling * Value is a uint32_t */ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL = 6, /** * [w] Value for output data format * Default - CUPTI_PC_SAMPLING_OUTPUT_DATA_FORMAT_PARSED * Input value should be of type \ref CUpti_PCSamplingOutputDataFormat. */ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_OUTPUT_DATA_FORMAT = 7, /** * [w] Data buffer to hold collected PC Sampling data PARSED_DATA * Default - none. * Buffer type is void * which can point to PARSED_DATA * Refer \ref CUpti_PCSamplingData for buffer format for PARSED_DATA */ CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_DATA_BUFFER = 8, CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_FORCE_INT = 0x7fffffff, } CUpti_PCSamplingConfigurationAttributeType; /** * \brief PC sampling configuration information structure * * This structure provides \ref CUpti_PCSamplingConfigurationAttributeType which can be configured * or queried for PC sampling configuration */ typedef struct { /** * Refer \ref CUpti_PCSamplingConfigurationAttributeType for all supported attribute types */ CUpti_PCSamplingConfigurationAttributeType attributeType; /* * Configure or query status for \p attributeType * CUPTI_SUCCESS for valid \p attributeType and \p attributeData * CUPTI_ERROR_INVALID_OPERATION if \p attributeData is not valid * CUPTI_ERROR_INVALID_PARAMETER if \p attributeType is not valid */ CUptiResult attributeStatus; union { /** * Invalid Value */ struct { uint64_t data[3]; } invalidData; /** * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_PERIOD */ struct { uint32_t samplingPeriod; } samplingPeriodData; /** * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_STALL_REASON */ struct { size_t stallReasonCount; uint32_t *pStallReasonIndex; } stallReasonData; /** * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SCRATCH_BUFFER_SIZE */ struct { size_t scratchBufferSize; } scratchBufferSizeData; /** * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_HARDWARE_BUFFER_SIZE */ struct { size_t hardwareBufferSize; } hardwareBufferSizeData; /** * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_COLLECTION_MODE */ struct { CUpti_PCSamplingCollectionMode collectionMode; } collectionModeData; /** * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL */ struct { uint32_t enableStartStopControl; } enableStartStopControlData; /** * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_OUTPUT_DATA_FORMAT */ struct { CUpti_PCSamplingOutputDataFormat outputDataFormat; } outputDataFormatData; /** * Refer \ref CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_SAMPLING_DATA_BUFFER */ struct { void *samplingDataBuffer; } samplingDataBufferData; } attributeData; } CUpti_PCSamplingConfigurationInfo; /** * \brief PC sampling configuration structure * * This structure configures PC sampling using \ref cuptiPCSamplingSetConfigurationAttribute * and queries PC sampling default configuration using \ref cuptiPCSamplingGetConfigurationAttribute */ typedef struct { /** * [w] Size of the data structure i.e. CUpti_PCSamplingConfigurationInfoParamsSize * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are * available in the structure. Used to preserve backward compatibility. */ size_t size; /** * [w] Assign to NULL */ void* pPriv; /** * [w] CUcontext */ CUcontext ctx; /** * [w] Number of attributes to configure using \ref cuptiPCSamplingSetConfigurationAttribute or query * using \ref cuptiPCSamplingGetConfigurationAttribute */ size_t numAttributes; /** * Refer \ref CUpti_PCSamplingConfigurationInfo */ CUpti_PCSamplingConfigurationInfo *pPCSamplingConfigurationInfo; } CUpti_PCSamplingConfigurationInfoParams; #define CUpti_PCSamplingConfigurationInfoParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingConfigurationInfoParams,pPCSamplingConfigurationInfo) /** * \brief Write PC Sampling configuration attribute. * * \param pParams A pointer to \ref CUpti_PCSamplingConfigurationInfoParams * containing PC sampling configuration. * * \retval CUPTI_SUCCESS * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called with * some invalid \p attrib. * \retval CUPTI_ERROR_INVALID_PARAMETER if attribute \p value is not valid * or any \p pParams is not valid * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device * does not support the API */ CUptiResult CUPTIAPI cuptiPCSamplingSetConfigurationAttribute(CUpti_PCSamplingConfigurationInfoParams *pParams); /** * \brief Read PC Sampling configuration attribute. * * \param pParams A pointer to \ref CUpti_PCSamplingConfigurationInfoParams * containing PC sampling configuration. * * \retval CUPTI_SUCCESS * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called with * some invalid attribute. * \retval CUPTI_ERROR_INVALID_PARAMETER if \p attrib is not valid * or any \p pParams is not valid * \retval CUPTI_ERROR_PARAMETER_SIZE_NOT_SUFFICIENT indicates that * the \p value buffer is too small to hold the attribute value * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device * does not support the API */ CUptiResult CUPTIAPI cuptiPCSamplingGetConfigurationAttribute(CUpti_PCSamplingConfigurationInfoParams *pParams); /** * \brief Params for cuptiPCSamplingEnable */ typedef struct { /** * [w] Size of the data structure i.e. CUpti_PCSamplingGetDataParamsSize * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are * available in the structure. Used to preserve backward compatibility. */ size_t size; /** * [w] Assign to NULL */ void* pPriv; /** * [w] CUcontext */ CUcontext ctx; /** * \param pcSamplingData Data buffer to hold collected PC Sampling data PARSED_DATA * Buffer type is void * which can point to PARSED_DATA * Refer \ref CUpti_PCSamplingData for buffer format for PARSED_DATA */ void *pcSamplingData; } CUpti_PCSamplingGetDataParams; #define CUpti_PCSamplingGetDataParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingGetDataParams, pcSamplingData) /** * \brief Flush GPU PC sampling data periodically. * * Flushing of GPU PC Sampling data is required at following point to maintain uniqueness of PCs: * For \brief CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS, after every module load-unload-load * For \brief CUPTI_PC_SAMPLING_COLLECTION_MODE_KERNEL_SERIALIZED, after every kernel ends * If configuration option \brief CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL * is enabled, then after every range end i.e. \brief cuptiPCSamplingStop() * * If application is profiled in \brief CUPTI_PC_SAMPLING_COLLECTION_MODE_CONTINUOUS, with disabled * \brief CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL, and there is no module unload, * user can collect data in two ways: * Use \brief cuptiPCSamplingGetData() API periodically * Use \brief cuptiPCSamplingDisable() on application exit and read GPU PC sampling data from sampling * data buffer passed during configuration. * Note: In case, \brief cuptiPCSamplingGetData() API is not called periodically, then sampling data buffer * passed during configuration should be large enough to hold all PCs data. * \brief cuptiPCSamplingGetData() API never does device synchronization. * It is possible that when the API is called there is some unconsumed data from the HW buffer. In this case * CUPTI provides only the data available with it at that moment. * * \param Refer \ref CUpti_PCSamplingGetDataParams * * \retval CUPTI_SUCCESS * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called without * enabling PC sampling. * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device * does not support the API */ CUptiResult CUPTIAPI cuptiPCSamplingGetData(CUpti_PCSamplingGetDataParams *pParams); /** * \brief Params for cuptiPCSamplingEnable */ typedef struct { /** * [w] Size of the data structure i.e. CUpti_PCSamplingEnableParamsSize * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are * available in the structure. Used to preserve backward compatibility. */ size_t size; /** * [w] Assign to NULL */ void* pPriv; /** * [w] CUcontext */ CUcontext ctx; } CUpti_PCSamplingEnableParams; #define CUpti_PCSamplingEnableParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingEnableParams, ctx) /** * \brief Enable PC sampling. * * \param Refer \ref CUpti_PCSamplingEnableParams * * \retval CUPTI_SUCCESS * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device * does not support the API */ CUptiResult CUPTIAPI cuptiPCSamplingEnable(CUpti_PCSamplingEnableParams *pParams); /** * \brief Params for cuptiPCSamplingDisable */ typedef struct { /** * [w] Size of the data structure i.e. CUpti_PCSamplingDisableParamsSize * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are * available in the structure. Used to preserve backward compatibility. */ size_t size; /** * [w] Assign to NULL */ void* pPriv; /** * [w] CUcontext */ CUcontext ctx; } CUpti_PCSamplingDisableParams; #define CUpti_PCSamplingDisableParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingDisableParams, ctx) /** * \brief Disable PC sampling. * * For application which doesn't destroy the CUDA context explicitly, * this API does the PC Sampling tear-down, joins threads and copies PC records in the buffer provided * during the PC sampling configuration. PC records which can't be accommodated in the buffer are discarded. * * \param Refer \ref CUpti_PCSamplingDisableParams * * \retval CUPTI_SUCCESS * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device * does not support the API */ CUptiResult CUPTIAPI cuptiPCSamplingDisable(CUpti_PCSamplingDisableParams *pParams); /** * \brief Params for cuptiPCSamplingStart */ typedef struct { /** * [w] Size of the data structure i.e. CUpti_PCSamplingStartParamsSize * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are * available in the structure. Used to preserve backward compatibility. */ size_t size; /** * [w] Assign to NULL */ void* pPriv; /** * [w] CUcontext */ CUcontext ctx; } CUpti_PCSamplingStartParams; #define CUpti_PCSamplingStartParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingStartParams, ctx) /** * \brief Start PC sampling. * * User can collect PC Sampling data for user-defined range specified by Start/Stop APIs. * This API can be used to mark starting of range. Set configuration option * \brief CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL to use this API. * * \param Refer \ref CUpti_PCSamplingStartParams * * \retval CUPTI_SUCCESS * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called with * incorrect PC Sampling configuration. * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device * does not support the API */ CUptiResult CUPTIAPI cuptiPCSamplingStart(CUpti_PCSamplingStartParams *pParams); /** * \brief Params for cuptiPCSamplingStop */ typedef struct { /** * [w] Size of the data structure i.e. CUpti_PCSamplingStopParamsSize * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are * available in the structure. Used to preserve backward compatibility. */ size_t size; /** * [w] Assign to NULL */ void* pPriv; /** * [w] CUcontext */ CUcontext ctx; } CUpti_PCSamplingStopParams; #define CUpti_PCSamplingStopParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingStopParams, ctx) /** * \brief Stop PC sampling. * * User can collect PC Sampling data for user-defined range specified by Start/Stop APIs. * This API can be used to mark end of range. Set configuration option * \brief CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_ENABLE_START_STOP_CONTROL to use this API. * * \param Refer \ref CUpti_PCSamplingStopParams * * \retval CUPTI_SUCCESS * \retval CUPTI_ERROR_INVALID_OPERATION if this API is called with * incorrect PC Sampling configuration. * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device * does not support the API */ CUptiResult CUPTIAPI cuptiPCSamplingStop(CUpti_PCSamplingStopParams *pParams); /** * \brief Params for cuptiPCSamplingGetNumStallReasons */ typedef struct { /** * [w] Size of the data structure i.e. CUpti_PCSamplingGetNumStallReasonsParamsSize * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are * available in the structure. Used to preserve backward compatibility. */ size_t size; /** * [w] Assign to NULL */ void* pPriv; /** * [w] CUcontext */ CUcontext ctx; /** * [r] Number of stall reasons */ size_t *numStallReasons; } CUpti_PCSamplingGetNumStallReasonsParams; #define CUpti_PCSamplingGetNumStallReasonsParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingGetNumStallReasonsParams, numStallReasons) /** * \brief Get PC sampling stall reason count. * * \param Refer \ref CUpti_PCSamplingGetNumStallReasonsParams * * \retval CUPTI_SUCCESS * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device * does not support the API */ CUptiResult CUPTIAPI cuptiPCSamplingGetNumStallReasons(CUpti_PCSamplingGetNumStallReasonsParams *pParams); /** * \brief Params for cuptiPCSamplingGetStallReasons */ typedef struct { /** * [w] Size of the data structure i.e. CUpti_PCSamplingGetStallReasonsParamsSize * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are * available in the structure. Used to preserve backward compatibility. */ size_t size; /** * [w] Assign to NULL */ void* pPriv; /** * [w] CUcontext */ CUcontext ctx; /** * [w] Number of stall reasons */ size_t numStallReasons; /** * [r] Stall reason index */ uint32_t *stallReasonIndex; /** * [r] Stall reasons name */ char **stallReasons; } CUpti_PCSamplingGetStallReasonsParams; #define CUpti_PCSamplingGetStallReasonsParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_PCSamplingGetStallReasonsParams, stallReasons) /** * \brief Get PC sampling stall reasons. * * \param Refer \ref CUpti_PCSamplingGetStallReasonsParams * * \retval CUPTI_SUCCESS * \retval CUPTI_ERROR_INVALID_PARAMETER if any \p pParams is not valid * \retval CUPTI_ERROR_NOT_SUPPORTED indicates that the system/device * does not support the API */ CUptiResult CUPTIAPI cuptiPCSamplingGetStallReasons(CUpti_PCSamplingGetStallReasonsParams *pParams); /** * \brief Params for cuptiGetSassToSourceCorrelation */ typedef struct { /** * [w] Size of the data structure i.e. CUpti_GetSassToSourceCorrelationParamsSize * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are * available in the structure. Used to preserve backward compatibility. */ size_t size; /** * [w] Pointer to cubin binary where function belongs. */ const void* cubin; /** * [w] Function name to which PC belongs. */ const char *functionName; /** * [w] Size of cubin binary. */ size_t cubinSize; /** * [r] Line number in the source code. */ uint32_t lineNumber; /** * [w] PC offset */ uint64_t pcOffset; /** * [r] Path for the source file. */ char *fileName; /** * [r] Path for the directory of source file. */ char *dirName; } CUpti_GetSassToSourceCorrelationParams; #define CUpti_GetSassToSourceCorrelationParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_GetSassToSourceCorrelationParams, dirName) /** * \brief SASS to Source correlation. * * \param Refer \ref CUpti_GetSassToSourceCorrelationParams * * It is expected from user to free allocated memory for fileName and dirName after use. * * \retval CUPTI_SUCCESS * \retval CUPTI_ERROR_INVALID_PARAMETER if either of the parameters cubin or functionName * is NULL or cubinSize is zero or size field is not set correctly. * \retval CUPTI_ERROR_INVALID_MODULE provided cubin is invalid. * \retval CUPTI_ERROR_UNKNOWN an internal error occurred. * This error code is also used for cases when the function is not present in the module. * A better error code will be returned in the future release. */ CUptiResult CUPTIAPI cuptiGetSassToSourceCorrelation(CUpti_GetSassToSourceCorrelationParams *pParams); /** * \brief Params for cuptiGetCubinCrc */ typedef struct { /** * [w] Size of configuration structure. * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are * available in the structure. Used to preserve backward compatibility. */ size_t size; /** * [w] Size of cubin binary. */ size_t cubinSize; /** * [w] Pointer to cubin binary */ const void* cubin; /** * [r] Computed CRC will be stored in it. */ uint64_t cubinCrc; } CUpti_GetCubinCrcParams; #define CUpti_GetCubinCrcParamsSize CUPTI_PCSAMPLING_STRUCT_SIZE(CUpti_GetCubinCrcParams, cubinCrc) /** * \brief Get the CRC of cubin. * * This function returns the CRC of provided cubin binary. * * \param Refer \ref CUpti_GetCubinCrcParams * * \retval CUPTI_SUCCESS * \retval CUPTI_ERROR_INVALID_PARAMETER if parameter cubin is NULL or * provided cubinSize is zero or size field is not set. */ CUptiResult CUPTIAPI cuptiGetCubinCrc(CUpti_GetCubinCrcParams *pParams); /** * \brief Function type for callback used by CUPTI to request crc of * loaded module. * * This callback function ask for crc of provided module in function. * The provided crc will be stored in PC sampling records i.e. in the field 'cubinCrc' of the PC sampling * struct CUpti_PCSamplingPCData. The CRC is uses during the offline source correlation to uniquely identify the module. * * \param cubin The pointer to cubin binary * \param cubinSize The size of cubin binary. * \param cubinCrc Returns the computed crc of cubin. */ typedef void (CUPTIAPI *CUpti_ComputeCrcCallbackFunc)( const void* cubin, size_t cubinSize, uint64_t *cubinCrc); /** * \brief Register callback function with CUPTI to use * your own algorithm to compute cubin crc. * * This function registers a callback function and it gets called * from CUPTI when a CUDA module is loaded. * * \param funcComputeCubinCrc callback is invoked when a CUDA module * is loaded. * * \retval CUPTI_SUCCESS * \retval CUPTI_ERROR_INVALID_PARAMETER if \p funcComputeCubinCrc is NULL. */ CUptiResult CUPTIAPI cuptiRegisterComputeCrcCallback(CUpti_ComputeCrcCallbackFunc funcComputeCubinCrc); /** @} */ /* END CUPTI_PCSAMPLING_API */ #if defined(__GNUC__) && defined(CUPTI_LIB) #pragma GCC visibility pop #endif #if defined(__cplusplus) } #endif #endif /*_CUPTI_PCSAMPLING_H_*/