/*
 * Decompiled with CFR 0.152.
 */
package com.nvidia.viper.analysis;

import com.nvidia.viper.ViperException;
import com.nvidia.viper.ViperExceptionHandler;
import com.nvidia.viper.ViperMessages;
import com.nvidia.viper.jni.CuException;
import com.nvidia.viper.jni.CudaOccupancyDeviceProp;
import com.nvidia.viper.jni.CudaOccupancyDeviceState;
import com.nvidia.viper.jni.CudaOccupancyFuncAttribute;
import com.nvidia.viper.jni.CudaOccupancyResult;
import com.nvidia.viper.jni.NativeCuda;
import com.nvidia.viper.model.ComputeLimits;
import com.nvidia.viper.model.TimelineDevice;
import com.nvidia.viper.model.TimelineIntervalKernel;

/**
 * Computes occupancy figures for a single kernel launch on a given device by
 * delegating to {@link NativeCuda#cudaOccupancyMaxActiveBlocksPerMultiprocessor}.
 *
 * <p>The calculation is performed once, in the constructor. If any required
 * input is missing, the device's compute limits are unknown, or the native
 * call fails, no result is stored; the boxed getters then return {@code null}
 * and the primitive getters return {@code 0}.
 */
public class OccupancyCalculator {
    /**
     * Device-wide occupancy percentage below which a launch may be reported as
     * limited by its grid size.
     */
    static final double GRID_SIZE_LIMITED_THRESHOLD = 80.0;
    private int warpsPerBlock;
    private boolean isGridSizeLimited;
    private final ComputeLimits computeLimits;
    private final TimelineIntervalKernel kernel;
    private final TimelineDevice deviceTimeline;
    /** Result of the native occupancy query; {@code null} when it was not (or could not be) computed. */
    private CudaOccupancyResult occupancyResult;

    /**
     * Creates a calculator using the launch parameters recorded on the kernel itself.
     *
     * @param kernel         kernel interval supplying threads per block, registers per
     *                       thread and static/dynamic shared memory usage
     * @param deviceTimeline device the kernel ran on
     */
    public OccupancyCalculator(TimelineIntervalKernel kernel, TimelineDevice deviceTimeline) {
        // Static shared memory first, dynamic second, matching the parameter order
        // of the six-argument constructor.
        this(kernel, deviceTimeline, kernel.getThreadsPerBlock(), kernel.getRegistersPerThread(), kernel.getStaticSharedMemory(), kernel.getDynamicSharedMemory());
    }

    /**
     * Creates a calculator with explicit launch parameters, allowing "what if"
     * evaluation for values other than those recorded on the kernel.
     *
     * <p>The occupancy is only computed when all four {@code Integer} arguments are
     * non-null, the kernel reports an executed cache configuration, and compute
     * limits are known for the device's compute capability.
     *
     * @param kernel                   kernel interval (supplies cache config and grid size)
     * @param deviceTimeline           device the kernel ran on
     * @param threadsPerBlock          block size in threads, or {@code null} if unknown
     * @param registersPerThread       registers per thread, or {@code null} if unknown
     * @param staticSharedMemoryUsage  static shared memory usage, or {@code null} if unknown
     * @param dynamicSharedMemoryUsage dynamic shared memory usage, or {@code null} if unknown
     */
    public OccupancyCalculator(TimelineIntervalKernel kernel, TimelineDevice deviceTimeline, Integer threadsPerBlock, Integer registersPerThread, Integer staticSharedMemoryUsage, Integer dynamicSharedMemoryUsage) {
        this.kernel = kernel;
        this.deviceTimeline = deviceTimeline;
        int computeMajor = deviceTimeline.getComputeCapabilityMajor();
        int computeMinor = deviceTimeline.getComputeCapabilityMinor();
        this.computeLimits = ComputeLimits.getComputeLimits(computeMajor, computeMinor);
        this.isGridSizeLimited = false;
        this.occupancyResult = null;

        boolean inputsAvailable = threadsPerBlock != null
                && registersPerThread != null
                && staticSharedMemoryUsage != null
                && dynamicSharedMemoryUsage != null
                && kernel.getCacheConfigExecuted() != null
                && this.computeLimits != null;
        if (!inputsAvailable) {
            return;
        }

        // Round up: a partially filled warp still occupies a full warp slot.
        this.warpsPerBlock = (int)Math.ceil((double)threadsPerBlock.intValue() / (double)this.computeLimits.threadsPerWarp);

        CudaOccupancyDeviceProp occupancyDeviceProp = new CudaOccupancyDeviceProp(computeMajor, computeMinor, this.computeLimits.maxBlockSize, this.computeLimits.threadsPerSM, this.computeLimits.regsPerBlock, this.computeLimits.regsPerSM, this.computeLimits.threadsPerWarp, this.computeLimits.sharedMemPerBlock, this.computeLimits.sharedMemPerSM, deviceTimeline.getNumMultiprocessors());
        CudaOccupancyFuncAttribute occupancyFuncAttributes = new CudaOccupancyFuncAttribute(threadsPerBlock, registersPerThread, staticSharedMemoryUsage);
        CudaOccupancyDeviceState occupancyDeviceState = new CudaOccupancyDeviceState(kernel.getCacheConfigExecuted().getCode());

        try {
            this.occupancyResult = NativeCuda.cudaOccupancyMaxActiveBlocksPerMultiprocessor(occupancyDeviceProp, occupancyFuncAttributes, threadsPerBlock, dynamicSharedMemoryUsage, occupancyDeviceState);
        }
        catch (CuException exception) {
            ViperExceptionHandler.handle(ViperMessages.Occupancy_Fail_Title, ViperMessages.Occupancy_Fail_Message, exception);
        }
        if (this.occupancyResult == null) {
            // The native query failed and the handler returned normally; leave the
            // calculator in its "no result" state instead of dereferencing null.
            return;
        }

        int activeBlocksPerDevice = this.occupancyResult.getActiveBlocksPerSM() * deviceTimeline.getNumMultiprocessors();
        if (activeBlocksPerDevice <= 0) {
            return;
        }

        // Number of full-device "waves" needed to run every block of the grid (ceiling division).
        long blocksPerGrid = kernel.getBlocksPerGrid();
        long waves = (blocksPerGrid + (long)activeBlocksPerDevice - 1L) / (long)activeBlocksPerDevice;
        if (waves > 0L) {
            // Percentage of the block slots offered across all waves that the grid actually fills.
            double deviceOccupancy = 100.0 * (double)blocksPerGrid / (double)(waves * (long)activeBlocksPerDevice);
            this.isGridSizeLimited = deviceOccupancy < GRID_SIZE_LIMITED_THRESHOLD
                    && deviceOccupancy < this.occupancyResult.getOccupancy() * 100.0;
        }
    }

    /**
     * @return the occupancy value reported by the native result, or {@code null}
     *         if no result is available. NOTE(review): the constructor multiplies
     *         this value by 100 before comparing it with a percentage, so it is
     *         presumably a fraction in [0, 1] - confirm against the native API.
     */
    public Double getTheoreticOccupancy() {
        return this.occupancyResult == null ? null : Double.valueOf(this.occupancyResult.getOccupancy());
    }

    /**
     * Determines which resource limits occupancy.
     *
     * <p>Returns {@link Limiter#NONE} when the achieved warps per SM already equal
     * the device maximum. Otherwise the limiting-factor bit mask of the result is
     * inspected in the order register, shared memory, block. If no bit matches,
     * {@link Limiter#BLOCK} is reported when the result's warp-based block limit
     * exceeds the device's blocks-per-SM limit, else {@link Limiter#NONE}.
     *
     * @return the limiter, or {@code null} if no occupancy result is available
     */
    public Limiter getLimiter() {
        if (this.occupancyResult == null) {
            return null;
        }
        if (this.occupancyResult.getWarpsPerSM() == this.computeLimits.warpsPerSM) {
            return Limiter.NONE;
        }
        int limitingFactors = this.occupancyResult.getLimitingFactors();
        if (Limiter.REGISTER.isSet(limitingFactors)) {
            return Limiter.REGISTER;
        }
        if (Limiter.SHARED_MEMORY.isSet(limitingFactors)) {
            return Limiter.SHARED_MEMORY;
        }
        if (Limiter.BLOCK.isSet(limitingFactors)) {
            return Limiter.BLOCK;
        }
        if (this.occupancyResult.getBlockLimitWarps() > this.computeLimits.blocksPerSM) {
            return Limiter.BLOCK;
        }
        return Limiter.NONE;
    }

    /** @return active blocks per SM from the occupancy result, or {@code null} if unavailable */
    public Integer getBlocksPerSM() {
        return this.occupancyResult == null ? null : Integer.valueOf(this.occupancyResult.getActiveBlocksPerSM());
    }

    /** @return warps per SM from the occupancy result, or {@code null} if unavailable */
    public Integer getWarpsPerSM() {
        return this.occupancyResult == null ? null : Integer.valueOf(this.occupancyResult.getWarpsPerSM());
    }

    /**
     * @return {@code true} if the grid fills less than {@link #GRID_SIZE_LIMITED_THRESHOLD}
     *         percent of the device's block slots and also less than the theoretical
     *         occupancy; {@code false} otherwise or when no result is available
     */
    public boolean isGridSizeLimited() {
        return this.isGridSizeLimited;
    }

    /** @return the result's warp-based block limit, or {@code 0} if no result is available */
    public int getBlockLimit() {
        return this.occupancyResult == null ? 0 : this.occupancyResult.getBlockLimitWarps();
    }

    /** @return the result's register-based block limit, or {@code 0} if no result is available */
    public int getRegisterLimit() {
        return this.occupancyResult == null ? 0 : this.occupancyResult.getBlockLimitRegisters();
    }

    /** @return the result's shared-memory-based block limit, or {@code 0} if no result is available */
    public int getSharedMemoryLimit() {
        return this.occupancyResult == null ? 0 : this.occupancyResult.getBlockLimitSharedMem();
    }

    /**
     * @return threads per block divided by the warp size, rounded up; {@code 0} if
     *         the occupancy inputs were incomplete
     */
    public int getWarpsPerBlock() {
        return this.warpsPerBlock;
    }

    /** @return allocated registers per block from the result, or {@code 0} if no result is available */
    public int getRegistersPerBlock() {
        return this.occupancyResult == null ? 0 : this.occupancyResult.getAllocatedRegsPerBlock();
    }

    /**
     * Recomputes occupancy with a different block size, keeping the kernel's other
     * launch parameters.
     *
     * @param blockSize hypothetical threads per block
     * @return resulting warps per SM, or {@code null} if it could not be computed
     */
    public Integer getWarpsPerSMForBlockSize(int blockSize) {
        OccupancyCalculator calc = new OccupancyCalculator(this.kernel, this.deviceTimeline, blockSize, this.kernel.getRegistersPerThread(), this.kernel.getStaticSharedMemory(), this.kernel.getDynamicSharedMemory());
        return calc.getWarpsPerSM();
    }

    /**
     * Recomputes occupancy with a different register count per thread, keeping the
     * kernel's other launch parameters.
     *
     * @param registersPerThread hypothetical registers per thread
     * @return resulting warps per SM, or {@code null} if it could not be computed
     */
    public Integer getWarpsPerSMForRegistersPerThread(int registersPerThread) {
        OccupancyCalculator calc = new OccupancyCalculator(this.kernel, this.deviceTimeline, this.kernel.getThreadsPerBlock(), registersPerThread, this.kernel.getStaticSharedMemory(), this.kernel.getDynamicSharedMemory());
        return calc.getWarpsPerSM();
    }

    /**
     * Recomputes occupancy with a different shared memory usage, keeping the
     * kernel's block size and register count. The given amount is passed as static
     * shared memory and the dynamic amount is set to zero.
     *
     * @param sharedMemory hypothetical shared memory usage per block
     * @return resulting warps per SM, or {@code null} if it could not be computed
     * @throws ViperException declared for callers; not thrown by the code in this method
     */
    public Integer getWarpsPerSMForSharedMemoryUsage(int sharedMemory) throws ViperException {
        OccupancyCalculator calc = new OccupancyCalculator(this.kernel, this.deviceTimeline, this.kernel.getThreadsPerBlock(), this.kernel.getRegistersPerThread(), sharedMemory, 0);
        return calc.getWarpsPerSM();
    }

    /** @return the compute limits looked up for the device, or {@code null} if none were found */
    public ComputeLimits getComputeLimits() {
        return this.computeLimits;
    }

    /**
     * Resource that limits occupancy. Each constant carries a bit that is tested
     * against the limiting-factor mask of the occupancy result, plus a display label.
     */
    public static enum Limiter {
        NONE(0, "<none>"),
        BLOCK(1, "Block Size"),
        REGISTER(2, "Registers"),
        SHARED_MEMORY(4, "Shared Memory");

        /** Bit tested against the limiting-factor mask. */
        private final int code;
        /** Display label; left public and non-final so existing external accesses keep compiling. */
        public String label;

        private Limiter(int code, String label) {
            this.code = code;
            this.label = label;
        }

        /** @return the display label */
        @Override
        public String toString() {
            return this.label;
        }

        /** @return {@code true} for every constant except {@link #NONE} */
        public boolean isKnownLimit() {
            return this != NONE;
        }

        /** @return {@code true} if this limiter's bit is set in the given mask */
        private boolean isSet(int limiter) {
            return (this.code & limiter) != 0;
        }
    }
}

