/*
 * Decompiled with CFR 0.152.
 */
package com.nvidia.viper.analysis;

import com.nvidia.viper.ViperExceptionHandler;
import com.nvidia.viper.analysis.AnalysisBase;
import com.nvidia.viper.analysis.AnalysisDescriptor;
import com.nvidia.viper.analysis.AnalysisResult;
import com.nvidia.viper.analysis.AnalysisResultIntervalNoData;
import com.nvidia.viper.analysis.AnalysisResultKernelExecutionEfficiency;
import com.nvidia.viper.analysis.AnalysisResultKernelFlops;
import com.nvidia.viper.analysis.AnalysisResultKernelInstructionClass;
import com.nvidia.viper.analysis.AnalysisResultKernelUnitBusy;
import com.nvidia.viper.analysis.AnalysisStage;
import com.nvidia.viper.jni.CuptiMetricValueUtilizationLevel;
import com.nvidia.viper.model.Analysis;
import com.nvidia.viper.model.Session;
import com.nvidia.viper.model.Timeline;
import com.nvidia.viper.model.TimelineDevice;
import com.nvidia.viper.model.TimelineIntervalKernel;
import com.nvidia.viper.model.TimelineKind;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Analysis stage that inspects the host-launched kernel in the session's
 * active scope and reports on four aspects of it: warp execution efficiency,
 * function unit utilization, executed instruction classes and floating-point
 * operation counts.
 *
 * <p>Each aspect is driven by aggregate metric values read through
 * {@code getAggregateMetricValue}. When any required metric is missing, a single
 * {@link AnalysisResultIntervalNoData} result
 * ({@link AnalysisDescriptor#KERNEL_COMPUTE_NO_DATA}) is appended for the kernel.
 *
 * <p>The instance keeps per-run state in {@link #needNoData}, so one instance
 * must not execute {@link #run} concurrently from several threads.
 */
public class KernelInstanceSMAnalysis
extends AnalysisBase {
    public static final double BLOCK_SIZE_WARP_EFFICIENCY_THRESHOLD = 0.9;

    /** Warp width assumed when the device is unknown or reports a non-positive value. */
    private static final int DEFAULT_THREADS_PER_WARP = 32;

    /**
     * A warp efficiency metric value below this bound triggers an execution
     * efficiency result. It is compared directly against the raw metric value.
     */
    private static final double WARP_EFFICIENCY_REPORT_THRESHOLD = 80.0;

    /** Metric supplying the total executed instruction count. */
    private static final String METRIC_INST_EXECUTED = "inst_executed";

    /** Set by the sub-analyses of the current run when a required metric was unavailable. */
    private boolean needNoData;

    /**
     * Runs all sub-analyses on the host-launched kernel of the active scope interval.
     *
     * @param session            session providing the analysis state
     * @param results            list that receives every generated result
     * @param generateAllResults when {@code true}, results are also emitted where the
     *                           measured values alone would not trigger them
     * @return {@code true} when the session has no analysis, when no kernel is
     *         selected, or when every sub-analysis returned {@code true};
     *         {@code false} otherwise
     */
    @Override
    public boolean run(Session session, List<AnalysisResult> results, boolean generateAllResults) {
        Analysis analysis = session.getAnalysis();
        if (analysis == null) {
            return true;
        }

        TimelineIntervalKernel kernel = AnalysisStage.getHostLaunchedKernel(analysis.getActiveScopeInterval());
        if (kernel == null || generateAllResults) {
            results.add(new AnalysisResult(AnalysisDescriptor.KERNEL_COMPUTE_NO_KERNEL));
        }
        if (kernel == null) {
            return true;
        }

        this.needNoData = false;
        // Evaluate every sub-analysis before combining the flags: each one must get the
        // chance to add its result and to raise needNoData, so no short-circuiting here.
        boolean warpOk = this.analyzeWarpEfficiency(session, kernel, results, generateAllResults);
        boolean busyOk = this.analyzeBusy(session, kernel, results, generateAllResults);
        boolean instClassOk = this.analyzeInstructionClass(session, kernel, results, generateAllResults);
        boolean flopsOk = this.analyzeFlops(session, kernel, results, generateAllResults);

        if (this.needNoData) {
            AnalysisResultIntervalNoData noDataSM = new AnalysisResultIntervalNoData(session, AnalysisDescriptor.KERNEL_COMPUTE_NO_DATA);
            noDataSM.addInterval(kernel, null);
            results.add(noDataSM);
        }
        return warpOk && busyOk && instClassOk && flopsOk;
    }

    /**
     * Looks up the device timeline that owns the kernel's primary timeline.
     *
     * @param kernel kernel interval to resolve
     * @return the {@link TimelineKind#DEVICE} ancestor, or {@code null} when the kernel
     *         has no primary timeline or that timeline has no such ancestor
     */
    private static TimelineDevice findDevice(TimelineIntervalKernel kernel) {
        Timeline timeline = kernel.getPrimaryTimeline();
        if (timeline == null) {
            return null;
        }
        return (TimelineDevice)timeline.getAncestor(TimelineKind.DEVICE);
    }

    /**
     * Computes the fraction of warp lanes that the kernel's block size can occupy:
     * threads per block divided by the thread capacity of the whole warps needed to
     * hold them.
     *
     * <p>NOTE(review): if {@code kernel.getThreadsPerBlock()} returns 0 the result is
     * {@code NaN} (0.0 / 0.0); left unchanged because the value the consumer expects in
     * that case is not visible here.
     *
     * @param kernel         kernel whose block size is examined
     * @param threadsPerWarp warp width; callers must pass a positive value
     * @return ratio of threads per block to the rounded-up warp capacity
     */
    private double getMaxPotentialWarpEfficiency(TimelineIntervalKernel kernel, int threadsPerWarp) {
        int threadsPerBlock = kernel.getThreadsPerBlock();
        // Ceiling division: number of warps required to hold every thread of a block.
        int warpsPerBlock = (threadsPerBlock + threadsPerWarp - 1) / threadsPerWarp;
        int laneCapacity = warpsPerBlock * threadsPerWarp;
        return (double)threadsPerBlock / (double)laneCapacity;
    }

    /**
     * Adds an {@link AnalysisResultKernelExecutionEfficiency} result when the best
     * available warp efficiency metric is below the reporting threshold, or
     * unconditionally when {@code generateAllResults} is set.
     *
     * @param session            current session (unused)
     * @param kernel             kernel to analyze
     * @param results            list that receives the generated result
     * @param generateAllResults forces the result to be emitted regardless of the value
     * @return {@code false} only when {@code warp_execution_efficiency} is missing and
     *         the device is unknown or is not compute capability 3.0; {@code true} otherwise
     */
    private boolean analyzeWarpEfficiency(Session session, TimelineIntervalKernel kernel, List<AnalysisResult> results, boolean generateAllResults) {
        TimelineDevice device = KernelInstanceSMAnalysis.findDevice(kernel);
        Number execEff = this.getAggregateMetricValue(kernel, "warp_execution_efficiency");
        Number execNonpredEff = this.getAggregateMetricValue(kernel, "warp_nonpred_execution_efficiency");

        if (execEff == null) {
            this.needNoData = true;
            // A missing metric is tolerated only on compute capability 3.0 devices
            // (presumably the metric does not exist there -- TODO confirm). An unknown
            // device cannot be shown to be 3.0, so it counts as a failure instead of
            // causing a NullPointerException.
            return device != null
                    && device.getComputeCapabilityMajor() == 3
                    && device.getComputeCapabilityMinor() == 0;
        }

        int threadsPerWarp = DEFAULT_THREADS_PER_WARP;
        if (device != null) {
            int reported = device.getNumThreadsPerWarp();
            if (reported > 0) {
                threadsPerWarp = reported;
            }
        }

        // Prefer the non-predicated efficiency when it was collected.
        Number bestWarpExec = execNonpredEff != null ? execNonpredEff : execEff;
        if (bestWarpExec.doubleValue() < WARP_EFFICIENCY_REPORT_THRESHOLD || generateAllResults) {
            double maxWarpEff = this.getMaxPotentialWarpEfficiency(kernel, threadsPerWarp);
            results.add(new AnalysisResultKernelExecutionEfficiency(kernel, execEff, execNonpredEff, maxWarpEff));
        }
        return true;
    }

    /**
     * Adds an {@link AnalysisResultKernelUnitBusy} result holding the utilization level
     * of every function unit available on the kernel's device.
     *
     * @param session            current session (unused)
     * @param kernel             kernel to analyze
     * @param results            list that receives the generated result
     * @param generateAllResults unused; the result is always emitted when data is complete
     * @return {@code true} when a utilization metric was found for every function unit;
     *         {@code false} (with {@link #needNoData} set) when the device is unknown or
     *         any metric is missing
     */
    private boolean analyzeBusy(Session session, TimelineIntervalKernel kernel, List<AnalysisResult> results, boolean generateAllResults) {
        TimelineDevice device = KernelInstanceSMAnalysis.findDevice(kernel);
        if (device == null) {
            // Without a device the set of function units cannot be determined.
            this.needNoData = true;
            return false;
        }

        AnalysisResultKernelUnitBusy.FunctionUnit[] availFus = AnalysisResultKernelUnitBusy.FunctionUnit.getFunctionUnits(device.getComputeCapabilityMajor(), device.getComputeCapabilityMinor());
        Map<AnalysisResultKernelUnitBusy.FunctionUnit, Number> fuMap = new HashMap<AnalysisResultKernelUnitBusy.FunctionUnit, Number>();
        for (AnalysisResultKernelUnitBusy.FunctionUnit fu : availFus) {
            this.getFuMetricValue(fuMap, kernel, fu);
        }

        // Only units with a metric value were stored, so a size mismatch means missing data.
        if (fuMap.size() != availFus.length) {
            this.needNoData = true;
            return false;
        }

        AnalysisResultKernelUnitBusy busyResult = new AnalysisResultKernelUnitBusy(kernel);
        for (AnalysisResultKernelUnitBusy.FunctionUnit fu : availFus) {
            this.setFuBusy(busyResult, fuMap, fu);
        }
        results.add(busyResult);
        return true;
    }

    /**
     * Copies the utilization level of one function unit from {@code fuMap} into
     * {@code result}. Does nothing when the map has no value for the unit. A value that
     * does not map to a known utilization level is logged and recorded as idle.
     *
     * @param result result object to update
     * @param fuMap  collected utilization metric values keyed by function unit
     * @param fu     function unit to transfer
     */
    private void setFuBusy(AnalysisResultKernelUnitBusy result, Map<AnalysisResultKernelUnitBusy.FunctionUnit, Number> fuMap, AnalysisResultKernelUnitBusy.FunctionUnit fu) {
        Number busy = fuMap.get(fu);
        if (busy == null) {
            return;
        }
        CuptiMetricValueUtilizationLevel level = CuptiMetricValueUtilizationLevel.valueOf(busy.intValue());
        if (level == null) {
            ViperExceptionHandler.logError(fu + ": unexpected utilization level " + busy);
            level = CuptiMetricValueUtilizationLevel.CUPTI_METRIC_VALUE_UTILIZATION_IDLE;
        }
        result.setFunctionUnitBusy(fu, level);
    }

    /**
     * Reads the utilization metric of one function unit and stores it in {@code fuMap};
     * the map is left untouched when the metric is unavailable.
     *
     * @param fuMap  destination map
     * @param kernel kernel whose metric is read
     * @param fu     function unit naming the metric
     */
    private void getFuMetricValue(Map<AnalysisResultKernelUnitBusy.FunctionUnit, Number> fuMap, TimelineIntervalKernel kernel, AnalysisResultKernelUnitBusy.FunctionUnit fu) {
        Number value = this.getAggregateMetricValue(kernel, fu.getUtilizationMetricName());
        if (value != null) {
            fuMap.put(fu, value);
        }
    }

    /**
     * Adds an {@link AnalysisResultKernelInstructionClass} result holding the executed
     * count of every instruction class together with the total executed instruction count.
     *
     * @param session            current session (unused)
     * @param kernel             kernel to analyze
     * @param results            list that receives the generated result
     * @param generateAllResults unused; the result is always emitted when data is complete
     * @return {@code true} when {@code inst_executed} and a metric for every instruction
     *         class were found; {@code false} (with {@link #needNoData} set) otherwise
     */
    private boolean analyzeInstructionClass(Session session, TimelineIntervalKernel kernel, List<AnalysisResult> results, boolean generateAllResults) {
        // values() returns a new array on each call; fetch it once.
        AnalysisResultKernelInstructionClass.InstructionClass[] classes = AnalysisResultKernelInstructionClass.InstructionClass.values();
        Map<AnalysisResultKernelInstructionClass.InstructionClass, Number> icMap = new HashMap<AnalysisResultKernelInstructionClass.InstructionClass, Number>();
        for (AnalysisResultKernelInstructionClass.InstructionClass ic : classes) {
            this.getICMetricValue(icMap, kernel, ic);
        }

        Number instExecuted = this.getAggregateMetricValue(kernel, METRIC_INST_EXECUTED);
        if (instExecuted == null || icMap.size() != classes.length) {
            this.needNoData = true;
            return false;
        }

        AnalysisResultKernelInstructionClass icResult = new AnalysisResultKernelInstructionClass(kernel, instExecuted.longValue());
        for (AnalysisResultKernelInstructionClass.InstructionClass ic : classes) {
            this.setICExecCount(icResult, icMap, ic);
        }
        results.add(icResult);
        return true;
    }

    /**
     * Adds an {@link AnalysisResultKernelFlops} result holding the count of every
     * flops kind together with the total executed instruction count.
     *
     * @param session            current session (unused)
     * @param kernel             kernel to analyze
     * @param results            list that receives the generated result
     * @param generateAllResults unused; the result is always emitted when data is complete
     * @return {@code true} when {@code inst_executed} and a metric for every flops kind
     *         were found; {@code false} (with {@link #needNoData} set) otherwise
     */
    private boolean analyzeFlops(Session session, TimelineIntervalKernel kernel, List<AnalysisResult> results, boolean generateAllResults) {
        // values() returns a new array on each call; fetch it once.
        AnalysisResultKernelFlops.FlopsKind[] kinds = AnalysisResultKernelFlops.FlopsKind.values();
        Map<AnalysisResultKernelFlops.FlopsKind, Number> flopsMap = new HashMap<AnalysisResultKernelFlops.FlopsKind, Number>();
        for (AnalysisResultKernelFlops.FlopsKind kind : kinds) {
            this.getFlopsMetricValue(flopsMap, kernel, kind);
        }

        Number instExecuted = this.getAggregateMetricValue(kernel, METRIC_INST_EXECUTED);
        if (instExecuted == null || flopsMap.size() != kinds.length) {
            this.needNoData = true;
            return false;
        }

        AnalysisResultKernelFlops flopsResult = new AnalysisResultKernelFlops(kernel, instExecuted.longValue());
        for (AnalysisResultKernelFlops.FlopsKind kind : kinds) {
            this.setFlopsExecCount(flopsResult, flopsMap, kind);
        }
        results.add(flopsResult);
        return true;
    }

    /**
     * Copies the executed count of one instruction class from {@code icMap} into
     * {@code result}; does nothing when the map has no value for the class.
     *
     * @param result result object to update
     * @param icMap  collected metric values keyed by instruction class
     * @param ic     instruction class to transfer
     */
    private void setICExecCount(AnalysisResultKernelInstructionClass result, Map<AnalysisResultKernelInstructionClass.InstructionClass, Number> icMap, AnalysisResultKernelInstructionClass.InstructionClass ic) {
        Number execCount = icMap.get(ic);
        if (execCount != null) {
            result.setInstructionClassExecCount(ic, execCount.longValue());
        }
    }

    /**
     * Reads the metric of one instruction class and stores it in {@code icMap};
     * the map is left untouched when the metric is unavailable.
     *
     * @param icMap  destination map
     * @param kernel kernel whose metric is read
     * @param ic     instruction class naming the metric
     */
    private void getICMetricValue(Map<AnalysisResultKernelInstructionClass.InstructionClass, Number> icMap, TimelineIntervalKernel kernel, AnalysisResultKernelInstructionClass.InstructionClass ic) {
        Number value = this.getAggregateMetricValue(kernel, ic.getMetricName());
        if (value != null) {
            icMap.put(ic, value);
        }
    }

    /**
     * Copies the count of one flops kind from {@code flopsMap} into {@code result};
     * does nothing when the map has no value for the kind.
     *
     * @param result   result object to update
     * @param flopsMap collected metric values keyed by flops kind
     * @param kind     flops kind to transfer
     */
    private void setFlopsExecCount(AnalysisResultKernelFlops result, Map<AnalysisResultKernelFlops.FlopsKind, Number> flopsMap, AnalysisResultKernelFlops.FlopsKind kind) {
        Number execCount = flopsMap.get(kind);
        if (execCount != null) {
            result.setExecCount(kind, execCount.longValue());
        }
    }

    /**
     * Reads the metric of one flops kind and stores it in {@code flopsMap};
     * the map is left untouched when the metric is unavailable.
     *
     * @param flopsMap destination map
     * @param kernel   kernel whose metric is read
     * @param kind     flops kind naming the metric
     */
    private void getFlopsMetricValue(Map<AnalysisResultKernelFlops.FlopsKind, Number> flopsMap, TimelineIntervalKernel kernel, AnalysisResultKernelFlops.FlopsKind kind) {
        Number value = this.getAggregateMetricValue(kernel, kind.getMetricName());
        if (value != null) {
            flopsMap.put(kind, value);
        }
    }
}

