/* Autogenerated by mlir-tblgen; don't manually edit */

#ifdef GEN_PASS_DECL
// Generate declarations for all passes.
#define GEN_PASS_DECL_CONVERTPROTONTOPROTONGPU
#undef GEN_PASS_DECL
#endif // GEN_PASS_DECL

//===----------------------------------------------------------------------===//
// ConvertProtonToProtonGPU
//===----------------------------------------------------------------------===//
#ifdef GEN_PASS_DECL_CONVERTPROTONTOPROTONGPU
/// Plain aggregate carrying one field per command-line option of the
/// ConvertProtonToProtonGPU pass. Every field has a default member
/// initializer, so a value-initialized instance describes the default
/// configuration.
struct ConvertProtonToProtonGPUOptions {
  /// Value for the `metric-type` option.
  MetricType metricType{MetricType::CYCLE};
  /// Value for the `granularity` option.
  gpu::Granularity granularity{gpu::Granularity::WARP};
  /// Value for the `sampling-strategy` option.
  SamplingStrategy samplingStrategy{SamplingStrategy::NONE};
  /// Value for the `sampling-options` option; empty by default.
  std::string samplingOptions{""};
  /// Value for the `buffer-strategy` option.
  gpu::BufferStrategy bufferStrategy{gpu::BufferStrategy::CIRCULAR};
  /// Value for the `buffer-type` option.
  gpu::BufferType bufferType{gpu::BufferType::SHARED};
  /// Value for the `buffer-size` option.
  int32_t bufferSize{0};
  /// Value for the `max-shared-mem-size` option.
  int32_t maxSharedMemSize{32768};
  /// Value for the `scratch-mem-size` option.
  int64_t profileScratchSize{32768};
  /// Value for the `scratch-mem-alignment` option.
  int32_t profileScratchAlignment{128};
  /// Value for the `clock-extension` option.
  bool clockExtension{false};
};
#undef GEN_PASS_DECL_CONVERTPROTONTOPROTONGPU
#endif // GEN_PASS_DECL_CONVERTPROTONTOPROTONGPU
#ifdef GEN_PASS_DEF_CONVERTPROTONTOPROTONGPU
namespace impl {

/// CRTP base class for the ConvertProtonToProtonGPU pass.
///
/// `DerivedT` is the concrete pass type: its TypeID identifies the pass, and
/// clonePass() copy-constructs a `DerivedT` from `*this`. The class is an
/// `::mlir::OperationPass` anchored on `mlir::ModuleOp` and declares one
/// `::mlir::Pass::Option` member per command-line option of the pass.
template <typename DerivedT>
class ConvertProtonToProtonGPUBase : public ::mlir::OperationPass<mlir::ModuleOp> {
public:
  /// Alias so derived classes can refer to this base as `Base`.
  using Base = ConvertProtonToProtonGPUBase;

  /// Default constructor: identifies the pass by the TypeID of `DerivedT`.
  ConvertProtonToProtonGPUBase() : ::mlir::OperationPass<mlir::ModuleOp>(::mlir::TypeID::get<DerivedT>()) {}
  /// Copy constructor: forwards `other` to the OperationPass copy constructor.
  /// The option members are not named in the initializer list, so they are
  /// built from their default member initializers below.
  ConvertProtonToProtonGPUBase(const ConvertProtonToProtonGPUBase &other) : ::mlir::OperationPass<mlir::ModuleOp>(other) {}
  /// Copy assignment, move construction and move assignment are disabled.
  ConvertProtonToProtonGPUBase& operator=(const ConvertProtonToProtonGPUBase &) = delete;
  ConvertProtonToProtonGPUBase(ConvertProtonToProtonGPUBase &&) = delete;
  ConvertProtonToProtonGPUBase& operator=(ConvertProtonToProtonGPUBase &&) = delete;
  ~ConvertProtonToProtonGPUBase() = default;

  /// Returns the command-line argument attached to this pass.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("convert-proton-to-protongpu");
  }
  /// Virtual counterpart of getArgumentName(); returns the same text.
  ::llvm::StringRef getArgument() const override { return "convert-proton-to-protongpu"; }

  /// Returns the one-line description of this pass.
  ::llvm::StringRef getDescription() const override { return "Lowering pass of ProtonIR to ProtonGPU IR"; }

  /// Returns the derived pass name.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("ConvertProtonToProtonGPU");
  }
  /// Virtual counterpart of getPassName(); returns the same text.
  ::llvm::StringRef getName() const override { return "ConvertProtonToProtonGPU"; }

  /// Support isa/dyn_cast functionality for the derived pass class.
  /// A pass matches when its TypeID equals that of `DerivedT`.
  static bool classof(const ::mlir::Pass *pass) {
    return pass->getTypeID() == ::mlir::TypeID::get<DerivedT>();
  }

  /// A clone method to create a copy of this pass.
  /// Downcasts `this` to `DerivedT` and invokes the derived copy constructor.
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    return std::make_unique<DerivedT>(*static_cast<const DerivedT *>(this));
  }

  /// Register the dialects that must be loaded in the context before this
  /// pass: Proton, ProtonGPU, TritonGPU and Triton.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
    registry.insert<ProtonDialect>();
    registry.insert<gpu::ProtonGPUDialect>();
    registry.insert<mlir::triton::gpu::TritonGPUDialect>();
    registry.insert<mlir::triton::TritonDialect>();
  }

  /// Explicitly declare the TypeID for this class. We declare an explicit private
  /// instantiation because Pass classes should only be visible by the current
  /// library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ConvertProtonToProtonGPUBase<DerivedT>)

  /// Constructs the pass from an options struct: delegates to the default
  /// constructor, then assigns each field of `options` to the pass option of
  /// the same name.
  /// NOTE(review): this single-argument constructor is not `explicit`, so an
  /// options struct converts implicitly to the pass base.
  ConvertProtonToProtonGPUBase(ConvertProtonToProtonGPUOptions options) : ConvertProtonToProtonGPUBase() {
    metricType = std::move(options.metricType);
    granularity = std::move(options.granularity);
    samplingStrategy = std::move(options.samplingStrategy);
    // `samplingOptions` is the only std::string field being moved here.
    samplingOptions = std::move(options.samplingOptions);
    bufferStrategy = std::move(options.bufferStrategy);
    bufferType = std::move(options.bufferType);
    bufferSize = std::move(options.bufferSize);
    maxSharedMemSize = std::move(options.maxSharedMemSize);
    profileScratchSize = std::move(options.profileScratchSize);
    profileScratchAlignment = std::move(options.profileScratchAlignment);
    clockExtension = std::move(options.clockExtension);
  }
protected:
  // Each option below is constructed with `*this` as its first argument, so
  // the declaration order is also the order in which the options are attached
  // to the pass.

  /// `metric-type`: defaults to MetricType::CYCLE; "cycle" is the only
  /// spelling registered.
  ::mlir::Pass::Option<MetricType> metricType{*this, "metric-type", ::llvm::cl::desc("The performance counter metric type we are profiling"), ::llvm::cl::init(MetricType::CYCLE), ::llvm::cl::values(
                    clEnumValN(MetricType::CYCLE, "cycle", "Cycle")
              )};
  /// `granularity`: defaults to gpu::Granularity::WARP.
  /// NOTE(review): the description text names "warp, warp_group, or cta",
  /// while the spellings registered below also cover thread and multi-warp
  /// variants and use a hyphen ("warp-group"). Any wording change presumably
  /// belongs in the TableGen definition, since this file is generated.
  ::mlir::Pass::Option<gpu::Granularity> granularity{*this, "granularity", ::llvm::cl::desc("Profiling granularity: warp, warp_group, or cta"), ::llvm::cl::init(gpu::Granularity::WARP), ::llvm::cl::values(
                    clEnumValN(gpu::Granularity::THREAD, "thread", "Thread"),
                    clEnumValN(gpu::Granularity::WARP, "warp", "Warp"),
                    clEnumValN(gpu::Granularity::WARP_2, "warp-2", "2 Warps"),
                    clEnumValN(gpu::Granularity::WARP_4, "warp-4", "4 Warps"),
                    clEnumValN(gpu::Granularity::WARP_8, "warp-8", "8 Warps"),
                    clEnumValN(gpu::Granularity::CTA, "cta", "CTA"),
                    clEnumValN(gpu::Granularity::WARP_GROUP, "warp-group", "Warp Group"),
                    clEnumValN(gpu::Granularity::WARP_GROUP_2, "warp-group-2", "2 Warp Groups"),
                    clEnumValN(gpu::Granularity::WARP_GROUP_4, "warp-group-4", "4 Warp Groups"),
                    clEnumValN(gpu::Granularity::WARP_GROUP_8, "warp-group-8", "8 Warp Groups")
              )};
  /// `sampling-strategy`: defaults to SamplingStrategy::NONE; accepts "none"
  /// or "selective".
  ::mlir::Pass::Option<SamplingStrategy> samplingStrategy{*this, "sampling-strategy", ::llvm::cl::desc("Profiling sampling strategy"), ::llvm::cl::init(SamplingStrategy::NONE), ::llvm::cl::values(
                    clEnumValN(SamplingStrategy::NONE, "none", "No Sampling"),
                    clEnumValN(SamplingStrategy::SELECTIVE, "selective", "Selective Sampling")
              )};
  /// `sampling-options`: free-form string, empty by default. Its format is
  /// not visible in this file.
  ::mlir::Pass::Option<std::string> samplingOptions{*this, "sampling-options", ::llvm::cl::desc("Profiling sampling options"), ::llvm::cl::init("")};
  /// `buffer-strategy`: defaults to gpu::BufferStrategy::CIRCULAR; accepts
  /// "circular" or "flush".
  ::mlir::Pass::Option<gpu::BufferStrategy> bufferStrategy{*this, "buffer-strategy", ::llvm::cl::desc("Profiler buffer recording strategy (circular or flush)"), ::llvm::cl::init(gpu::BufferStrategy::CIRCULAR), ::llvm::cl::values(
                    clEnumValN(gpu::BufferStrategy::CIRCULAR, "circular", "Circular Buffer"),
                    clEnumValN(gpu::BufferStrategy::FLUSH, "flush", "Flush Buffer")
              )};
  /// `buffer-type`: defaults to gpu::BufferType::SHARED.
  /// NOTE(review): the description spells the choices in upper case, but the
  /// registered spellings are the lower-case "shared" and "global".
  ::mlir::Pass::Option<gpu::BufferType> bufferType{*this, "buffer-type", ::llvm::cl::desc("Internal buffer type (SHARED, GLOBAL) that stores the profiling data"), ::llvm::cl::init(gpu::BufferType::SHARED), ::llvm::cl::values(
                    clEnumValN(gpu::BufferType::SHARED, "shared", "Shared Memory"),
                    clEnumValN(gpu::BufferType::GLOBAL, "global", "Global Memory")
              )};
  /// `buffer-size`: 32-bit integer, defaults to 0. The description refers to
  /// `maxSharedMemSize`, which is the member for `max-shared-mem-size`.
  ::mlir::Pass::Option<int32_t> bufferSize{*this, "buffer-size", ::llvm::cl::desc("Internal buffer byte size that stores the profiling data. 0 means auto-size based on the device's `maxSharedMemSize`"), ::llvm::cl::init(0)};
  /// `max-shared-mem-size`: 32-bit integer, defaults to 32768.
  ::mlir::Pass::Option<int32_t> maxSharedMemSize{*this, "max-shared-mem-size", ::llvm::cl::desc("Maximum available shared memory size per CTA"), ::llvm::cl::init(32768)};
  /// `scratch-mem-size`: 64-bit integer, defaults to 32768.
  ::mlir::Pass::Option<int64_t> profileScratchSize{*this, "scratch-mem-size", ::llvm::cl::desc("Profiler global scratch memory size per CTA"), ::llvm::cl::init(32768)};
  /// `scratch-mem-alignment`: 32-bit integer, defaults to 128.
  ::mlir::Pass::Option<int32_t> profileScratchAlignment{*this, "scratch-mem-alignment", ::llvm::cl::desc("Profiler global scratch memory alignment"), ::llvm::cl::init(128)};
  /// `clock-extension`: boolean, defaults to false.
  ::mlir::Pass::Option<bool> clockExtension{*this, "clock-extension", ::llvm::cl::desc("Use long clock if true, otherwise use 32-bit clock"), ::llvm::cl::init(false)};
private:
};
} // namespace impl
#undef GEN_PASS_DEF_CONVERTPROTONTOPROTONGPU
#endif // GEN_PASS_DEF_CONVERTPROTONTOPROTONGPU
#ifdef GEN_PASS_REGISTRATION

//===----------------------------------------------------------------------===//
// ConvertProtonToProtonGPU Registration
//===----------------------------------------------------------------------===//

/// Registers the ConvertProtonToProtonGPU pass by handing
/// `::mlir::registerPass` a factory that builds the pass through
/// createConvertProtonToProtonGPUPass().
inline void registerConvertProtonToProtonGPU() {
  // Name the factory first so the registration call reads as a single step.
  const auto makePass = []() -> std::unique_ptr<::mlir::Pass> {
    return createConvertProtonToProtonGPUPass();
  };
  ::mlir::registerPass(makePass);
}

// Old registration entry point, kept for temporary backwards compatibility.
// It performs the same registration as registerConvertProtonToProtonGPU():
// a factory calling createConvertProtonToProtonGPUPass() is passed to
// `::mlir::registerPass`.
inline void registerConvertProtonToProtonGPUPass() {
  const auto passFactory = []() -> std::unique_ptr<::mlir::Pass> {
    return createConvertProtonToProtonGPUPass();
  };
  ::mlir::registerPass(passFactory);
}

//===----------------------------------------------------------------------===//
// ProtonToProtonGPU Registration
//===----------------------------------------------------------------------===//

/// Registers every pass of the ProtonToProtonGPU group. The group currently
/// contains a single pass, ConvertProtonToProtonGPU, so this forwards to its
/// registration function.
inline void registerProtonToProtonGPUPasses() {
  registerConvertProtonToProtonGPU();
}
#undef GEN_PASS_REGISTRATION
#endif // GEN_PASS_REGISTRATION
// Deprecated. Please use the new per-pass macros.
#ifdef GEN_PASS_CLASSES

/// Legacy CRTP base class for the ConvertProtonToProtonGPU pass, emitted for
/// the deprecated GEN_PASS_CLASSES macro.
///
/// `DerivedT` is the concrete pass type: its TypeID identifies the pass, and
/// clonePass() copy-constructs a `DerivedT` from `*this`. This variant has no
/// constructor taking an options struct; options are only reachable through
/// the `::mlir::Pass::Option` members declared below.
template <typename DerivedT>
class ConvertProtonToProtonGPUBase : public ::mlir::OperationPass<mlir::ModuleOp> {
public:
  /// Alias so derived classes can refer to this base as `Base`.
  using Base = ConvertProtonToProtonGPUBase;

  /// Default constructor: identifies the pass by the TypeID of `DerivedT`.
  ConvertProtonToProtonGPUBase() : ::mlir::OperationPass<mlir::ModuleOp>(::mlir::TypeID::get<DerivedT>()) {}
  /// Copy constructor: forwards `other` to the OperationPass copy constructor.
  /// The option members are not named in the initializer list, so they are
  /// built from their default member initializers below.
  ConvertProtonToProtonGPUBase(const ConvertProtonToProtonGPUBase &other) : ::mlir::OperationPass<mlir::ModuleOp>(other) {}
  /// Copy assignment, move construction and move assignment are disabled.
  ConvertProtonToProtonGPUBase& operator=(const ConvertProtonToProtonGPUBase &) = delete;
  ConvertProtonToProtonGPUBase(ConvertProtonToProtonGPUBase &&) = delete;
  ConvertProtonToProtonGPUBase& operator=(ConvertProtonToProtonGPUBase &&) = delete;
  ~ConvertProtonToProtonGPUBase() = default;

  /// Returns the command-line argument attached to this pass.
  static constexpr ::llvm::StringLiteral getArgumentName() {
    return ::llvm::StringLiteral("convert-proton-to-protongpu");
  }
  /// Virtual counterpart of getArgumentName(); returns the same text.
  ::llvm::StringRef getArgument() const override { return "convert-proton-to-protongpu"; }

  /// Returns the one-line description of this pass.
  ::llvm::StringRef getDescription() const override { return "Lowering pass of ProtonIR to ProtonGPU IR"; }

  /// Returns the derived pass name.
  static constexpr ::llvm::StringLiteral getPassName() {
    return ::llvm::StringLiteral("ConvertProtonToProtonGPU");
  }
  /// Virtual counterpart of getPassName(); returns the same text.
  ::llvm::StringRef getName() const override { return "ConvertProtonToProtonGPU"; }

  /// Support isa/dyn_cast functionality for the derived pass class.
  /// A pass matches when its TypeID equals that of `DerivedT`.
  static bool classof(const ::mlir::Pass *pass) {
    return pass->getTypeID() == ::mlir::TypeID::get<DerivedT>();
  }

  /// A clone method to create a copy of this pass.
  /// Downcasts `this` to `DerivedT` and invokes the derived copy constructor.
  std::unique_ptr<::mlir::Pass> clonePass() const override {
    return std::make_unique<DerivedT>(*static_cast<const DerivedT *>(this));
  }

  /// Register the dialects that must be loaded in the context before this
  /// pass: Proton, ProtonGPU, TritonGPU and Triton.
  void getDependentDialects(::mlir::DialectRegistry &registry) const override {
    registry.insert<ProtonDialect>();
    registry.insert<gpu::ProtonGPUDialect>();
    registry.insert<mlir::triton::gpu::TritonGPUDialect>();
    registry.insert<mlir::triton::TritonDialect>();
  }

  /// Explicitly declare the TypeID for this class. We declare an explicit private
  /// instantiation because Pass classes should only be visible by the current
  /// library.
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ConvertProtonToProtonGPUBase<DerivedT>)

protected:
  // Each option below is constructed with `*this` as its first argument, so
  // the declaration order is also the order in which the options are attached
  // to the pass.

  /// `metric-type`: defaults to MetricType::CYCLE; "cycle" is the only
  /// spelling registered.
  ::mlir::Pass::Option<MetricType> metricType{*this, "metric-type", ::llvm::cl::desc("The performance counter metric type we are profiling"), ::llvm::cl::init(MetricType::CYCLE), ::llvm::cl::values(
                    clEnumValN(MetricType::CYCLE, "cycle", "Cycle")
              )};
  /// `granularity`: defaults to gpu::Granularity::WARP.
  /// NOTE(review): the description text names "warp, warp_group, or cta",
  /// while the spellings registered below also cover thread and multi-warp
  /// variants and use a hyphen ("warp-group"). Any wording change presumably
  /// belongs in the TableGen definition, since this file is generated.
  ::mlir::Pass::Option<gpu::Granularity> granularity{*this, "granularity", ::llvm::cl::desc("Profiling granularity: warp, warp_group, or cta"), ::llvm::cl::init(gpu::Granularity::WARP), ::llvm::cl::values(
                    clEnumValN(gpu::Granularity::THREAD, "thread", "Thread"),
                    clEnumValN(gpu::Granularity::WARP, "warp", "Warp"),
                    clEnumValN(gpu::Granularity::WARP_2, "warp-2", "2 Warps"),
                    clEnumValN(gpu::Granularity::WARP_4, "warp-4", "4 Warps"),
                    clEnumValN(gpu::Granularity::WARP_8, "warp-8", "8 Warps"),
                    clEnumValN(gpu::Granularity::CTA, "cta", "CTA"),
                    clEnumValN(gpu::Granularity::WARP_GROUP, "warp-group", "Warp Group"),
                    clEnumValN(gpu::Granularity::WARP_GROUP_2, "warp-group-2", "2 Warp Groups"),
                    clEnumValN(gpu::Granularity::WARP_GROUP_4, "warp-group-4", "4 Warp Groups"),
                    clEnumValN(gpu::Granularity::WARP_GROUP_8, "warp-group-8", "8 Warp Groups")
              )};
  /// `sampling-strategy`: defaults to SamplingStrategy::NONE; accepts "none"
  /// or "selective".
  ::mlir::Pass::Option<SamplingStrategy> samplingStrategy{*this, "sampling-strategy", ::llvm::cl::desc("Profiling sampling strategy"), ::llvm::cl::init(SamplingStrategy::NONE), ::llvm::cl::values(
                    clEnumValN(SamplingStrategy::NONE, "none", "No Sampling"),
                    clEnumValN(SamplingStrategy::SELECTIVE, "selective", "Selective Sampling")
              )};
  /// `sampling-options`: free-form string, empty by default. Its format is
  /// not visible in this file.
  ::mlir::Pass::Option<std::string> samplingOptions{*this, "sampling-options", ::llvm::cl::desc("Profiling sampling options"), ::llvm::cl::init("")};
  /// `buffer-strategy`: defaults to gpu::BufferStrategy::CIRCULAR; accepts
  /// "circular" or "flush".
  ::mlir::Pass::Option<gpu::BufferStrategy> bufferStrategy{*this, "buffer-strategy", ::llvm::cl::desc("Profiler buffer recording strategy (circular or flush)"), ::llvm::cl::init(gpu::BufferStrategy::CIRCULAR), ::llvm::cl::values(
                    clEnumValN(gpu::BufferStrategy::CIRCULAR, "circular", "Circular Buffer"),
                    clEnumValN(gpu::BufferStrategy::FLUSH, "flush", "Flush Buffer")
              )};
  /// `buffer-type`: defaults to gpu::BufferType::SHARED.
  /// NOTE(review): the description spells the choices in upper case, but the
  /// registered spellings are the lower-case "shared" and "global".
  ::mlir::Pass::Option<gpu::BufferType> bufferType{*this, "buffer-type", ::llvm::cl::desc("Internal buffer type (SHARED, GLOBAL) that stores the profiling data"), ::llvm::cl::init(gpu::BufferType::SHARED), ::llvm::cl::values(
                    clEnumValN(gpu::BufferType::SHARED, "shared", "Shared Memory"),
                    clEnumValN(gpu::BufferType::GLOBAL, "global", "Global Memory")
              )};
  /// `buffer-size`: 32-bit integer, defaults to 0. The description refers to
  /// `maxSharedMemSize`, which is the member for `max-shared-mem-size`.
  ::mlir::Pass::Option<int32_t> bufferSize{*this, "buffer-size", ::llvm::cl::desc("Internal buffer byte size that stores the profiling data. 0 means auto-size based on the device's `maxSharedMemSize`"), ::llvm::cl::init(0)};
  /// `max-shared-mem-size`: 32-bit integer, defaults to 32768.
  ::mlir::Pass::Option<int32_t> maxSharedMemSize{*this, "max-shared-mem-size", ::llvm::cl::desc("Maximum available shared memory size per CTA"), ::llvm::cl::init(32768)};
  /// `scratch-mem-size`: 64-bit integer, defaults to 32768.
  ::mlir::Pass::Option<int64_t> profileScratchSize{*this, "scratch-mem-size", ::llvm::cl::desc("Profiler global scratch memory size per CTA"), ::llvm::cl::init(32768)};
  /// `scratch-mem-alignment`: 32-bit integer, defaults to 128.
  ::mlir::Pass::Option<int32_t> profileScratchAlignment{*this, "scratch-mem-alignment", ::llvm::cl::desc("Profiler global scratch memory alignment"), ::llvm::cl::init(128)};
  /// `clock-extension`: boolean, defaults to false.
  ::mlir::Pass::Option<bool> clockExtension{*this, "clock-extension", ::llvm::cl::desc("Use long clock if true, otherwise use 32-bit clock"), ::llvm::cl::init(false)};
};
#undef GEN_PASS_CLASSES
#endif // GEN_PASS_CLASSES
