Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#include "snippets/op/subgraph.hpp"
#include "primitive.hpp"

#include "ocl/ocl_engine.hpp"

namespace cldnn {

/// @brief Subgraph primitive
Expand All @@ -19,7 +21,8 @@ struct subgraph : public primitive_base<subgraph> {
/// @param id This primitive id
/// @param inputs Input primitive ids
/// @param subgraph Original subgraph node
subgraph(const primitive_id& id, const std::vector<input_info>& inputs, const std::shared_ptr<ov::snippets::op::Subgraph>& subgraph)
subgraph(const primitive_id& id, const std::vector<input_info>& inputs,
const std::shared_ptr<ov::snippets::op::Subgraph>& subgraph)
: primitive_base(id, inputs), ov_subgraph(subgraph->clone()) {}

std::shared_ptr<ov::snippets::op::Subgraph> ov_subgraph;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,43 @@ inline std::istream& operator>>(std::istream& is, DumpTensors& val) {
return is;
}

/**
 * @brief Enum to define possible snippets mode hints.
 *
 * Controls tokenization of Snippets subgraphs (see the `snippets_mode`
 * property declared below; the plugin default is DISABLE).
 */
enum class SnippetsMode : uint8_t {
    ENABLE = 0,           //!< Snippets tokenization is enabled
    IGNORE_CALLBACK = 1,  //!< Tokenization enabled; the filtering callback is ignored (NOTE(review): confirm against plugin usage)
    DISABLE = 2,          //!< Snippets tokenization is disabled
};

/// Streams the textual name of a SnippetsMode value; throws on out-of-range input.
inline std::ostream& operator<<(std::ostream& os, const SnippetsMode& mode) {
    const char* name = nullptr;
    switch (mode) {
    case SnippetsMode::ENABLE:
        name = "ENABLE";
        break;
    case SnippetsMode::IGNORE_CALLBACK:
        name = "IGNORE_CALLBACK";
        break;
    case SnippetsMode::DISABLE:
        name = "DISABLE";
        break;
    default:
        OPENVINO_THROW("Unsupported snippets mode value");
    }
    return os << name;
}

/// Parses a SnippetsMode from its textual name; throws on an unknown token.
inline std::istream& operator>>(std::istream& is, SnippetsMode& mode) {
    std::string token;
    is >> token;
    if (token == "DISABLE") {
        mode = SnippetsMode::DISABLE;
    } else if (token == "IGNORE_CALLBACK") {
        mode = SnippetsMode::IGNORE_CALLBACK;
    } else if (token == "ENABLE") {
        mode = SnippetsMode::ENABLE;
    } else {
        OPENVINO_THROW("Unsupported snippets mode: ", token);
    }
    return is;
}

/**
* @brief Defines queue type that must be used for model execution
*/
Expand Down Expand Up @@ -168,6 +205,7 @@ static constexpr Property<ShapePredictor::Settings, ov::PropertyMutability::RW>
static constexpr Property<std::vector<std::string>, ov::PropertyMutability::RW> load_dump_raw_binary{"GPU_LOAD_DUMP_RAW_BINARY"};
static constexpr Property<bool, ov::PropertyMutability::RW> could_use_flashattn_v2{"GPU_COULD_USE_FLASHATTN_V2"};
static constexpr Property<uint64_t, PropertyMutability::RW> dynamic_quantization_group_size_max{"GPU_DYNAMIC_QUANTIZATION_GROUP_SIZE_MAX"};
static constexpr Property<SnippetsMode, PropertyMutability::RW> snippets_mode{"SNIPPETS_MODE"};
} // namespace ov::intel_gpu

namespace cldnn {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ OV_CONFIG_RELEASE_OPTION(ov::hint, activations_scale_factor, -1.0f, "Scalar floa
OV_CONFIG_RELEASE_OPTION(ov::internal, enable_lp_transformations, false, "Enable/Disable Low precision transformations set")
OV_CONFIG_RELEASE_OPTION(ov::intel_gpu, config_file, "", "Path to custom layers config file")
OV_CONFIG_RELEASE_OPTION(ov::hint, model, nullptr, "Shared pointer to the ov::Model")
OV_CONFIG_RELEASE_OPTION(ov::intel_gpu, snippets_mode, ov::intel_gpu::SnippetsMode::DISABLE, "Define tokenization mode for Snippets.")

OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, shape_predictor_settings, {10, 16 * 1024, 2, 1.1f}, "Preallocation settings")
OV_CONFIG_RELEASE_INTERNAL_OPTION(ov::intel_gpu, queue_type, QueueTypes::out_of_order, "Type of the queue that must be used for model execution. May be in-order or out-of-order")
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/src/graph/impls/jit/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ set(TARGET_NAME "openvino_intel_gpu_jit_obj")

ov_gpu_add_backend_target(
NAME ${TARGET_NAME}
LINK_LIBRARIES onednn_gpu_tgt
LINK_LIBRARIES onednn_gpu_tgt openvino::snippets
)

ov_build_target_faster(${TARGET_NAME} PCH PCH_EXCLUDE detection_output.cpp)
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright (C) 2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "jit_emitter.hpp"


namespace ov::intel_gpu::jit {

/// @brief Emitter for ov::op::v1::Add inside GPU snippets kernels.
/// Currently only declares the supported precisions; code emission falls back
/// to the base jit_emitter (which throws).
template <dnnl::impl::gpu::intel::jit::gpu_gen_t hw>
class jit_add_emitter : public jit_emitter<hw> {
public:
    /// @param host nGEN code generator that receives emitted instructions (non-owning).
    /// @param exec_prc Precision the emitted computation executes in (f32 by default).
    jit_add_emitter(dnnl::impl::gpu::intel::jit::ngen_code_generator_t<hw>* host,
                    ov::element::Type exec_prc = ov::element::f32) : jit_emitter<hw>(host, exec_prc) {}

    /// @brief Supported input precision combinations: two same-typed inputs, f32 or f16.
    static std::set<std::vector<ov::element::Type>> get_supported_precisions(
        [[maybe_unused]] const std::shared_ptr<ov::Node>& node) {
        return {{element::f32, element::f32}, {element::f16, element::f16}};
    }
};

} // namespace ov::intel_gpu::jit
46 changes: 46 additions & 0 deletions src/plugins/intel_gpu/src/graph/impls/jit/emitters/jit_emitter.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Copyright (C) 2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "snippets/emitter.hpp"

#include "graph/impls/jit/jit_generator.hpp"

#include "openvino/core/type/element_type.hpp"
#include "openvino/core/node.hpp"

namespace ov::intel_gpu::jit {

/// @brief Common base for GPU snippets JIT emitters.
/// Stores a non-owning pointer to the nGEN code generator and the execution
/// precision; concrete emitters override emit_code_impl().
template <dnnl::impl::gpu::intel::jit::gpu_gen_t hw>
class jit_emitter : public ov::snippets::Emitter {
public:
    /// @param host nGEN code generator that receives emitted instructions (non-owning).
    /// @param exec_prc Precision the emitted code computes in (f32 by default).
    jit_emitter(dnnl::impl::gpu::intel::jit::ngen_code_generator_t<hw>* host,
                ov::element::Type exec_prc = ov::element::f32) :
                m_h(host),
                m_exec_prc(exec_prc) {}

    /**
     * @brief Returns supported precisions.
     * Precisions are ordered, the first bigger bitness precision with the same type will be selected.
     * Empty collection means the emitter supports any input precisions.
     */
    static std::set<std::vector<ov::element::Type>> get_supported_precisions(
        [[maybe_unused]] const std::shared_ptr<ov::Node>& node = nullptr) {
        return {};
    }

protected:
    /// Placeholder: the base emitter emits no code and always throws.
    void emit_code_impl([[maybe_unused]] const std::vector<size_t>& in,
                        [[maybe_unused]] const std::vector<size_t>& out,
                        [[maybe_unused]] const std::vector<size_t>& pool,
                        [[maybe_unused]] const std::vector<size_t>& gpr) const override {
        OPENVINO_THROW("Unimplemented");
    }

    // Non-owning: the host generator must outlive the emitter.
    dnnl::impl::gpu::intel::jit::ngen_code_generator_t<hw>* m_h;
    ov::element::Type m_exec_prc;
};

} // namespace ov::intel_gpu::jit
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright (C) 2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "jit_emitter.hpp"


namespace ov::intel_gpu::jit {

/// @brief Emitter for ops that produce no code (e.g. Parameter/Result — see
/// GPUTargetMachine's jitters table in gpu_generator.cpp).
template <dnnl::impl::gpu::intel::jit::gpu_gen_t hw>
class jit_nop_emitter : public jit_emitter<hw> {
public:
    /// @param host nGEN code generator (non-owning).
    /// @param exec_prc Execution precision (unused by a nop; f32 by default).
    jit_nop_emitter(dnnl::impl::gpu::intel::jit::ngen_code_generator_t<hw>* host,
                    ov::element::Type exec_prc = ov::element::f32) : jit_emitter<hw>(host, exec_prc) {}

    /// @brief Empty set: any input precisions are accepted.
    static std::set<std::vector<ov::element::Type>> get_supported_precisions(
        [[maybe_unused]] const std::shared_ptr<ov::Node>& node) {
        return {};
    }
};

} // namespace ov::intel_gpu::jit
135 changes: 135 additions & 0 deletions src/plugins/intel_gpu/src/graph/impls/jit/gpu_generator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
// Copyright (C) 2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "gpu_generator.hpp"

#include "snippets/runtime_configurator.hpp"
#include "emitters/jit_eltwise_emitters.hpp"
#include "emitters/jit_snippets_emitters.hpp"

#include "openvino/op/add.hpp"


using namespace dnnl::impl::gpu::intel::jit;

namespace ov::intel_gpu::jit {

// Builds a {factory, precision-query} pair for the jitters table:
//  - the first lambda captures `this` and constructs the emitter on top of the
//    target machine's code generator m_h (extra ctor args forwarded via __VA_ARGS__);
//  - the second lambda forwards the supported-precision query to the emitter type.
#define CREATE_SNIPPETS_EMITTER(e_type, ...)                                                          \
    {[this](const snippets::lowered::ExpressionPtr& expr) -> std::shared_ptr<snippets::Emitter> {     \
         return std::make_shared<e_type<hw>>(m_h.get(), ##__VA_ARGS__);                               \
     },                                                                                               \
     [](const std::shared_ptr<ov::Node>& n) -> std::set<std::vector<element::Type>> {                 \
         return e_type<hw>::get_supported_precisions(n);                                              \
     }}

// Constructs the target machine with a fresh runtime configurator and an
// owned nGEN code generator, then registers the per-op emitter factories.
template <ngen::HW hw>
GPUTargetMachine<hw>::GPUTargetMachine()
    : TargetMachine(std::make_shared<ov::snippets::RuntimeConfigurator>(std::make_shared<ov::snippets::RuntimeConfig>())),
      m_h(std::make_unique<jit_snippet_t<hw>>()) {
    // Parameter/Result emit no code; Add is the only computational op wired up so far.
    jitters[op::v0::Parameter::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_nop_emitter);
    jitters[op::v0::Result::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_nop_emitter);
    jitters[op::v1::Add::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_add_emitter);
}

// Clones the machine: the default constructor rebuilds the jitters table and
// code generator, then the runtime configurator is deep-copied from *this.
template <ngen::HW hw>
std::shared_ptr<snippets::TargetMachine> GPUTargetMachine<hw>::clone() const {
    const auto cloned = std::make_shared<GPUTargetMachine<hw>>();
    cloned->configurator = std::make_shared<ov::snippets::RuntimeConfigurator>(*configurator);
    return cloned;
}

// SIMD width as reported by the underlying nGEN generator.
template <ngen::HW hw>
size_t GPUTargetMachine<hw>::get_lanes() const {
    assert(m_h);  // m_h is created in the constructor and never reset
    return m_h->getSIMD();
}

// Register queries are not implemented yet; each stub throws. The trailing
// `return {};` lines are unreachable (OPENVINO_THROW throws) and only silence
// missing-return diagnostics on compilers that don't see the macro as noreturn.
template <ngen::HW hw>
std::vector<snippets::Reg> GPUTargetMachine<hw>::get_abi_arg_regs() const {
    OPENVINO_THROW("Unimplemented!");
    return {};
}

template <ngen::HW hw>
std::vector<snippets::Reg> GPUTargetMachine<hw>::get_gp_reg_pool() const {
    OPENVINO_THROW("Unimplemented!");
    return {};
}

template <ngen::HW hw>
std::vector<snippets::Reg> GPUTargetMachine<hw>::get_vec_reg_pool() const {
    OPENVINO_THROW("Unimplemented!");
    return {};
}

// Returns the compile-time hardware id this specialization was instantiated for.
template <ngen::HW hw>
ngen::HW GPUTargetMachine<hw>::get_hw() const {
    return hw;
}

// Placeholder: kernel finalization is not implemented yet. The commented-out
// code sketches the intended flow (mirroring the CPU plugin): create the kernel,
// wrap it in CompiledSnippetGPU, and reset the generator for the next snippet.
template <ngen::HW hw>
snippets::CompiledSnippetPtr GPUTargetMachine<hw>::get_snippet() {
    // OPENVINO_ASSERT(h->create_kernel() == dnnl::impl::status::success, "Failed to create jit_kernel in get_snippet()");
    // const auto& result =
    //     std::make_shared<CompiledSnippetGPU>(std::unique_ptr<dnnl::impl::cpu::x64::jit_generator_t>(h.release()));
    // // Note that we reset all the generated code, since it was copied into CompiledSnippetGPU
    // h = std::make_unique<jit_snippet>();
    // return result;
    OPENVINO_THROW("Unimplemented!");
    return nullptr;  // unreachable
}

// Placeholder: compiled-code access is not implemented yet.
const uint8_t* CompiledSnippetGPU::get_code() const {
    //return h_compiled->jit_ker();
    OPENVINO_THROW("Unimplemented!");
    return nullptr;  // unreachable
}

size_t CompiledSnippetGPU::get_code_size() const {
    OPENVINO_THROW("Unimplemented!");
}

// NOTE(review): empty() delegates to get_code_size(), which currently always
// throws — so empty() cannot return; revisit once compilation is implemented.
bool CompiledSnippetGPU::empty() const {
    return get_code_size() == 0;
}

// Public constructor: builds the target machine matching the runtime HW id.
GPUGenerator::GPUGenerator(ngen::HW hw)
    : Generator(create_target_machine(hw)) {}

// Private constructor used by clone(). The exact-typeid checks accept only the
// known GPUTargetMachine specializations (exact type, not subclasses).
GPUGenerator::GPUGenerator(const std::shared_ptr<ov::snippets::TargetMachine>& target)
    : Generator(target) {
    OPENVINO_ASSERT(typeid(*target) == typeid(GPUTargetMachine<ngen::HW::Gen9>) ||
                    typeid(*target) == typeid(GPUTargetMachine<ngen::HW::Gen11>) ||
                    typeid(*target) == typeid(GPUTargetMachine<ngen::HW::Gen12LP>) ||
                    typeid(*target) == typeid(GPUTargetMachine<ngen::HW::XeHP>) ||
                    typeid(*target) == typeid(GPUTargetMachine<ngen::HW::XeHPG>) ||
                    typeid(*target) == typeid(GPUTargetMachine<ngen::HW::XeHPC>) ||
                    typeid(*target) == typeid(GPUTargetMachine<ngen::HW::Xe2>) ||
                    typeid(*target) == typeid(GPUTargetMachine<ngen::HW::Xe3>));
}

// Clones the generator over a cloned target machine. Plain `new` is used
// because the taken constructor is private, so make_shared cannot reach it.
std::shared_ptr<snippets::Generator> GPUGenerator::clone() const {
    return std::shared_ptr<GPUGenerator>(new GPUGenerator(target->clone()));
}

/// No GPU-specific register typing yet: defer to the common snippets logic by
/// returning `undefined` for every output.
ov::snippets::RegType GPUGenerator::get_specific_op_out_reg_type(
    [[maybe_unused]] const ov::Output<ov::Node>& out) const {
    return ov::snippets::RegType::undefined;
}

/// Maps the runtime HW id to the matching compile-time GPUTargetMachine
/// specialization; throws on an unknown id.
/// make_shared is used (the return type is shared_ptr): one allocation for
/// object + control block, instead of make_unique followed by a converting
/// move into shared_ptr, which allocates the control block separately.
std::shared_ptr<ov::snippets::TargetMachine> GPUGenerator::create_target_machine(ngen::HW hw) {
    switch (hw) {
    case ngen::HW::Gen9:    return std::make_shared<GPUTargetMachine<ngen::HW::Gen9>>();
    case ngen::HW::Gen11:   return std::make_shared<GPUTargetMachine<ngen::HW::Gen11>>();
    case ngen::HW::Gen12LP: return std::make_shared<GPUTargetMachine<ngen::HW::Gen12LP>>();
    case ngen::HW::XeHP:    return std::make_shared<GPUTargetMachine<ngen::HW::XeHP>>();
    case ngen::HW::XeHPG:   return std::make_shared<GPUTargetMachine<ngen::HW::XeHPG>>();
    case ngen::HW::XeHPC:   return std::make_shared<GPUTargetMachine<ngen::HW::XeHPC>>();
    case ngen::HW::Xe2:     return std::make_shared<GPUTargetMachine<ngen::HW::Xe2>>();
    case ngen::HW::Xe3:     return std::make_shared<GPUTargetMachine<ngen::HW::Xe3>>();
    default:
        OPENVINO_THROW("Unknown GPU hardware!");
    }
}

} // namespace ov::intel_gpu::jit
65 changes: 65 additions & 0 deletions src/plugins/intel_gpu/src/graph/impls/jit/gpu_generator.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Copyright (C) 2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>

#include "jit_generator.hpp"
#include "gpu/intel/jit/generator.hpp"
#include "openvino/core/node.hpp"
#include "openvino/core/node_output.hpp"
#include "snippets/emitter.hpp"
#include "snippets/generator.hpp"
#include "snippets/target_machine.hpp"

namespace ov::intel_gpu::jit {

class CompiledSnippetGPU : public snippets::CompiledSnippet {
public:
[[nodiscard]] const uint8_t* get_code() const override;
[[nodiscard]] size_t get_code_size() const override;
[[nodiscard]] bool empty() const override;
explicit CompiledSnippetGPU() = default;
};

/// @brief Snippets TargetMachine specialized for one GPU generation (`hw`).
/// Owns the nGEN-based code generator used for kernel emission.
template <ngen::HW hw>
class GPUTargetMachine : public ov::snippets::TargetMachine {
public:
    // NOTE(review): `explicit` on a zero-parameter constructor has no effect
    // on conversions (only forbids `= {}` copy-list-init); consider dropping.
    explicit GPUTargetMachine();

    /// Always true: unknown hardware is rejected earlier, when the machine is
    /// created in GPUGenerator::create_target_machine.
    [[nodiscard]] bool is_supported() const override { return true; }
    /// Deep-copies the runtime configurator into a freshly constructed machine.
    [[nodiscard]] std::shared_ptr<snippets::TargetMachine> clone() const override;

    /// SIMD width reported by the underlying generator.
    [[nodiscard]] size_t get_lanes() const override;

    // Register queries — currently unimplemented (the definitions throw).
    [[nodiscard]] std::vector<snippets::Reg> get_abi_arg_regs() const override;
    [[nodiscard]] std::vector<snippets::Reg> get_gp_reg_pool() const override;
    [[nodiscard]] std::vector<snippets::Reg> get_vec_reg_pool() const override;

    /// Compile-time hardware id of this specialization.
    [[nodiscard]] dnnl::impl::gpu::intel::jit::gpu_gen_t get_hw() const;

    /// Finalizes and returns the compiled kernel (currently unimplemented).
    snippets::CompiledSnippetPtr get_snippet() override;

private:
    // Owning handle to the nGEN code generator; makes the class move-only.
    std::unique_ptr<jit_snippet_t<hw>> m_h;
};

/// @brief Snippets Generator backed by the GPU target machines above.
class GPUGenerator : public ov::snippets::Generator {
public:
    // NOTE(review): single-argument constructor is implicit; consider marking
    // it `explicit` to prevent accidental gpu_gen_t -> GPUGenerator conversions.
    GPUGenerator(dnnl::impl::gpu::intel::jit::gpu_gen_t hw);
    /// Clones the generator together with a clone of its target machine.
    std::shared_ptr<Generator> clone() const override;

    /// No GPU-specific register typing yet: always returns RegType::undefined.
    ov::snippets::RegType get_specific_op_out_reg_type(const ov::Output<ov::Node>& out) const override;

private:
    /// Used by clone(); asserts `target` is a known GPUTargetMachine specialization.
    GPUGenerator(const std::shared_ptr<ov::snippets::TargetMachine>& target);

    /// Maps the runtime HW id to the matching compile-time specialization; throws on unknown HW.
    static std::shared_ptr<ov::snippets::TargetMachine> create_target_machine(ngen::HW hw);
};

} // namespace ov::intel_gpu::jit
Loading
Loading