Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions O365.Security.Native.ETW.Debug.nuspec
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<package xmlns="http://schemas.microsoft.com/packaging/2011/08/nuspec.xsd">
<metadata>
<id>Microsoft.O365.Security.Native.ETW.Debug</id>
<version>4.4.7</version>
<version>4.4.8</version>
<title>Microsoft.O365.Security.Native.ETW Debug - managed wrappers for krabsetw</title>
<authors>Microsoft</authors>
<owners>Microsoft</owners>
Expand All @@ -12,8 +12,9 @@
<description>Microsoft.O365.Security.Native.ETW Debug is a managed wrapper around the krabsetw ETW library. This is the Debug build.</description>
<summary>Microsoft.O365.Security.Native.ETW Debug is a managed wrapper around the krabsetw ETW library. This is the Debug build.</summary>
<releaseNotes>
Version 4.4.7:
- Add process_start_key() to read ProcessStartKey from extended data
Version 4.4.8:
- Optimize parser property lookup with persistent name-to-index map
- Use std::wstring_view for parser property name parameters
</releaseNotes>
<copyright>© Microsoft Corporation. All rights reserved.</copyright>
<language />
Expand Down
7 changes: 4 additions & 3 deletions O365.Security.Native.ETW.nuspec
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<package xmlns="http://schemas.microsoft.com/packaging/2011/08/nuspec.xsd">
<metadata>
<id>Microsoft.O365.Security.Native.ETW</id>
<version>4.4.7</version>
<version>4.4.8</version>
<title>Microsoft.O365.Security.Native.ETW - managed wrappers for krabsetw</title>
<authors>Microsoft</authors>
<owners>Microsoft</owners>
Expand All @@ -12,8 +12,9 @@
<description>Microsoft.O365.Security.Native.ETW is a managed wrapper around the krabsetw ETW library.</description>
<summary>Microsoft.O365.Security.Native.ETW is a managed wrapper around the krabsetw ETW library.</summary>
<releaseNotes>
Version 4.4.7:
- Add process_start_key() to read ProcessStartKey from extended data
Version 4.4.8:
- Optimize parser property lookup with persistent name-to-index map
- Use std::wstring_view for parser property name parameters
</releaseNotes>
<copyright>© Microsoft Corporation. All rights reserved.</copyright>
<language />
Expand Down
118 changes: 68 additions & 50 deletions krabs/krabs/parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
#pragma once

#include <cassert>
#include <deque>
#include <utility>
#include <stdexcept>
#include <string_view>
#include <vector>

#include <functional>

Expand Down Expand Up @@ -76,7 +76,7 @@ namespace krabs {
* </remarks>
*/
template <typename T>
bool try_parse(const std::wstring &name, T &out);
bool try_parse(std::wstring_view name, T &out);

/**
* <summary>
Expand All @@ -85,23 +85,24 @@ namespace krabs {
* </summary>
*/
template <typename T>
T parse(const std::wstring &name);
T parse(std::wstring_view name);

template <typename Adapter>
auto view_of(const std::wstring &name, Adapter &adapter) -> collection_view<typename Adapter::const_iterator>;
auto view_of(std::wstring_view name, Adapter &adapter) -> collection_view<typename Adapter::const_iterator>;

private:
property_info find_property(const std::wstring &name);
void cache_property(const wchar_t *name, property_info info);
property_info find_property(std::wstring_view name);
void cache_property(ULONG index, property_info info);

private:
const schema &schema_;
const BYTE *pEndBuffer_;
BYTE *pBufferIndex_;
ULONG lastPropertyIndex_;

// Maintain a mapping from property name to blob data index.
std::deque<std::pair<const wchar_t *, property_info>> propertyCache_;
// Persistent name to index map shared across all events of the same type.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure this layer of caching even makes sense. It's just overhead.
I can't think of any reason not to just maintain a map of name->property_info from the schema.

I'm fuzzy on all the various event type schemas, but looking at this code, it seems like the property_info blobs are expected to be consistent. Just trying to think through whether we ever need any of the old fallback code if we pre-populate and cache.

const property_name_map *pPropertyNames_;
// Maintain a mapping from property index to blob data location.
std::vector<property_info> propertyCache_;
};

// Implementation
Expand All @@ -112,14 +113,16 @@ namespace krabs {
, pEndBuffer_((BYTE*)s.record_.UserData + s.record_.UserDataLength)
, pBufferIndex_((BYTE*)s.record_.UserData)
, lastPropertyIndex_(0)
, pPropertyNames_(s.pPropertyNames_)
, propertyCache_(s.pSchema_->PropertyCount)
{}

inline property_iterator parser::properties() const
{
return property_iterator(schema_);
}

inline property_info parser::find_property(const std::wstring &name)
inline property_info parser::find_property(std::wstring_view name)
{
// A schema contains a collection of properties that are keyed by name.
// These properties are stored in a blob of bytes that needs to be
Expand All @@ -129,15 +132,40 @@ namespace krabs {
// the contents within it. This is janky, so our strategy is to
// minimize this as much as possible via caching.

// The first step is to use our cache for the property to see if we've
// discovered it already.
for (auto &item : propertyCache_) {
if (name == item.first) {
return item.second;
const ULONG totalPropCount = schema_.pSchema_->PropertyCount;

// Resolve property name to index.
ULONG index = totalPropCount; // sentinel = not found
if (pPropertyNames_) {
// Fast path: use the persistent name to index map shared across
// all events of the same type.
auto it = pPropertyNames_->find(name);
if (it != pPropertyNames_->end()) {
index = it->second;
}
} else {
// Fallback: linear scan of property names in the schema.
for (ULONG i = 0; i < totalPropCount; ++i) {
auto &propInfo = schema_.pSchema_->EventPropertyInfoArray[i];
const wchar_t *pName = reinterpret_cast<const wchar_t*>(
reinterpret_cast<const BYTE*>(schema_.pSchema_) +
propInfo.NameOffset);
if (name == pName) {
index = i;
break;
}
}
}

const ULONG totalPropCount = schema_.pSchema_->PropertyCount;
if (index >= totalPropCount) {
return property_info();
}

// The first step is to use our cache for the property to see if we've
// discovered it already.
if (index < lastPropertyIndex_) {
return propertyCache_[index];
}

assert((pBufferIndex_ <= pEndBuffer_ && pBufferIndex_ >= schema_.record_.UserData) &&
"invariant: we should've already thrown for falling off the edge");
Expand All @@ -153,16 +181,13 @@ namespace krabs {
// to find it. While we're going through the blob to find it, we'll
// remember what we've seen to save time later.
//
// Question: Why don't we just populate the cache before looking up any
// properties and simplify our code (less state, etc)?
//
// Answer: Doing that introduces overhead in the case that only a
// subset of properties are needed. While this code is a bit
// more complicated, we introduce no additional performance
// overhead at runtime.
for (auto &i = lastPropertyIndex_; i < totalPropCount; ++i) {
// Note: The name-to-index map is built once per schema type (cheap
// metadata scan). But the blob walk below is lazy per-event -- we
// only walk forward to the requested index, avoiding overhead when
// only a subset of properties are needed.
while (lastPropertyIndex_ <= index) {

auto &currentPropInfo = schema_.pSchema_->EventPropertyInfoArray[i];
auto &currentPropInfo = schema_.pSchema_->EventPropertyInfoArray[lastPropertyIndex_];
const wchar_t *pName = reinterpret_cast<const wchar_t*>(
reinterpret_cast<const BYTE*>(schema_.pSchema_) +
currentPropInfo.NameOffset);
Expand All @@ -179,26 +204,19 @@ namespace krabs {
}

property_info propInfo(pBufferIndex_, currentPropInfo, propertyLength);
cache_property(pName, propInfo);
cache_property(lastPropertyIndex_, propInfo);

// advance the buffer index since we've already processed this property
pBufferIndex_ += propertyLength;

// The property was found, return it
if (name == pName) {
// advance the index since we've already processed this property
++i;
return propInfo;
}
lastPropertyIndex_++;
}

// property wasn't found, return an empty propInfo
return property_info();
return propertyCache_[index];
}

inline void parser::cache_property(const wchar_t *name, property_info propInfo)
inline void parser::cache_property(ULONG index, property_info info)
{
propertyCache_.push_front(std::make_pair(name, propInfo));
propertyCache_[index] = info;
}

inline void throw_if_property_not_found(const property_info &propInfo)
Expand Down Expand Up @@ -227,7 +245,7 @@ namespace krabs {
// ------------------------------------------------------------------------

template <typename T>
bool parser::try_parse(const std::wstring &name, T &out)
bool parser::try_parse(std::wstring_view name, T &out)
{
try {
out = parse<T>(name);
Expand All @@ -252,7 +270,7 @@ namespace krabs {
// ------------------------------------------------------------------------

template <typename T>
T parser::parse(const std::wstring &name)
T parser::parse(std::wstring_view name)
{
auto propInfo = find_property(name);
throw_if_property_not_found(propInfo);
Expand All @@ -268,7 +286,7 @@ namespace krabs {
}

template<>
inline bool parser::parse<bool>(const std::wstring& name)
inline bool parser::parse<bool>(std::wstring_view name)
{
auto propInfo = find_property(name);
throw_if_property_not_found(propInfo);
Expand All @@ -280,7 +298,7 @@ namespace krabs {
}

template <>
inline std::wstring parser::parse<std::wstring>(const std::wstring &name)
inline std::wstring parser::parse<std::wstring>(std::wstring_view name)
{
auto propInfo = find_property(name);
throw_if_property_not_found(propInfo);
Expand All @@ -294,7 +312,7 @@ namespace krabs {
}

template <>
inline std::string parser::parse<std::string>(const std::wstring &name)
inline std::string parser::parse<std::string>(std::wstring_view name)
{
auto propInfo = find_property(name);
throw_if_property_not_found(propInfo);
Expand All @@ -308,7 +326,7 @@ namespace krabs {
}

template<>
inline const counted_string* parser::parse<const counted_string*>(const std::wstring &name)
inline const counted_string* parser::parse<const counted_string*>(std::wstring_view name)
{
auto propInfo = find_property(name);
throw_if_property_not_found(propInfo);
Expand All @@ -319,7 +337,7 @@ namespace krabs {
}

template<>
inline binary parser::parse<binary>(const std::wstring &name)
inline binary parser::parse<binary>(std::wstring_view name)
{
auto propInfo = find_property(name);
throw_if_property_not_found(propInfo);
Expand All @@ -331,7 +349,7 @@ namespace krabs {

template<>
inline ip_address parser::parse<ip_address>(
const std::wstring &name)
std::wstring_view name)
{
auto propInfo = find_property(name);
throw_if_property_not_found(propInfo);
Expand All @@ -354,7 +372,7 @@ namespace krabs {

template<>
inline socket_address parser::parse<socket_address>(
const std::wstring &name)
std::wstring_view name)
{
auto propInfo = find_property(name);
throw_if_property_not_found(propInfo);
Expand All @@ -366,7 +384,7 @@ namespace krabs {

template<>
inline sid parser::parse<sid>(
const std::wstring& name)
std::wstring_view name)
{
auto propInfo = find_property(name);
throw_if_property_not_found(propInfo);
Expand Down Expand Up @@ -404,7 +422,7 @@ namespace krabs {
}

template<>
inline pointer parser::parse<pointer>(const std::wstring& name)
inline pointer parser::parse<pointer>(std::wstring_view name)
{
auto propInfo = find_property(name);
throw_if_property_not_found(propInfo);
Expand All @@ -418,7 +436,7 @@ namespace krabs {
// ------------------------------------------------------------------------

template <typename Adapter>
auto parser::view_of(const std::wstring &name, Adapter &adapter)
auto parser::view_of(std::wstring_view name, Adapter &adapter)
-> collection_view<typename Adapter::const_iterator>
{
auto propInfo = find_property(name);
Expand Down
4 changes: 4 additions & 0 deletions krabs/krabs/schema.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,8 @@ namespace krabs {
private:
const EVENT_RECORD &record_;
const TRACE_EVENT_INFO *pSchema_;
// Persistent name to index map, owned by schema_locator. May be nullptr.
const property_name_map *pPropertyNames_;

private:
friend std::wstring event_name(const schema &);
Expand Down Expand Up @@ -337,11 +339,13 @@ namespace krabs {
inline schema::schema(const EVENT_RECORD &record, const krabs::schema_locator &schema_locator)
: record_(record)
, pSchema_(schema_locator.get_event_schema(record))
, pPropertyNames_(schema_locator.get_property_names(pSchema_))
{ }

inline schema::schema(const EVENT_RECORD &record, const PTRACE_EVENT_INFO pSchema)
: record_(record)
, pSchema_(pSchema)
, pPropertyNames_(nullptr)
{ }

inline bool schema::operator==(const schema &other) const
Expand Down
Loading
Loading