diff --git a/.github/workflows/integrity-tests.yaml b/.github/workflows/integrity-tests.yaml new file mode 100644 index 0000000..9204ef3 --- /dev/null +++ b/.github/workflows/integrity-tests.yaml @@ -0,0 +1,49 @@ +name: Integrity Tests + +on: + pull_request: + workflow_dispatch: + +jobs: + test-pulse: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + configuration: [Debug, Release] + + env: + # Define the path to project and test project + PROJECT: src/ArrowDbCore/ArrowDbCore.csproj + TEST_PROJECT: tests/ArrowDbCore.Tests.Integrity/ArrowDbCore.Tests.Integrity.csproj + + steps: + # 1. Checkout the repository code + - name: Checkout Repository + uses: actions/checkout@v4 + + # 2. Cache NuGet packages + - name: Cache NuGet Packages + uses: actions/cache@v4 + with: + path: ~/.nuget/packages + key: ${{ runner.os }}-nuget-${{ hashFiles('**/*.csproj') }} + restore-keys: | + ${{ runner.os }}-nuget- + + # 3. Setup .NET + - name: Setup .NET + uses: actions/setup-dotnet@v4 + with: + dotnet-version: 9.0.x + + # 4. Clean + - name: Clean + run: | + dotnet clean ${{ env.PROJECT }} -c ${{ matrix.configuration }} + dotnet clean ${{ env.TEST_PROJECT }} -c ${{ matrix.configuration }} + + # 5. 
Run Integrity Tests + - name: Run Integrity Tests + run: dotnet test ${{ env.TEST_PROJECT }} -c ${{ matrix.configuration }} \ No newline at end of file diff --git a/.github/workflows/run-unit-tests.yaml b/.github/workflows/unit-tests.yaml similarity index 98% rename from .github/workflows/run-unit-tests.yaml rename to .github/workflows/unit-tests.yaml index 9d80a72..4e85da6 100644 --- a/.github/workflows/run-unit-tests.yaml +++ b/.github/workflows/unit-tests.yaml @@ -1,4 +1,4 @@ -name: Run Unit Tests +name: Unit Tests on: pull_request: diff --git a/ArrowDbCore.sln b/ArrowDbCore.sln index 786006a..679eb26 100644 --- a/ArrowDbCore.sln +++ b/ArrowDbCore.sln @@ -15,6 +15,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "benchmarks", "benchmarks", EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ArrowDbCore.Benchmarks", "benchmarks\ArrowDbCore.Benchmarks\ArrowDbCore.Benchmarks.csproj", "{419CA340-26F0-4FC1-83AC-D06A93AAB190}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ArrowDbCore.Tests.Integrity", "tests\ArrowDbCore.Tests.Integrity\ArrowDbCore.Tests.Integrity.csproj", "{39B1435C-B9E0-40A8-ABA9-7BB2F2CCF787}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -36,10 +38,15 @@ Global {419CA340-26F0-4FC1-83AC-D06A93AAB190}.Debug|Any CPU.Build.0 = Debug|Any CPU {419CA340-26F0-4FC1-83AC-D06A93AAB190}.Release|Any CPU.ActiveCfg = Release|Any CPU {419CA340-26F0-4FC1-83AC-D06A93AAB190}.Release|Any CPU.Build.0 = Release|Any CPU + {39B1435C-B9E0-40A8-ABA9-7BB2F2CCF787}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {39B1435C-B9E0-40A8-ABA9-7BB2F2CCF787}.Debug|Any CPU.Build.0 = Debug|Any CPU + {39B1435C-B9E0-40A8-ABA9-7BB2F2CCF787}.Release|Any CPU.ActiveCfg = Release|Any CPU + {39B1435C-B9E0-40A8-ABA9-7BB2F2CCF787}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(NestedProjects) = preSolution {23F42F88-1579-4087-ABF2-814EDBD53F59} = 
{822210FC-B851-4C2C-AEAE-250F17687CC3} {CDBBF9DF-5F8B-41C0-AAE7-2EC157C3BA1D} = {4ED1B77D-F425-487C-B32C-53F92A8E5A2E} {419CA340-26F0-4FC1-83AC-D06A93AAB190} = {9844EA79-5000-4276-A2C4-D7BA430F18B4} + {39B1435C-B9E0-40A8-ABA9-7BB2F2CCF787} = {4ED1B77D-F425-487C-B32C-53F92A8E5A2E} EndGlobalSection EndGlobal diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..9e655ac --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,10 @@ +# Changelog (Sorted by Date in Descending Order) + +## 1.1.0.0 + +* Fixed issue with `FileSerializer` where serialization would write over existing file data which could create invalid tokens, causing deserialization to fail. +* Added static `ArrowDb.GenerateTypedKey` method that accepts the type of the value, specific key (identifier) and a buffer, it returns a `ReadOnlySpan` key that prefixes the type to the specific key. + +## 1.0.0.0 + +* Initial Release diff --git a/README.md b/README.md index 86ce886..ed0bb73 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,9 @@
- [![NuGet](https://img.shields.io/nuget/v/ArrowDb.svg)](https://www.nuget.org/packages/ArrowDb) + [![NuGet Downloads](https://img.shields.io/nuget/dt/ArrowDb?style=flat&label=Nuget%20-%20ArrowDb)](https://www.nuget.org/packages/ArrowDb) + [![Unit Tests](https://github.com/dusrdev/ArrowDb/actions/workflows/unit-tests.yaml/badge.svg)](https://github.com/dusrdev/ArrowDb/actions/workflows/unit-tests.yaml) + [![Integrity Tests](https://github.com/dusrdev/ArrowDb/actions/workflows/integrity-tests.yaml/badge.svg)](https://github.com/dusrdev/ArrowDb/actions/workflows/integrity-tests.yaml)
@@ -151,6 +153,38 @@ do { As the example shows retries is the usual way to resolve these conflicts, but custom logic can also be used, you can simply reject the operation, and also use other loops or even `goto` statements if you are brave enough. +## `ReadOnlySpan<char>` Key Generation + +`ArrowDb` APIs use `ReadOnlySpan<char>` for keys to minimize unnecessary string allocations. Using the API with `Upsert` usually doesn't require any specific logic, as a `string` can also be interpreted as a `ReadOnlySpan<char>`. However, when checking whether a key exists or removing keys, you usually don't have a pre-existing reference to the key, which means you have to use rather low-level APIs to efficiently generate a `ReadOnlySpan<char>` key. + +To make this process much easier, and to help with type safety, `ArrowDb` exposes a static `GenerateTypedKey<TValue>` method that accepts the type of the value (as a type argument), a specific key (identifier) and a buffer. It returns a `ReadOnlySpan<char>` key that prefixes the type name to the specific key. + +For example, if you have a `Person` class (from examples above): + +```csharp +// we need a buffer (we can rent one from a pool, or allocate it ourselves) +// in this example we will rent memory +using var memoryOwner = MemoryPool<char>.Shared.Rent(128); +// in this example 128 chars will be sufficient, use the smallest size that fits your needs +ReadOnlySpan<char> key = ArrowDb.GenerateTypedKey<Person>("john", memoryOwner.Memory.Span); +// key is now a ReadOnlySpan<char> that contains "Person:john" +// we can use it for Upsert, ContainsKey, TryGetValue, Remove, etc... +_ = db.ContainsKey(key); +_ = db.TryGetValue(key, MyJsonContext.Default.Person, out var person); +// etc... 
+``` + +This can also be used to filter out keys for mass lookups: + +```csharp +// get all keys +var keys = db.Keys; +// get the type name +var prefix = typeof(Person).Name; +// get all keys where the value type is Person +var people = keys.Where(k => k.StartsWith(prefix)); +``` + ## Use `ArrowDb` for Runtime Caching `ArrowDb` is a great fit for runtime caching, as it is extremely lightweight, fast, type-safe and thread-safe. To support this use case, `ArrowDb` provides a ‘NoOp’ serializer that does not persist the data and keeps it in volatile memory. This is used via the factory method: diff --git a/src/ArrowDbCore/ArrowDb.Factory.cs b/src/ArrowDbCore/ArrowDb.Factory.cs index bce9036..2bb2739 100644 --- a/src/ArrowDbCore/ArrowDb.Factory.cs +++ b/src/ArrowDbCore/ArrowDb.Factory.cs @@ -31,4 +31,32 @@ public static async ValueTask CreateCustom(IDbSerializer serializer) { var data = await serializer.DeserializeAsync(); return new ArrowDb(data, serializer); } + + /// <summary> + /// Generates a typed key for the specified specific key by writing the type name, a ':' separator and the specific key into the caller-supplied buffer + /// </summary> + /// <typeparam name="TValue">The type of the value</typeparam> + /// <param name="specificKey">The key that is specific to the value</param> + /// <param name="buffer">The buffer to use for the generation</param> + /// <returns> + /// A slice of <paramref name="buffer"/> that is formatted as "{TypeName}:{specificKey}" + /// </returns> + /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="buffer"/> is shorter than the type name, the ':' separator and <paramref name="specificKey"/> combined</exception> + public static ReadOnlySpan<char> GenerateTypedKey<TValue>(ReadOnlySpan<char> specificKey, Span<char> buffer) { + var typeName = TypeNameCache<TValue>.TypeName; + var length = typeName.Length + 1 + specificKey.Length; // type:specificKey + ArgumentOutOfRangeException.ThrowIfGreaterThan(length, buffer.Length); + typeName.CopyTo(buffer); + buffer[typeName.Length] = ':'; + specificKey.CopyTo(buffer.Slice(typeName.Length + 1)); + return buffer.Slice(0, length); + } + + // A static generic class that caches typeof(T).Name in a static readonly field, one per T + private static class TypeNameCache<T> { + /// <summary> + /// The name of the type of T (typeof(T).Name, i.e. the short name without the namespace) + /// </summary> + public static readonly string TypeName = typeof(T).Name; + } } diff --git a/src/ArrowDbCore/ArrowDb.Read.cs b/src/ArrowDbCore/ArrowDb.Read.cs index ae74729..9d01550 100644 --- 
a/src/ArrowDbCore/ArrowDb.Read.cs +++ b/src/ArrowDbCore/ArrowDb.Read.cs @@ -29,7 +29,7 @@ public bool TryGetValue(ReadOnlySpan key, JsonTypeInfo jso value = default!; return false; } - value = JsonSerializer.Deserialize(existingReference.AsSpan(), jsonTypeInfo)!; + value = JsonSerializer.Deserialize(new ReadOnlySpan(existingReference), jsonTypeInfo)!; return !EqualityComparer.Default.Equals(value, default); } diff --git a/src/ArrowDbCore/ArrowDbCore.csproj b/src/ArrowDbCore/ArrowDbCore.csproj index d07fcd8..07c1da9 100644 --- a/src/ArrowDbCore/ArrowDbCore.csproj +++ b/src/ArrowDbCore/ArrowDbCore.csproj @@ -4,7 +4,7 @@ net9.0 enable enable - 1.0.0.0 + 1.1.0.0 true @@ -39,6 +39,9 @@ <_Parameter1>ArrowDbCore.Tests.Unit + + <_Parameter1>ArrowDbCore.Tests.Integrity + \ No newline at end of file diff --git a/src/ArrowDbCore/FileSerializer.cs b/src/ArrowDbCore/FileSerializer.cs index 1caae67..f424fca 100644 --- a/src/ArrowDbCore/FileSerializer.cs +++ b/src/ArrowDbCore/FileSerializer.cs @@ -40,7 +40,7 @@ public ValueTask> DeserializeAsync() { /// public ValueTask SerializeAsync(ConcurrentDictionary data) { - using var file = File.OpenWrite(_path); + using var file = File.Create(_path); JsonSerializer.Serialize(file, data, _jsonTypeInfo); return ValueTask.CompletedTask; } diff --git a/tests/ArrowDbCore.Tests.Integrity/ArrowDbCore.Tests.Integrity.csproj b/tests/ArrowDbCore.Tests.Integrity/ArrowDbCore.Tests.Integrity.csproj new file mode 100644 index 0000000..e211a5e --- /dev/null +++ b/tests/ArrowDbCore.Tests.Integrity/ArrowDbCore.Tests.Integrity.csproj @@ -0,0 +1,27 @@ + + + + net9.0 + enable + enable + false + + + + + + + + + + + + + + + + + + + + diff --git a/tests/ArrowDbCore.Tests.Integrity/JContext.cs b/tests/ArrowDbCore.Tests.Integrity/JContext.cs new file mode 100644 index 0000000..20a8de5 --- /dev/null +++ b/tests/ArrowDbCore.Tests.Integrity/JContext.cs @@ -0,0 +1,7 @@ +using System.Text.Json.Serialization; + +namespace ArrowDbCore.Tests.Integrity; + 
+[JsonSourceGenerationOptions(WriteIndented = false, NumberHandling = JsonNumberHandling.AllowReadingFromString, UseStringEnumConverter = true)] +[JsonSerializable(typeof(Person))] +public partial class JContext : JsonSerializerContext { } \ No newline at end of file diff --git a/tests/ArrowDbCore.Tests.Integrity/LargeFile.cs b/tests/ArrowDbCore.Tests.Integrity/LargeFile.cs new file mode 100644 index 0000000..32e58c9 --- /dev/null +++ b/tests/ArrowDbCore.Tests.Integrity/LargeFile.cs @@ -0,0 +1,45 @@ +using Bogus; + +namespace ArrowDbCore.Tests.Integrity; + +public class LargeFile { + [Fact] + public async Task LargeFile_Passes_OneReadWriteCycle() { + const int itemCount = 500_000; + + var faker = new Faker<Person>(); + faker.UseSeed(1337); + faker.RuleFor(p => p.Name, (f, _) => f.Name.FullName()); + faker.RuleFor(p => p.Age, (f, _) => f.Random.Int(1, 100)); + faker.RuleFor(p => p.BirthDate, (f, _) => f.Date.Past(1, DateTime.Now.AddYears(-100))); + faker.RuleFor(p => p.IsMarried, (f, _) => f.Random.Bool()); + + var buffer = new char[256]; + + var path = Sharpify.Utils.Env.PathInBaseDirectory("long-test.db"); + try { + // load the db + var db = await ArrowDb.CreateFromFile(path); + // clear + db.Clear(); + // add items (generated names may repeat, so the final count can be lower than itemCount) + for (var j = 0; j < itemCount; j++) { + var person = faker.Generate(); + var key = ArrowDb.GenerateTypedKey<Person>(person.Name, buffer); + db.Upsert(key, person, JContext.Default.Person); + } + // save + await db.SerializeAsync(); + var actualCount = db.Count; + // try to load again + var db2 = await ArrowDb.CreateFromFile(path); + Assert.Equal(actualCount, db2.Count); + } finally { + if (File.Exists(path)) { + File.Delete(path); + } + } + + // this test fails if an exception is thrown or the reloaded count differs + } +} \ No newline at end of file diff --git a/tests/ArrowDbCore.Tests.Integrity/Person.cs b/tests/ArrowDbCore.Tests.Integrity/Person.cs new file mode 100644 index 0000000..ace3c71 --- /dev/null +++ b/tests/ArrowDbCore.Tests.Integrity/Person.cs @@ -0,0 +1,8 @@ +namespace 
ArrowDbCore.Tests.Integrity; + +public class Person { + public string Name { get; set; } = string.Empty; + public int Age { get; set; } + public DateTime BirthDate { get; set; } + public bool IsMarried { get; set; } +} diff --git a/tests/ArrowDbCore.Tests.Integrity/ReadWriteCycles.cs b/tests/ArrowDbCore.Tests.Integrity/ReadWriteCycles.cs new file mode 100644 index 0000000..f7f5e88 --- /dev/null +++ b/tests/ArrowDbCore.Tests.Integrity/ReadWriteCycles.cs @@ -0,0 +1,46 @@ +using Bogus; + +namespace ArrowDbCore.Tests.Integrity; + +public class ReadWriteCycles +{ + [Fact] + public async Task FileIO_Passes_ReadWriteCycles() + { + const int iterations = 200; + const int itemCount = 100; + + var faker = new Faker<Person>(); + faker.UseSeed(1337); + faker.RuleFor(p => p.Name, (f, _) => f.Name.FullName()); + faker.RuleFor(p => p.Age, (f, _) => f.Random.Int(1, 100)); + faker.RuleFor(p => p.BirthDate, (f, _) => f.Date.Past(1, DateTime.Now.AddYears(-100))); + faker.RuleFor(p => p.IsMarried, (f, _) => f.Random.Bool()); + + var buffer = new char[256]; + + var path = Sharpify.Utils.Env.PathInBaseDirectory("rdc-test.db"); + try { + for (var i = 0; i < iterations; i++) { + // load the db (from the second iteration on, this reads the file written by the previous iteration) + var db = await ArrowDb.CreateFromFile(path); + // clear + db.Clear(); + // add items + for (var j = 0; j < itemCount; j++) { + var person = faker.Generate(); + var key = ArrowDb.GenerateTypedKey<Person>(person.Name, buffer); + db.Upsert(key, person, JContext.Default.Person); + } + // save + await db.SerializeAsync(); + } + } finally { + if (File.Exists(path)) { + File.Delete(path); + } + } + + // this test fails if an exception is thrown + } +} \ No newline at end of file diff --git a/tests/ArrowDbCore.Tests.Unit/KeyGeneration.cs b/tests/ArrowDbCore.Tests.Unit/KeyGeneration.cs new file mode 100644 index 0000000..a4d22ca --- /dev/null +++ b/tests/ArrowDbCore.Tests.Unit/KeyGeneration.cs @@ -0,0 +1,31 @@ +using System.Runtime.CompilerServices; + +namespace ArrowDbCore.Tests.Unit; + +public class KeyGeneration { + 
[InlineArray(128)] + private struct Buffer { + private char _first; + } + + [Fact] + public void GenerateTypedKey_Primitive() { + var buffer = new Buffer(); + var key = ArrowDb.GenerateTypedKey<int>("1", buffer); + Assert.Equal("Int32:1", key); + } + + [Fact] + public void GenerateTypedKey_String() { + var buffer = new Buffer(); + var key = ArrowDb.GenerateTypedKey<string>("1", buffer); + Assert.Equal("String:1", key); + } + + [Fact] + public void GenerateTypedKey_Person() { + var buffer = new Buffer(); + var key = ArrowDb.GenerateTypedKey<Buffer>("1", buffer); + Assert.Equal("Buffer:1", key); + } +} \ No newline at end of file