Skip to content

Commit f8d5cf5

Browse files
committed
perf: Cache dbTables FuzzySet per schema
Computing the hint message when a requested relation is not present in the schema cache requires building a FuzzySet (used to fuzzy-search for candidate table names). For schemas with many tables this is costly. This patch introduces dbTablesFuzzyIndex in SchemaCache to memoize the FuzzySet for each schema, so it is built at most once per schema cache load instead of on every failed lookup.
1 parent 379eaec commit f8d5cf5

File tree

6 files changed

+73
-25
lines changed

6 files changed

+73
-25
lines changed

src/PostgREST/Error.hs

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,8 @@ import PostgREST.SchemaCache.Relationship (Cardinality (..),
4949
RelationshipsMap)
5050
import PostgREST.SchemaCache.Routine (Routine (..),
5151
RoutineParam (..))
52-
import PostgREST.SchemaCache.Table (Table (..))
5352
import Protolude
54-
53+
import PostgREST.SchemaCache (TablesFuzzyIndex)
5554

5655
class (ErrorBody a, JSON.ToJSON a) => PgrstError a where
5756
status :: a -> HTTP.Status
@@ -250,7 +249,7 @@ data SchemaCacheError
250249
| NoRelBetween Text Text (Maybe Text) Text RelationshipsMap
251250
| NoRpc Text Text [Text] MediaType Bool [QualifiedIdentifier] [Routine]
252251
| ColumnNotFound Text Text
253-
| TableNotFound Text Text [Table]
252+
| TableNotFound Text Text TablesFuzzyIndex
254253
deriving Show
255254

256255
instance PgrstError SchemaCacheError where
@@ -428,12 +427,12 @@ noRpcHint schema procName params allProcs overloadedProcs =
428427

429428
-- |
430429
-- Do a fuzzy search in all tables in the same schema and return closest result
431-
tableNotFoundHint :: Text -> Text -> [Table] -> Maybe Text
432-
tableNotFoundHint schema tblName tblList
430+
tableNotFoundHint :: Text -> Text -> HM.HashMap Schema Fuzzy.FuzzySet -> Maybe Text
431+
tableNotFoundHint schema tblName dbTablesFuzzyIndex
433432
= fmap (\tbl -> "Perhaps you meant the table '" <> schema <> "." <> tbl <> "'") perhapsTable
434433
where
435434
perhapsTable = Fuzzy.getOne fuzzyTableSet tblName
436-
fuzzyTableSet = Fuzzy.fromList [ tableName tbl | tbl <- tblList, tableSchema tbl == schema]
435+
fuzzyTableSet = fromMaybe Fuzzy.defaultSet (HM.lookup schema dbTablesFuzzyIndex)
437436

438437

439438
compressedRel :: Relationship -> JSON.Value

src/PostgREST/Plan.hs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -172,15 +172,15 @@ dbActionPlan dbAct conf apiReq sCache = case dbAct of
172172

173173
wrappedReadPlan :: QualifiedIdentifier -> AppConfig -> SchemaCache -> ApiRequest -> Bool -> Either Error CrudPlan
174174
wrappedReadPlan identifier conf sCache apiRequest@ApiRequest{iPreferences=Preferences{..},..} headersOnly = do
175-
qi <- findTable identifier (dbTables sCache)
175+
qi <- findTable identifier sCache
176176
rPlan <- readPlan qi conf sCache apiRequest
177177
(handler, mediaType) <- mapLeft ApiRequestError $ negotiateContent conf apiRequest qi iAcceptMediaType (dbMediaHandlers sCache) (hasDefaultSelect rPlan)
178178
if not (null invalidPrefs) && preferHandling == Just Strict then Left $ ApiRequestError $ InvalidPreferences invalidPrefs else Right ()
179179
return $ WrappedReadPlan rPlan SQL.Read handler mediaType headersOnly qi
180180

181181
mutateReadPlan :: Mutation -> ApiRequest -> QualifiedIdentifier -> AppConfig -> SchemaCache -> Either Error CrudPlan
182182
mutateReadPlan mutation apiRequest@ApiRequest{iPreferences=Preferences{..},..} identifier conf sCache = do
183-
qi <- findTable identifier (dbTables sCache)
183+
qi <- findTable identifier sCache
184184
rPlan <- readPlan qi conf sCache apiRequest
185185
mPlan <- mutatePlan mutation qi apiRequest sCache rPlan
186186
if not (null invalidPrefs) && preferHandling == Just Strict then Left $ ApiRequestError $ InvalidPreferences invalidPrefs else Right ()
@@ -810,10 +810,10 @@ validateAggFunctions aggFunctionsAllowed (Node rp@ReadPlan {select} forest)
810810
| otherwise = Node rp <$> traverse (validateAggFunctions aggFunctionsAllowed) forest
811811

812812
-- | Lookup table in the schema cache before creating read plan
813-
findTable :: QualifiedIdentifier -> TablesMap -> Either Error QualifiedIdentifier
814-
findTable qi@QualifiedIdentifier{..} tableMap =
815-
case HM.lookup qi tableMap of
816-
Nothing -> Left $ SchemaCacheErr $ TableNotFound qiSchema qiName (HM.elems tableMap)
813+
findTable :: QualifiedIdentifier -> SchemaCache -> Either Error QualifiedIdentifier
814+
findTable qi@QualifiedIdentifier{..} SchemaCache{dbTables, dbTablesFuzzyIndex} =
815+
case HM.lookup qi dbTables of
816+
Nothing -> Left $ SchemaCacheErr $ TableNotFound qiSchema qiName dbTablesFuzzyIndex
817817
Just _ -> Right qi
818818

819819
addFilters :: ResolverContext -> ApiRequest -> ReadPlanTree -> Either Error ReadPlanTree

src/PostgREST/Response.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,10 +209,10 @@ actionResponse (MaybeDbResult InspectPlan{ipHdrsOnly=headersOnly} body) _ versio
209209
in
210210
Right $ PgrstResponse HTTP.status200 (MediaType.toContentType MTOpenAPI : cLHeader ++ maybeToList (profileHeader schema negotiatedByProfile)) rsBody
211211

212-
actionResponse (NoDbResult (RelInfoPlan qi@QualifiedIdentifier{..})) _ _ _ SchemaCache{dbTables} _ _ =
212+
actionResponse (NoDbResult (RelInfoPlan qi@QualifiedIdentifier{..})) _ _ _ SchemaCache{dbTables, dbTablesFuzzyIndex} _ _ =
213213
case HM.lookup qi dbTables of
214214
Just tbl -> respondInfo $ allowH tbl
215-
Nothing -> Left $ Error.SchemaCacheErr $ Error.TableNotFound qiSchema qiName (HM.elems dbTables)
215+
Nothing -> Left $ Error.SchemaCacheErr $ Error.TableNotFound qiSchema qiName dbTablesFuzzyIndex
216216
where
217217
allowH table =
218218
let hasPK = not . null $ tablePKCols table in

src/PostgREST/SchemaCache.hs

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ These queries are executed once at startup or when PostgREST is reloaded.
2020

2121
module PostgREST.SchemaCache
2222
( SchemaCache(..)
23+
, TablesFuzzyIndex
2324
, querySchemaCache
2425
, showSummary
2526
, decodeFuncs
@@ -66,21 +67,28 @@ import PostgREST.SchemaCache.Table (Column (..), ColumnMap,
6667

6768
import qualified PostgREST.MediaType as MediaType
6869

69-
import Control.Arrow ((&&&))
70-
import Protolude
71-
import System.IO.Unsafe (unsafePerformIO)
70+
import Control.Arrow ((&&&))
71+
import qualified Data.FuzzySet as Fuzzy
72+
import Protolude
73+
import System.IO.Unsafe (unsafePerformIO)
74+
75+
type TablesFuzzyIndex = HM.HashMap Schema Fuzzy.FuzzySet
7276

7377
data SchemaCache = SchemaCache
74-
{ dbTables :: TablesMap
75-
, dbRelationships :: RelationshipsMap
76-
, dbRoutines :: RoutineMap
77-
, dbRepresentations :: RepresentationsMap
78-
, dbMediaHandlers :: MediaHandlerMap
79-
, dbTimezones :: TimezoneNames
78+
{ dbTables :: TablesMap
79+
, dbRelationships :: RelationshipsMap
80+
, dbRoutines :: RoutineMap
81+
, dbRepresentations :: RepresentationsMap
82+
, dbMediaHandlers :: MediaHandlerMap
83+
, dbTimezones :: TimezoneNames
84+
-- Fuzzy index of table names per schema to support approximate matching
85+
-- Since index construction can be expensive, we build it once and store it in the SchemaCache
86+
-- Haskell's lazy evaluation ensures it is only built on first use and memoized afterwards
87+
, dbTablesFuzzyIndex :: TablesFuzzyIndex
8088
}
8189

8290
instance JSON.ToJSON SchemaCache where
83-
toJSON (SchemaCache tabs rels routs reps hdlers tzs) = JSON.object [
91+
toJSON (SchemaCache tabs rels routs reps hdlers tzs _) = JSON.object [
8492
"dbTables" .= JSON.toJSON tabs
8593
, "dbRelationships" .= JSON.toJSON rels
8694
, "dbRoutines" .= JSON.toJSON routs
@@ -90,7 +98,7 @@ instance JSON.ToJSON SchemaCache where
9098
]
9199

92100
showSummary :: SchemaCache -> Text
93-
showSummary (SchemaCache tbls rels routs reps mediaHdlrs tzs) =
101+
showSummary (SchemaCache tbls rels routs reps mediaHdlrs tzs _) =
94102
T.intercalate ", "
95103
[ show (HM.size tbls) <> " Relations"
96104
, show (HM.size rels) <> " Relationships"
@@ -166,6 +174,8 @@ querySchemaCache conf@AppConfig{..} = do
166174
, dbRepresentations = reps
167175
, dbMediaHandlers = HM.union mHdlers initialMediaHandlers -- the custom handlers will override the initial ones
168176
, dbTimezones = tzones
177+
178+
, dbTablesFuzzyIndex = Fuzzy.fromList <$> HM.fromListWith (<>) ((qiSchema &&& pure . qiName) <$> HM.keys tabsWViewsPks)
169179
}
170180
where
171181
schemas = toList configDbSchemas
@@ -203,6 +213,7 @@ removeInternal schemas dbStruct =
203213
, dbRepresentations = dbRepresentations dbStruct -- no need to filter, not directly exposed through the API
204214
, dbMediaHandlers = dbMediaHandlers dbStruct
205215
, dbTimezones = dbTimezones dbStruct
216+
, dbTablesFuzzyIndex = dbTablesFuzzyIndex dbStruct
206217
}
207218
where
208219
hasInternalJunction ComputedRelationship{} = False

test/io/big_schema.sql

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11375,6 +11375,24 @@ ALTER TABLE ONLY apflora.zielber
1137511375

1137611376
ALTER TABLE apflora."user" ENABLE ROW LEVEL SECURITY;
1137711377

11378+
-- Create many tables to test fuzzy string search
11379+
-- computing hints for non existing tables
11380+
DO
11381+
$$
11382+
DECLARE
11383+
r record;
11384+
BEGIN
11385+
FOR r IN
11386+
SELECT
11387+
format('CREATE TABLE apflora.unknown_table_%s ()', n) AS ct
11388+
FROM
11389+
generate_series(1, 950) n
11390+
LOOP
11391+
EXECUTE r.ct;
11392+
END LOOP;
11393+
END
11394+
$$;
11395+
1137811396
DROP ROLE IF EXISTS postgrest_test_anonymous;
1137911397
CREATE ROLE postgrest_test_anonymous;
1138011398

test/io/test_big_schema.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,23 @@ def test_should_not_fail_with_stack_overflow(defaultenv):
7070
assert response.status_code == 404
7171
data = response.json()
7272
assert data["code"] == "PGRST205"
73+
74+
75+
def test_second_request_for_non_existent_table_should_be_quick(defaultenv):
76+
"requesting a non-existent table should be quick after the fuzzy search index is loaded (2nd request)"
77+
78+
env = {
79+
**defaultenv,
80+
"PGRST_DB_SCHEMAS": "apflora",
81+
"PGRST_DB_POOL": "2",
82+
"PGRST_DB_ANON_ROLE": "postgrest_test_anonymous",
83+
}
84+
85+
with run(env=env, wait_max_seconds=30) as postgrest:
86+
response = postgrest.session.get("/unknown-table")
87+
assert response.status_code == 404
88+
data = response.json()
89+
assert data["code"] == "PGRST205"
90+
first_duration = response.elapsed.total_seconds()
91+
response = postgrest.session.get("/unknown-table")
92+
assert response.elapsed.total_seconds() < first_duration / 20

0 commit comments

Comments
 (0)