Skip to content

Commit 68d6e86

Browse files
committed
perf: Cache dbTables FuzzySet per schema
Computing the hint message when the requested relation is not present in the schema cache requires creating a FuzzySet (used for fuzzy search to find candidate tables). This is costly for schemas with many tables. This patch introduces dbTablesFuzzyIndex in SchemaCache to memoize the FuzzySet creation.
1 parent 1724e45 commit 68d6e86

File tree

6 files changed

+86
-30
lines changed

6 files changed

+86
-30
lines changed

src/PostgREST/Error.hs

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ Module : PostgREST.Error
33
Description : PostgREST error HTTP responses
44
-}
55
{-# OPTIONS_GHC -fno-warn-orphans #-}
6+
{-# LANGUAGE NamedFieldPuns #-}
67
{-# LANGUAGE RecordWildCards #-}
78

89
module PostgREST.Error
@@ -41,6 +42,7 @@ import Network.HTTP.Types.Header (Header)
4142
import PostgREST.MediaType (MediaType (..))
4243
import qualified PostgREST.MediaType as MediaType
4344

45+
import PostgREST.SchemaCache (SchemaCache (SchemaCache, dbTablesFuzzyIndex))
4446
import PostgREST.SchemaCache.Identifiers (QualifiedIdentifier (..),
4547
Schema)
4648
import PostgREST.SchemaCache.Relationship (Cardinality (..),
@@ -49,10 +51,8 @@ import PostgREST.SchemaCache.Relationship (Cardinality (..),
4951
RelationshipsMap)
5052
import PostgREST.SchemaCache.Routine (Routine (..),
5153
RoutineParam (..))
52-
import PostgREST.SchemaCache.Table (Table (..))
5354
import Protolude
5455

55-
5656
class (ErrorBody a, JSON.ToJSON a) => PgrstError a where
5757
status :: a -> HTTP.Status
5858
headers :: a -> [Header]
@@ -250,7 +250,7 @@ data SchemaCacheError
250250
| NoRelBetween Text Text (Maybe Text) Text RelationshipsMap
251251
| NoRpc Text Text [Text] MediaType Bool [QualifiedIdentifier] [Routine]
252252
| ColumnNotFound Text Text
253-
| TableNotFound Text Text [Table]
253+
| TableNotFound Text Text SchemaCache
254254
deriving Show
255255

256256
instance PgrstError SchemaCacheError where
@@ -313,7 +313,7 @@ instance ErrorBody SchemaCacheError where
313313
where
314314
onlySingleParams = isInvPost && contentType `elem` [MTTextPlain, MTTextXML, MTOctetStream]
315315
hint (AmbiguousRpc _) = Just "Try renaming the parameters or the function itself in the database so function overloading can be resolved"
316-
hint (TableNotFound schemaName relName tbls) = JSON.String <$> tableNotFoundHint schemaName relName tbls
316+
hint (TableNotFound schemaName relName schemaCache) = JSON.String <$> tableNotFoundHint schemaName relName schemaCache
317317

318318
hint _ = Nothing
319319

@@ -428,13 +428,11 @@ noRpcHint schema procName params allProcs overloadedProcs =
428428

429429
-- |
430430
-- Do a fuzzy search in all tables in the same schema and return closest result
431-
tableNotFoundHint :: Text -> Text -> [Table] -> Maybe Text
432-
tableNotFoundHint schema tblName tblList
431+
tableNotFoundHint :: Text -> Text -> SchemaCache -> Maybe Text
432+
tableNotFoundHint schema tblName SchemaCache{dbTablesFuzzyIndex}
433433
= fmap (\tbl -> "Perhaps you meant the table '" <> schema <> "." <> tbl <> "'") perhapsTable
434434
where
435-
perhapsTable = Fuzzy.getOne fuzzyTableSet tblName
436-
fuzzyTableSet = Fuzzy.fromList [ tableName tbl | tbl <- tblList, tableSchema tbl == schema]
437-
435+
perhapsTable = (`Fuzzy.getOne` tblName) =<< HM.lookup schema dbTablesFuzzyIndex
438436

439437
compressedRel :: Relationship -> JSON.Value
440438
-- An ambiguousness error cannot happen for computed relationships TODO refactor so this mempty is not needed

src/PostgREST/Plan.hs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -172,15 +172,15 @@ dbActionPlan dbAct conf apiReq sCache = case dbAct of
172172

173173
wrappedReadPlan :: QualifiedIdentifier -> AppConfig -> SchemaCache -> ApiRequest -> Bool -> Either Error CrudPlan
174174
wrappedReadPlan identifier conf sCache apiRequest@ApiRequest{iPreferences=Preferences{..},..} headersOnly = do
175-
qi <- findTable identifier (dbTables sCache)
175+
qi <- findTable identifier sCache
176176
rPlan <- readPlan qi conf sCache apiRequest
177177
(handler, mediaType) <- mapLeft ApiRequestError $ negotiateContent conf apiRequest qi iAcceptMediaType (dbMediaHandlers sCache) (hasDefaultSelect rPlan)
178178
if not (null invalidPrefs) && preferHandling == Just Strict then Left $ ApiRequestError $ InvalidPreferences invalidPrefs else Right ()
179179
return $ WrappedReadPlan rPlan SQL.Read handler mediaType headersOnly qi
180180

181181
mutateReadPlan :: Mutation -> ApiRequest -> QualifiedIdentifier -> AppConfig -> SchemaCache -> Either Error CrudPlan
182182
mutateReadPlan mutation apiRequest@ApiRequest{iPreferences=Preferences{..},..} identifier conf sCache = do
183-
qi <- findTable identifier (dbTables sCache)
183+
qi <- findTable identifier sCache
184184
rPlan <- readPlan qi conf sCache apiRequest
185185
mPlan <- mutatePlan mutation qi apiRequest sCache rPlan
186186
if not (null invalidPrefs) && preferHandling == Just Strict then Left $ ApiRequestError $ InvalidPreferences invalidPrefs else Right ()
@@ -810,10 +810,10 @@ validateAggFunctions aggFunctionsAllowed (Node rp@ReadPlan {select} forest)
810810
| otherwise = Node rp <$> traverse (validateAggFunctions aggFunctionsAllowed) forest
811811

812812
-- | Lookup table in the schema cache before creating read plan
813-
findTable :: QualifiedIdentifier -> TablesMap -> Either Error QualifiedIdentifier
814-
findTable qi@QualifiedIdentifier{..} tableMap =
815-
case HM.lookup qi tableMap of
816-
Nothing -> Left $ SchemaCacheErr $ TableNotFound qiSchema qiName (HM.elems tableMap)
813+
findTable :: QualifiedIdentifier -> SchemaCache -> Either Error QualifiedIdentifier
814+
findTable qi@QualifiedIdentifier{..} sc@SchemaCache{dbTables} =
815+
case HM.lookup qi dbTables of
816+
Nothing -> Left $ SchemaCacheErr $ TableNotFound qiSchema qiName sc
817817
Just _ -> Right qi
818818

819819
addFilters :: ResolverContext -> ApiRequest -> ReadPlanTree -> Either Error ReadPlanTree

src/PostgREST/Response.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,10 +209,10 @@ actionResponse (MaybeDbResult InspectPlan{ipHdrsOnly=headersOnly} body) _ versio
209209
in
210210
Right $ PgrstResponse HTTP.status200 (MediaType.toContentType MTOpenAPI : cLHeader ++ maybeToList (profileHeader schema negotiatedByProfile)) rsBody
211211

212-
actionResponse (NoDbResult (RelInfoPlan qi@QualifiedIdentifier{..})) _ _ _ SchemaCache{dbTables} _ _ =
212+
actionResponse (NoDbResult (RelInfoPlan qi@QualifiedIdentifier{..})) _ _ _ sc@SchemaCache{dbTables} _ _ =
213213
case HM.lookup qi dbTables of
214214
Just tbl -> respondInfo $ allowH tbl
215-
Nothing -> Left $ Error.SchemaCacheErr $ Error.TableNotFound qiSchema qiName (HM.elems dbTables)
215+
Nothing -> Left $ Error.SchemaCacheErr $ Error.TableNotFound qiSchema qiName sc
216216
where
217217
allowH table =
218218
let hasPK = not . null $ tablePKCols table in

src/PostgREST/SchemaCache.hs

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ These queries are executed once at startup or when PostgREST is reloaded.
2020

2121
module PostgREST.SchemaCache
2222
( SchemaCache(..)
23+
, TablesFuzzyIndex
2324
, querySchemaCache
2425
, showSummary
2526
, decodeFuncs
@@ -66,21 +67,28 @@ import PostgREST.SchemaCache.Table (Column (..), ColumnMap,
6667

6768
import qualified PostgREST.MediaType as MediaType
6869

69-
import Control.Arrow ((&&&))
70-
import Protolude
71-
import System.IO.Unsafe (unsafePerformIO)
70+
import Control.Arrow ((&&&))
71+
import qualified Data.FuzzySet as Fuzzy
72+
import Protolude
73+
import System.IO.Unsafe (unsafePerformIO)
74+
75+
type TablesFuzzyIndex = HM.HashMap Schema Fuzzy.FuzzySet
7276

7377
data SchemaCache = SchemaCache
74-
{ dbTables :: TablesMap
75-
, dbRelationships :: RelationshipsMap
76-
, dbRoutines :: RoutineMap
77-
, dbRepresentations :: RepresentationsMap
78-
, dbMediaHandlers :: MediaHandlerMap
79-
, dbTimezones :: TimezoneNames
80-
}
78+
{ dbTables :: TablesMap
79+
, dbRelationships :: RelationshipsMap
80+
, dbRoutines :: RoutineMap
81+
, dbRepresentations :: RepresentationsMap
82+
, dbMediaHandlers :: MediaHandlerMap
83+
, dbTimezones :: TimezoneNames
84+
-- Memoized fuzzy index of table names per schema to support approximate matching
85+
-- Since index construction can be expensive, we build it once and store it in the SchemaCache
86+
-- Haskell lazy evaluation ensures it's only built on first use and memoized afterwards
87+
, dbTablesFuzzyIndex :: TablesFuzzyIndex
88+
} deriving (Show)
8189

8290
instance JSON.ToJSON SchemaCache where
83-
toJSON (SchemaCache tabs rels routs reps hdlers tzs) = JSON.object [
91+
toJSON (SchemaCache tabs rels routs reps hdlers tzs _) = JSON.object [
8492
"dbTables" .= JSON.toJSON tabs
8593
, "dbRelationships" .= JSON.toJSON rels
8694
, "dbRoutines" .= JSON.toJSON routs
@@ -90,9 +98,9 @@ instance JSON.ToJSON SchemaCache where
9098
]
9199

92100
showSummary :: SchemaCache -> Text
93-
showSummary (SchemaCache tbls rels routs reps mediaHdlrs tzs) =
101+
showSummary (SchemaCache dbTables rels routs reps mediaHdlrs tzs _) =
94102
T.intercalate ", "
95-
[ show (HM.size tbls) <> " Relations"
103+
[ show (HM.size dbTables) <> " Relations"
96104
, show (HM.size rels) <> " Relationships"
97105
, show (HM.size routs) <> " Functions"
98106
, show (HM.size reps) <> " Domain Representations"
@@ -138,6 +146,8 @@ data KeyDep
138146
-- | A SQL query that can be executed independently
139147
type SqlQuery = ByteString
140148

149+
maxDbTablesForFuzzySearch :: Int
150+
maxDbTablesForFuzzySearch = 500
141151

142152
querySchemaCache :: AppConfig -> SQL.Transaction SchemaCache
143153
querySchemaCache conf@AppConfig{..} = do
@@ -166,6 +176,11 @@ querySchemaCache conf@AppConfig{..} = do
166176
, dbRepresentations = reps
167177
, dbMediaHandlers = HM.union mHdlers initialMediaHandlers -- the custom handlers will override the initial ones
168178
, dbTimezones = tzones
179+
180+
, dbTablesFuzzyIndex =
181+
-- Only build fuzzy index for schemas with a reasonable number of tables
182+
-- Fuzzy.FuzzySet is memory heavy, so we just don't use it for large schemas
183+
Fuzzy.fromList <$> HM.filter ((< maxDbTablesForFuzzySearch) . length) (HM.fromListWith (<>) ((qiSchema &&& pure . qiName) <$> HM.keys tabsWViewsPks))
169184
}
170185
where
171186
schemas = toList configDbSchemas
@@ -203,6 +218,7 @@ removeInternal schemas dbStruct =
203218
, dbRepresentations = dbRepresentations dbStruct -- no need to filter, not directly exposed through the API
204219
, dbMediaHandlers = dbMediaHandlers dbStruct
205220
, dbTimezones = dbTimezones dbStruct
221+
, dbTablesFuzzyIndex = dbTablesFuzzyIndex dbStruct
206222
}
207223
where
208224
hasInternalJunction ComputedRelationship{} = False

test/io/fixtures/big_schema.sql

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11375,12 +11375,34 @@ ALTER TABLE ONLY apflora.zielber
1137511375

1137611376
ALTER TABLE apflora."user" ENABLE ROW LEVEL SECURITY;
1137711377

11378+
11379+
CREATE SCHEMA fuzzysearch;
11380+
11381+
-- Create many tables to test fuzzy string search
11382+
-- computing hints for non existing tables
11383+
DO
11384+
$$
11385+
DECLARE
11386+
r record;
11387+
BEGIN
11388+
FOR r IN
11389+
SELECT
11390+
format('CREATE TABLE fuzzysearch.unknown_table_%s ()', n) AS ct
11391+
FROM
11392+
generate_series(1, 499) n
11393+
LOOP
11394+
EXECUTE r.ct;
11395+
END LOOP;
11396+
END
11397+
$$;
11398+
1137811399
DROP ROLE IF EXISTS postgrest_test_anonymous;
1137911400
CREATE ROLE postgrest_test_anonymous;
1138011401

1138111402
GRANT postgrest_test_anonymous TO :PGUSER;
1138211403

1138311404
GRANT USAGE ON SCHEMA apflora TO postgrest_test_anonymous;
11405+
GRANT USAGE ON SCHEMA fuzzysearch TO postgrest_test_anonymous;
1138411406

1138511407
GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA apflora
1138611408
TO postgrest_test_anonymous;

test/io/test_big_schema.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,23 @@ def test_should_not_fail_with_stack_overflow(defaultenv):
7070
assert response.status_code == 404
7171
data = response.json()
7272
assert data["code"] == "PGRST205"
73+
74+
75+
def test_second_request_for_non_existent_table_should_be_quick(defaultenv):
76+
"requesting a non-existent relationship should be quick after the fuzzy search index is loaded (2nd request)"
77+
78+
env = {
79+
**defaultenv,
80+
"PGRST_DB_SCHEMAS": "fuzzysearch",
81+
"PGRST_DB_POOL": "2",
82+
"PGRST_DB_ANON_ROLE": "postgrest_test_anonymous",
83+
}
84+
85+
with run(env=env, wait_max_seconds=30) as postgrest:
86+
response = postgrest.session.get("/unknown-table")
87+
assert response.status_code == 404
88+
data = response.json()
89+
assert data["code"] == "PGRST205"
90+
first_duration = response.elapsed.total_seconds()
91+
response = postgrest.session.get("/unknown-table")
92+
assert response.elapsed.total_seconds() < first_duration / 10

0 commit comments

Comments
 (0)