Hello, I am running the following query in ArangoDB:
// Fulltext search over `datasets`, restricted to an allowed set of dataset ids.
// Returns up to 20 matching dataset ids ordered by a per-dataset score, plus the
// total number of matches found before the limit is applied.
// Bind parameters: @searchWords (fulltext query string), @epsilon (added to the
// divisor of the score expression).

// Empty in this query; still contributes to the UNION that builds searchTarget.
LET catalogDatasets = []
// Ids of every dataset whose visibility is "open".
LET openDatasets = ( FOR d IN datasets FILTER d.visibility == "open" RETURN d._id )
// Empty in this query; still contributes to the UNION that builds searchTarget.
LET myDatasets = []
// Empty in this query; still contributes to the UNION that builds searchTarget.
LET myPurchasedDatasets = []
// De-duplicated list of all dataset ids the search is allowed to return.
LET searchTarget = UNIQUE( UNION( catalogDatasets, openDatasets, myDatasets, myPurchasedDatasets ) )
// Ids of all fulltext matches on the "word_list" attribute that are also in
// searchTarget. No ordering and no limit is applied at this stage.
LET unorderedDatasetsIds = (
FOR dataset IN FULLTEXT(datasets, "word_list", @searchWords)
FILTER dataset._id IN searchTarget RETURN dataset._id
)
// For each matched dataset, join its wordLinks entries with the linked `words`
// documents and group the per-link score values by dataset id.
// NOTE(review): wordLinks is accessed via _from/_to, so it is presumably an edge
// collection from datasets to words - confirm.
LET ordered = (
FOR wl IN wordLinks
FILTER wl._from IN unorderedDatasetsIds
FOR x IN words
// Inner join: a link is kept only if a `words` document with _id == wl._to exists.
FILTER x._id == wl._to
// `INTO score = <expr>` collects the expression value of every group member,
// so `score` is an array with one entry per (link, word) pair of the dataset.
// NOTE(review): SORT below therefore compares arrays rather than one number per
// dataset - confirm whether a single aggregated value (e.g. a sum) was intended.
COLLECT did = wl._from INTO score = wl.invFq/(x.numEdges+@epsilon)
// Ascending order (AQL default when no direction is given).
SORT score
// Offset 0, at most 20 results.
LIMIT 0, 20
RETURN did
)
RETURN {
dids: ordered,
// Counts every fulltext match within searchTarget, not only the 20 returned ids.
number_of_items: LENGTH(unorderedDatasetsIds)
}
My search words all use prefixes, like:
pref:banana,|pref:chocollate
Basically, I want to optimize this query because it takes around 2 seconds to return. One of my ideas was to limit the fulltext search to 1000 items, but then the selected datasets would be effectively random, since they would depend on the order in which ArangoDB returns the results. What kind of optimizations could I apply to make it faster?