Skip to content

Commit 1036ffa

Browse files
committed
Split query into whitespace chunks and search with word boundaries
1 parent 3cb7eb1 commit 1036ffa

File tree

3 files changed: 33 additions and 5 deletions.

pydatalab/src/pydatalab/routes/v0_1/items.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -328,11 +328,14 @@ def search_items():
328328
pipeline.append({"$match": match_obj})
329329
pipeline.append({"$sort": {"score": {"$meta": "textScore"}}})
330330
else:
331-
query = re.escape(query)
332-
LOGGER.debug("Performing regex search for %s", query)
331+
query_parts = [r"\b" + re.escape(part) for part in query.split(" ") if part]
333332
match_obj = {
334-
"$or": [{field: {"$regex": query, "$options": "i"}} for field in ITEMS_FTS_FIELDS]
333+
"$or": [
334+
{"$and": [{field: {"$regex": query, "$options": "i"}} for query in query_parts]}
335+
for field in ITEMS_FTS_FIELDS
336+
]
335337
}
338+
LOGGER.debug("Performing regex search for %s with full search %s", query_parts, match_obj)
336339
match_obj = {"$and": [get_default_permissions(user_only=False), match_obj]}
337340
if types is not None:
338341
match_obj["$and"].append({"type": {"$in": types}})

pydatalab/tests/server/conftest.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -494,7 +494,7 @@ def example_items(user_id, admin_user_id):
494494
Sample(
495495
**{
496496
"item_id": "sample_2",
497-
"chemform": "vanadium (II) oxide",
497+
"chemform": "vanadium(II) oxide",
498498
"name": "other_sample",
499499
"date": "1970-02-01",
500500
"refcode": "grey:TEST3",
@@ -516,7 +516,7 @@ def example_items(user_id, admin_user_id):
516516
**{
517517
"item_id": "test",
518518
"chemform": "NaNiO2",
519-
"name": "NaNiO2-v",
519+
"name": "NaNiO2v",
520520
"date": "1970-02-01",
521521
"description": "magic",
522522
"refcode": "grey:TEST5",

pydatalab/tests/server/test_samples.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,23 @@ def test_item_search(client, admin_client, real_mongo_client, example_items):
251251
assert len(item_ids) == 1
252252
assert "sample_2" in item_ids
253253

254+
# Search for two words present in sample in either order
255+
response = admin_client.get("/search-items/?query='vanadium oxide'")
256+
257+
assert response.status_code == 200
258+
assert response.json["status"] == "success"
259+
item_ids = {item["item_id"] for item in response.json["items"]}
260+
assert len(item_ids) == 1
261+
assert "sample_2" in item_ids
262+
263+
response = admin_client.get("/search-items/?query='oxide vanadium'")
264+
265+
assert response.status_code == 200
266+
assert response.json["status"] == "success"
267+
item_ids = {item["item_id"] for item in response.json["items"]}
268+
assert len(item_ids) == 1
269+
assert "sample_2" in item_ids
270+
254271
# Search for single char at start of word
255272
response = admin_client.get("/search-items/?query='v'")
256273

@@ -260,6 +277,14 @@ def test_item_search(client, admin_client, real_mongo_client, example_items):
260277
assert len(item_ids) == 1
261278
assert "sample_2" in item_ids
262279

280+
# Search for word ending that should not return results
281+
response = admin_client.get("/search-items/?query='anadium'")
282+
283+
assert response.status_code == 200
284+
assert response.json["status"] == "success"
285+
item_ids = {item["item_id"] for item in response.json["items"]}
286+
assert len(item_ids) == 0
287+
263288

264289
@pytest.mark.dependency(depends=["test_delete_sample"])
265290
def test_new_sample_with_relationships(client, complicated_sample):

0 commit comments

Comments
 (0)