39 changes: 38 additions & 1 deletion src/daemon/src/core/file_index_manager.cpp
@@ -108,6 +108,41 @@ file_record make_file_record(const std::filesystem::path& p,
#define PINYIN_FIELD L"pinyin"
#define PINYIN_ACRONYM_FIELD L"pinyin_acronym"
#define IS_HIDDEN_FIELD L"is_hidden"
#define ANCESTOR_PATHS_FIELD L"ancestor_paths"

/**
 * Helper: walk up the file path and add every ancestor directory to the index as a Term.
 *
 * Example:
 * If full_path is "/home/user/project/main.cpp",
 * the following Terms are added to ANCESTOR_PATHS_FIELD:
 * 1. "/home/user/project"
 * 2. "/home/user"
 * 3. "/home"
 * 4. "/"
 */
static void add_ancestor_paths(DocumentPtr doc, const std::string &full_path)
{
if (full_path.empty() || full_path[0] != '/')
return;

std::string current_path = full_path;
size_t last_slash = 1;

while (last_slash != 0) {
last_slash = current_path.find_last_of('/');
current_path.resize(last_slash == 0 ? 1 : last_slash);

// Add the field.
// Notes:
// 1. Field::STORE_NO: the ancestor paths only need to be searchable, never retrieved
//    for display, so not storing them saves space.
// 2. Field::INDEX_NOT_ANALYZED: index each path as a single term so the analyzer
//    does not split it.
doc->add(newLucene<Field>(ANCESTOR_PATHS_FIELD,
StringUtils::toUnicode(current_path),
Field::STORE_NO,
Field::INDEX_NOT_ANALYZED));
}
}

DocumentPtr create_document(const file_record& record) {
DocumentPtr doc = newLucene<Document>();
@@ -154,11 +189,13 @@ DocumentPtr create_document(const file_record& record) {
(record.is_hidden ? L"Y" : L"N"),
Field::STORE_YES, Field::INDEX_NOT_ANALYZED));

add_ancestor_paths(doc, record.full_path);

return doc;
}


#define INDEX_VERSION L"3"
#define INDEX_VERSION L"4"
#define INDEX_VERSION_FIELD L"index_version"

file_index_manager::file_index_manager(const std::string& persistent_index_dir,
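As a quick sanity check of the new helper's loop, here is a minimal standalone sketch (hypothetical, not part of this change) that collects the same ancestor set into a vector instead of adding Lucene fields:

```cpp
#include <iostream>
#include <string>
#include <vector>

// Same loop as add_ancestor_paths, but collecting the ancestors into a vector
// so the decomposition can be verified in isolation.
static std::vector<std::string> ancestor_paths(const std::string &full_path)
{
    std::vector<std::string> out;
    if (full_path.empty() || full_path[0] != '/')
        return out;

    std::string current_path = full_path;
    size_t last_slash = 1;

    while (last_slash != 0) {
        last_slash = current_path.find_last_of('/');
        current_path.resize(last_slash == 0 ? 1 : last_slash);
        out.push_back(current_path);
    }
    return out;
}

int main()
{
    // Prints: /home/user/project, /home/user, /home, /
    for (const auto &p : ancestor_paths("/home/user/project/main.cpp"))
        std::cout << p << '\n';
}
```

Note that "/" is always emitted last, so a search scoped to the filesystem root matches every indexed file.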
51 changes: 28 additions & 23 deletions src/searcher/searcher.cpp
@@ -53,50 +53,55 @@ std::vector<std::string> Searcher::search(const std::string& path,
}

try {
QueryPtr query_ptr;
BooleanQueryPtr finalQuery = newLucene<BooleanQuery>();
String query_string = StringUtils::toLower(StringUtils::toUnicode(query.c_str()));

// File-name search
if (wildcard_query) {
TermPtr term = newLucene<Term>(L"file_name_lower", query_string);
query_ptr = newLucene<WildcardQuery>(term);
QueryPtr wildcardQuery = newLucene<WildcardQuery>(term);
finalQuery->add(wildcardQuery, BooleanClause::MUST);
} else {
// Create the query parser
AnalyzerPtr analyzer = newLucene<ChineseAnalyzer>();
QueryParserPtr parser = newLucene<QueryParser>(LuceneVersion::LUCENE_CURRENT, L"file_name", analyzer);

// Parse the query
query_ptr = parser->parse(query_string);
QueryPtr parserQuery = parser->parse(query_string);
finalQuery->add(parserQuery, BooleanClause::MUST);
}


// Search-path prefix filter
{
g_autofree gchar *canonical = g_canonicalize_filename (path.c_str(), NULL);
TermPtr term = newLucene<Term>(L"ancestor_paths", StringUtils::toUnicode(canonical));
QueryPtr prefixQuery = newLucene<TermQuery>(term);
finalQuery->add(prefixQuery, BooleanClause::MUST);
}

// Execute the search
if (max_results == 0) {
max_results = reader->numDocs();
}
std::cout << "Doc num: " << reader->numDocs() << std::endl;
TopDocsPtr topDocs = searcher->search(query_ptr, max_results);
TopDocsPtr topDocs = searcher->search(finalQuery, max_results);
results.reserve(topDocs->totalHits);

// Process the search results
std::string path_with_slash = path;
if (!string_helper::ends_with(path_with_slash, "/")) {
path_with_slash += "/";
}
for (int32_t i = 0; i < topDocs->totalHits; ++i) {
ScoreDocPtr scoreDoc = topDocs->scoreDocs[i];
DocumentPtr doc = searcher->doc(scoreDoc->doc);
std::string full_path = StringUtils::toUTF8(doc->get(L"full_path"));
if (string_helper::starts_with(full_path, path_with_slash)) {
std::stringstream ss;
ss << full_path
<< "<\\>" << StringUtils::toUTF8(doc->get(L"file_type"))
<< "<\\>" << StringUtils::toUTF8(doc->get(L"file_ext"))
<< "<\\>" << StringUtils::toUTF8(doc->get(L"modify_time_str"))
<< "<\\>" << StringUtils::toUTF8(doc->get(L"file_size_str"))
<< "<\\>" << StringUtils::toUTF8(doc->get(L"pinyin"))
<< "<\\>" << StringUtils::toUTF8(doc->get(L"pinyin_acronym"))
<< "<\\>" << StringUtils::toUTF8(doc->get(L"is_hidden"));
std::string result = ss.str();
results.push_back(result);
}
std::stringstream ss;
ss << StringUtils::toUTF8(doc->get(L"full_path"))
<< "<\\>" << StringUtils::toUTF8(doc->get(L"file_type"))
<< "<\\>" << StringUtils::toUTF8(doc->get(L"file_ext"))
<< "<\\>" << StringUtils::toUTF8(doc->get(L"modify_time_str"))
<< "<\\>" << StringUtils::toUTF8(doc->get(L"file_size_str"))
<< "<\\>" << StringUtils::toUTF8(doc->get(L"pinyin"))
<< "<\\>" << StringUtils::toUTF8(doc->get(L"pinyin_acronym"))
<< "<\\>" << StringUtils::toUTF8(doc->get(L"is_hidden"));
std::string result = ss.str();
results.push_back(result);
}
} catch (const LuceneException& e) {
std::cerr << "Search failed: " << StringUtils::toUTF8(e.getError()) << std::endl;
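For reference, the two MUST clauses that the patched search path assembles can be sketched in isolation as follows. This is a hypothetical helper shown only to make the intent of the query-side change explicit; the field names are the ones used in the diff, and `LuceneHeaders.h` is the Lucene++ convenience include:

```cpp
#include "LuceneHeaders.h"
#include <string>

using namespace Lucene;

// Illustrative only: build the same two-clause query that the patched search()
// builds, e.g. for the wildcard query "report*" scoped to "/home/user".
static QueryPtr build_scoped_query(const String &query_string, const std::string &canonical_dir)
{
    BooleanQueryPtr finalQuery = newLucene<BooleanQuery>();

    // Clause 1: file-name match (wildcard form shown here).
    TermPtr nameTerm = newLucene<Term>(L"file_name_lower", query_string);
    finalQuery->add(newLucene<WildcardQuery>(nameTerm), BooleanClause::MUST);

    // Clause 2: exact term match against the indexed ancestor directories;
    // this replaces the old starts_with() post-filter on full_path.
    TermPtr dirTerm = newLucene<Term>(L"ancestor_paths", StringUtils::toUnicode(canonical_dir));
    finalQuery->add(newLucene<TermQuery>(dirTerm), BooleanClause::MUST);

    return finalQuery;
}
```

Because every ancestor up to "/" is indexed per document, a single exact TermQuery on `ancestor_paths` matches files at any depth under the requested directory, which is why the result loop no longer needs the `path_with_slash` / `starts_with` check.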