From 0370616b949b57440aa8369d5e5ca0246ae94983 Mon Sep 17 00:00:00 2001 From: agoujot Date: Sat, 9 Aug 2025 20:08:26 +0200 Subject: [PATCH 1/8] check replicas are there, when meta_p says so --- src/Model/Project.php | 2 +- src/Repository/ProjectRepository.php | 30 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/Model/Project.php b/src/Model/Project.php index 6fe86081e..b40496ffa 100644 --- a/src/Model/Project.php +++ b/src/Model/Project.php @@ -116,7 +116,7 @@ protected function getMetadata(): ?array */ public function exists(): bool { - return !empty($this->getDomain()); + return !empty($this->getDomain()) && $this->repository->hasPageTable($this->getBasicInfo()['dbName']); } /** diff --git a/src/Repository/ProjectRepository.php b/src/Repository/ProjectRepository.php index 48ddfe5b4..0bf9f80dd 100644 --- a/src/Repository/ProjectRepository.php +++ b/src/Repository/ProjectRepository.php @@ -241,6 +241,36 @@ public function getOne(string $project): ?array return $this->setCache($cacheKey, $basicInfo, 'PT1H'); } + /** + * Can we find this project's page table? + * If not, this project has not been replicated yet, + * despite being listed in meta_p.wiki. See T322466. + * The implementation is a bit dirty, but we do not + * have the permissions for anything better. + * @param string $project Database name, without _p. + * @return bool + */ + public function hasPageTable(string $project): bool + { + $cacheKey = $this->getCacheKey($project, "has_page"); + if ($this->cache->hasItem($cacheKey) && false) { + return $this->cache->getItem($cacheKey)->get(); + } + + $pageTable = $this->getTableName($project, "page"); + $sql = "SELECT 1 FROM $pageTable LIMIT 1"; + try { + $this->executeProjectsQuery($project, $sql, [ + 'project' => $project + ])->fetchAssociative(); + $result = true; + } catch(\Exception $e) { + $result = false; + } + // Cache for 1h and return + return $this->setCache($cacheKey, $result, 'PT1H'); // feels long to me, but as long as getOne + } + /** * Get metadata about a project, including the 'dbName', 'url' and 'lang' * From a6e31fea6a23ce5e18b50662547c0b477a7e7ab9 Mon Sep 17 00:00:00 2001 From: agoujot Date: Sun, 10 Aug 2025 01:36:00 +0200 Subject: [PATCH 2/8] lints --- src/Repository/ProjectRepository.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Repository/ProjectRepository.php b/src/Repository/ProjectRepository.php index 0bf9f80dd..a544a5cdf 100644 --- a/src/Repository/ProjectRepository.php +++ b/src/Repository/ProjectRepository.php @@ -261,10 +261,10 @@ public function hasPageTable(string $project): bool $sql = "SELECT 1 FROM $pageTable LIMIT 1"; try { $this->executeProjectsQuery($project, $sql, [ - 'project' => $project + 'project' => $project, ])->fetchAssociative(); $result = true; - } catch(\Exception $e) { + } catch (\Exception $e) { $result = false; } // Cache for 1h and return From 6d8087b49b7b45b060259e7fdb1538fc142d7426 Mon Sep 17 00:00:00 2001 From: agoujot Date: Sun, 10 Aug 2025 02:25:58 +0200 Subject: [PATCH 3/8] fix tests --- src/Repository/ProjectRepository.php | 8 ++++++-- tests/Model/GlobalContribsTest.php | 2 ++ tests/TestAdapter.php | 2 ++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Repository/ProjectRepository.php b/src/Repository/ProjectRepository.php index a544a5cdf..5ec26b989 100644 --- a/src/Repository/ProjectRepository.php +++ b/src/Repository/ProjectRepository.php @@ -7,6 +7,7 @@ use App\Model\PageAssessments; use App\Model\Project; use Doctrine\DBAL\Connection; +use Doctrine\DBAL\Driver\PDO\Exception as PDOException; use Doctrine\Persistence\ManagerRegistry; use Exception; use GuzzleHttp\Client; @@ -264,8 +265,11 @@ public function hasPageTable(string $project): bool 'project' => $project, ])->fetchAssociative(); $result = true; - } catch (\Exception $e) { - $result = false; + } catch (PDOException $e) { + $code = (int)$e->getCode(); + $result = 42000 !== $code; // Syntax error/access violation; including specifically missing table + } catch (\Exception $e) { // Some other exception--AGF. Notably prevents crash of many tests + $result = true; } // Cache for 1h and return return $this->setCache($cacheKey, $result, 'PT1H'); // feels long to me, but as long as getOne diff --git a/tests/Model/GlobalContribsTest.php b/tests/Model/GlobalContribsTest.php index 635eb2196..eb67888fc 100644 --- a/tests/Model/GlobalContribsTest.php +++ b/tests/Model/GlobalContribsTest.php @@ -90,6 +90,8 @@ public function testGlobalEdits(): void 'dbName' => 'wiki1', 'url' => 'https://wiki1.example.org', ]); + $wiki1Repo->method('hasPageTable') + ->willReturn(true); $wiki1 = new Project('wiki1'); $wiki1->setRepository($wiki1Repo); diff --git a/tests/TestAdapter.php b/tests/TestAdapter.php index 18c586178..792a6d57b 100644 --- a/tests/TestAdapter.php +++ b/tests/TestAdapter.php @@ -32,6 +32,8 @@ public function getProjectRepo(): MockObject 'dbName' => 'test_wiki', 'lang' => 'en', ]); + $repo->method('hasPageTable') + ->willReturn(true); return $repo; } From 272b43187cef02843281231c957046afc2369873 Mon Sep 17 00:00:00 2001 From: alien4444 Date: Tue, 21 Oct 2025 00:15:28 +0200 Subject: [PATCH 4/8] cleaner method --- src/Model/Project.php | 2 +- src/Repository/ProjectRepository.php | 35 +++++++++++----------------- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/src/Model/Project.php b/src/Model/Project.php index b40496ffa..f3aed71a7 100644 --- a/src/Model/Project.php +++ b/src/Model/Project.php @@ -116,7 +116,7 @@ protected function getMetadata(): ?array */ public function exists(): bool { - return !empty($this->getDomain()) && $this->repository->hasPageTable($this->getBasicInfo()['dbName']); + return !empty($this->getDomain()) && $this->repository->hasTables($this->getBasicInfo()['dbName']); } /** diff --git a/src/Repository/ProjectRepository.php b/src/Repository/ProjectRepository.php index 5ec26b989..0ea68c63f 100644 --- a/src/Repository/ProjectRepository.php +++ b/src/Repository/ProjectRepository.php @@ -243,34 +243,27 @@ public function getOne(string $project): ?array } /** - * Can we find this project's page table? - * If not, this project has not been replicated yet, + * Is this project actually replicated? Sometimes projets aren't, * despite being listed in meta_p.wiki. See T322466. - * The implementation is a bit dirty, but we do not - * have the permissions for anything better. * @param string $project Database name, without _p. * @return bool */ - public function hasPageTable(string $project): bool + public function hasTables(string $project): bool { - $cacheKey = $this->getCacheKey($project, "has_page"); - if ($this->cache->hasItem($cacheKey) && false) { + $cacheKey = $this->getCacheKey($project, "has_tables"); + if ($this->cache->hasItem($cacheKey)) { return $this->cache->getItem($cacheKey)->get(); } - - $pageTable = $this->getTableName($project, "page"); - $sql = "SELECT 1 FROM $pageTable LIMIT 1"; - try { - $this->executeProjectsQuery($project, $sql, [ - 'project' => $project, - ])->fetchAssociative(); - $result = true; - } catch (PDOException $e) { - $code = (int)$e->getCode(); - $result = 42000 !== $code; // Syntax error/access violation; including specifically missing table - } catch (\Exception $e) { // Some other exception--AGF. Notably prevents crash of many tests - $result = true; - } + $dbList = $this->getDbList(); + $dbSlice = $dbList[$project]; + $sql = "SELECT 1 + FROM information_schema.tables + WHERE table_schema = :project + LIMIT 1"; + $queryResult = $this->executeProjectsQuery($dbSlice, $sql, [ + 'project' => $project . "_p", + ])->fetchAssociative(); + $result = (count($queryResult) == 1); // Cache for 1h and return return $this->setCache($cacheKey, $result, 'PT1H'); // feels long to me, but as long as getOne } From cb801b61dc2ca8e78071592e4f4b3922d1994d56 Mon Sep 17 00:00:00 2001 From: alien4444 Date: Tue, 21 Oct 2025 00:39:23 +0200 Subject: [PATCH 5/8] some fixes; tests currently broken due to UA policy --- config/packages/eight_points_guzzle.yaml | 1 + src/Repository/ProjectRepository.php | 3 +-- tests/Model/GlobalContribsTest.php | 2 +- tests/TestAdapter.php | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config/packages/eight_points_guzzle.yaml b/config/packages/eight_points_guzzle.yaml index 5b1d976e6..4615c8931 100644 --- a/config/packages/eight_points_guzzle.yaml +++ b/config/packages/eight_points_guzzle.yaml @@ -6,4 +6,5 @@ eight_points_guzzle: options: headers: Accept: "application/json" + User-Agent: 'XTools/%env(APP_VERSION)% (https://xtools.wmcloud.org %env(MAILER_TO_EMAIL)%) GuzzleHttp' timeout: 30 diff --git a/src/Repository/ProjectRepository.php b/src/Repository/ProjectRepository.php index 0ea68c63f..ecd339053 100644 --- a/src/Repository/ProjectRepository.php +++ b/src/Repository/ProjectRepository.php @@ -7,7 +7,6 @@ use App\Model\PageAssessments; use App\Model\Project; use Doctrine\DBAL\Connection; -use Doctrine\DBAL\Driver\PDO\Exception as PDOException; use Doctrine\Persistence\ManagerRegistry; use Exception; use GuzzleHttp\Client; @@ -263,7 +262,7 @@ public function hasTables(string $project): bool $queryResult = $this->executeProjectsQuery($dbSlice, $sql, [ 'project' => $project . "_p", ])->fetchAssociative(); - $result = (count($queryResult) == 1); + $result = (1 == count($queryResult)); // Cache for 1h and return return $this->setCache($cacheKey, $result, 'PT1H'); // feels long to me, but as long as getOne } diff --git a/tests/Model/GlobalContribsTest.php b/tests/Model/GlobalContribsTest.php index eb67888fc..398595906 100644 --- a/tests/Model/GlobalContribsTest.php +++ b/tests/Model/GlobalContribsTest.php @@ -90,7 +90,7 @@ public function testGlobalEdits(): void 'dbName' => 'wiki1', 'url' => 'https://wiki1.example.org', ]); - $wiki1Repo->method('hasPageTable') + $wiki1Repo->method('hasTables') ->willReturn(true); $wiki1 = new Project('wiki1'); $wiki1->setRepository($wiki1Repo); diff --git a/tests/TestAdapter.php b/tests/TestAdapter.php index 792a6d57b..55ab3974b 100644 --- a/tests/TestAdapter.php +++ b/tests/TestAdapter.php @@ -32,7 +32,7 @@ public function getProjectRepo(): MockObject 'dbName' => 'test_wiki', 'lang' => 'en', ]); - $repo->method('hasPageTable') + $repo->method('hasTables') ->willReturn(true); return $repo; } From a3c7a56b55fc7c39efaa89bba1209b0230405743 Mon Sep 17 00:00:00 2001 From: alien4444 Date: Tue, 21 Oct 2025 12:49:18 +0200 Subject: [PATCH 6/8] batching of replication checks in gc and some cleanup --- src/Model/Project.php | 2 +- src/Repository/GlobalContribsRepository.php | 29 +++++++++++++++++++ src/Repository/ProjectRepository.php | 32 ++++++++++++++------- tests/Model/GlobalContribsTest.php | 5 +++- tests/TestAdapter.php | 2 +- 5 files changed, 56 insertions(+), 14 deletions(-) diff --git a/src/Model/Project.php b/src/Model/Project.php index f3aed71a7..8999629b7 100644 --- a/src/Model/Project.php +++ b/src/Model/Project.php @@ -116,7 +116,7 @@ protected function getMetadata(): ?array */ public function exists(): bool { - return !empty($this->getDomain()) && $this->repository->hasTables($this->getBasicInfo()['dbName']); + return !empty($this->getDomain()) && $this->repository->checkReplication($this->getBasicInfo()['dbName']); } /** diff --git a/src/Repository/GlobalContribsRepository.php b/src/Repository/GlobalContribsRepository.php index 26b4ba9e9..396f2db0b 100644 --- a/src/Repository/GlobalContribsRepository.php +++ b/src/Repository/GlobalContribsRepository.php @@ -60,6 +60,7 @@ public function globalEditCounts(User $user): ?array // Pre-populate all projects' metadata, to prevent each project call from fetching it. $this->caProject->getRepository()->getAll(); + $this->checkReplicationAllProjects(); // Compile the output. $out = []; @@ -117,6 +118,34 @@ protected function globalEditCountsFromCentralAuth(User $user): ?array return $this->setCache($cacheKey, $out); } + /** + * Get, slice by slice, the list of projects that are actually replicated. + * Takes about 0.5s per slice. + * @return bool[] Keyed by database name, all values are true. + */ + public function checkReplicationAllProjects(): array + { + $cacheKey = $this->getCacheKey("global_replicationn_check"); + if ($this->cache->hasItem($cacheKey)) { + return $this->cache->getItem($cacheKey)->get(); + } + $result = []; + $exists = true; + $i = 0; + $sql = "SELECT DISTINCT table_schema + FROM information_schema.tables"; + while ($exists) { + $i += 1; + try { + $queryResult = $this->executeProjetsQuery("s$i", $sql)->fetchFirstColumn(); + $result = array_merge($result, $queryResult); + } catch (Exception) { + $exists = false; + } + } + return $this->setCache($cacheKey, $result, 'PT1H'); + } + /** * Loop through the given dbNames and create Project objects for each. * @param array $dbNames diff --git a/src/Repository/ProjectRepository.php b/src/Repository/ProjectRepository.php index ecd339053..e09dbf36f 100644 --- a/src/Repository/ProjectRepository.php +++ b/src/Repository/ProjectRepository.php @@ -247,22 +247,32 @@ public function getOne(string $project): ?array * @param string $project Database name, without _p. * @return bool */ - public function hasTables(string $project): bool + public function checkReplication(string $project): bool { - $cacheKey = $this->getCacheKey($project, "has_tables"); + $cacheKey = $this->getCacheKey($project, "replication_check"); if ($this->cache->hasItem($cacheKey)) { return $this->cache->getItem($cacheKey)->get(); } + // GlobalContribs preloads replication checks for *all* projects + $allProjectsCacheKey = $this->getCacheKey("global_replication_check"); + if ($this->cache->hasItem($allProjectsCacheKey)) { + $globalReplicationChecks = $this->cache->getItem($allProjectsCacheKey)->get(); + return array_key_exists($globalReplicationChecks, $project); + } $dbList = $this->getDbList(); - $dbSlice = $dbList[$project]; - $sql = "SELECT 1 - FROM information_schema.tables - WHERE table_schema = :project - LIMIT 1"; - $queryResult = $this->executeProjectsQuery($dbSlice, $sql, [ - 'project' => $project . "_p", - ])->fetchAssociative(); - $result = (1 == count($queryResult)); + if (!array_key_exists($project, $dbList)) { + $result = false; + } else { + $dbSlice = $dbList[$project]; + $sql = "SELECT 1 + FROM information_schema.tables + WHERE table_schema = :project + LIMIT 1"; + $queryResult = $this->executeProjectsQuery($dbSlice, $sql, [ + 'project' => $project . "_p", + ])->fetchAssociative(); + $result = (1 == count($queryResult)); + } // Cache for 1h and return return $this->setCache($cacheKey, $result, 'PT1H'); // feels long to me, but as long as getOne } diff --git a/tests/Model/GlobalContribsTest.php b/tests/Model/GlobalContribsTest.php index 398595906..edb753018 100644 --- a/tests/Model/GlobalContribsTest.php +++ b/tests/Model/GlobalContribsTest.php @@ -29,6 +29,9 @@ class GlobalContribsTest extends TestAdapter public function setUp(): void { $this->globalContribsRepo = $this->createMock(GlobalContribsRepository::class); + $this->globalContribsRepo->expects(static::once()) + ->method('checkReplicationAllProjects') + ->willReturn([]); $userRepo = $this->createMock(UserRepository::class); $this->globalContribs = new GlobalContribs( $this->globalContribsRepo, @@ -90,7 +93,7 @@ public function testGlobalEdits(): void 'dbName' => 'wiki1', 'url' => 'https://wiki1.example.org', ]); - $wiki1Repo->method('hasTables') + $wiki1Repo->method('checkReplication') ->willReturn(true); $wiki1 = new Project('wiki1'); $wiki1->setRepository($wiki1Repo); diff --git a/tests/TestAdapter.php b/tests/TestAdapter.php index 55ab3974b..6511d426d 100644 --- a/tests/TestAdapter.php +++ b/tests/TestAdapter.php @@ -32,7 +32,7 @@ public function getProjectRepo(): MockObject 'dbName' => 'test_wiki', 'lang' => 'en', ]); - $repo->method('hasTables') + $repo->method('checkReplication') ->willReturn(true); return $repo; } From 51bcb36a6150abf9819ac60c6040a643bccef3d1 Mon Sep 17 00:00:00 2001 From: alien4444 Date: Tue, 21 Oct 2025 13:00:02 +0200 Subject: [PATCH 7/8] typo --- src/Repository/GlobalContribsRepository.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Repository/GlobalContribsRepository.php b/src/Repository/GlobalContribsRepository.php index 396f2db0b..9bacc1fc1 100644 --- a/src/Repository/GlobalContribsRepository.php +++ b/src/Repository/GlobalContribsRepository.php @@ -125,7 +125,7 @@ protected function globalEditCountsFromCentralAuth(User $user): ?array */ public function checkReplicationAllProjects(): array { - $cacheKey = $this->getCacheKey("global_replicationn_check"); + $cacheKey = $this->getCacheKey("global_replication_check"); if ($this->cache->hasItem($cacheKey)) { return $this->cache->getItem($cacheKey)->get(); } From f2d27572b7b88b08fb1c4e5a5f29a731fe0fcb56 Mon Sep 17 00:00:00 2001 From: alien4444 Date: Tue, 21 Oct 2025 15:20:14 +0200 Subject: [PATCH 8/8] some fixes, notably to tests --- src/Model/Project.php | 3 ++- src/Repository/GlobalContribsRepository.php | 4 ++-- src/Repository/ProjectRepository.php | 9 +++++++-- tests/Model/GlobalContribsTest.php | 3 --- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/Model/Project.php b/src/Model/Project.php index f798fe0e9..d1da374d9 100644 --- a/src/Model/Project.php +++ b/src/Model/Project.php @@ -116,7 +116,8 @@ protected function getMetadata(): ?array */ public function exists(): bool { - return !empty($this->getDomain()) && $this->repository->checkReplication($this->getBasicInfo()['dbName']); + return (!empty($this->getDomain())) + && $this->repository->checkReplication($this->getBasicInfo()['dbName']); } /** diff --git a/src/Repository/GlobalContribsRepository.php b/src/Repository/GlobalContribsRepository.php index 9bacc1fc1..3d0035a7b 100644 --- a/src/Repository/GlobalContribsRepository.php +++ b/src/Repository/GlobalContribsRepository.php @@ -137,9 +137,9 @@ public function checkReplicationAllProjects(): array while ($exists) { $i += 1; try { - $queryResult = $this->executeProjetsQuery("s$i", $sql)->fetchFirstColumn(); + $queryResult = $this->executeProjectsQuery("s$i", $sql)->fetchFirstColumn(); $result = array_merge($result, $queryResult); - } catch (Exception) { + } catch (\Throwable) { $exists = false; } } diff --git a/src/Repository/ProjectRepository.php b/src/Repository/ProjectRepository.php index e09dbf36f..3e61ffbe2 100644 --- a/src/Repository/ProjectRepository.php +++ b/src/Repository/ProjectRepository.php @@ -249,15 +249,20 @@ public function getOne(string $project): ?array */ public function checkReplication(string $project): bool { + if ('' == $project) { + // This means we failed to getBasicInfo. Let's try and AGF. + // Plus, keeps tests from breaking down. + return true; + } $cacheKey = $this->getCacheKey($project, "replication_check"); if ($this->cache->hasItem($cacheKey)) { return $this->cache->getItem($cacheKey)->get(); } // GlobalContribs preloads replication checks for *all* projects - $allProjectsCacheKey = $this->getCacheKey("global_replication_check"); + $allProjectsCacheKey = $this->getCacheKey('', "global_replication_check"); if ($this->cache->hasItem($allProjectsCacheKey)) { $globalReplicationChecks = $this->cache->getItem($allProjectsCacheKey)->get(); - return array_key_exists($globalReplicationChecks, $project); + return array_key_exists($project, $globalReplicationChecks); } $dbList = $this->getDbList(); if (!array_key_exists($project, $dbList)) { diff --git a/tests/Model/GlobalContribsTest.php b/tests/Model/GlobalContribsTest.php index edb753018..752968b8b 100644 --- a/tests/Model/GlobalContribsTest.php +++ b/tests/Model/GlobalContribsTest.php @@ -29,9 +29,6 @@ class GlobalContribsTest extends TestAdapter public function setUp(): void { $this->globalContribsRepo = $this->createMock(GlobalContribsRepository::class); - $this->globalContribsRepo->expects(static::once()) - ->method('checkReplicationAllProjects') - ->willReturn([]); $userRepo = $this->createMock(UserRepository::class); $this->globalContribs = new GlobalContribs( $this->globalContribsRepo,