From 7e536c594c98b7be46d7e308549e0a39a2d577fb Mon Sep 17 00:00:00 2001 From: Tacsipacsi Date: Sat, 13 Dec 2025 15:08:49 +0100 Subject: [PATCH] fix(WikidataCommand): Always process P138 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The latest update for Budapest [0] complained about `data/wikidata/Q44377.json` missing. This is probably the result of the following: 1. It discovered https://www.openstreetmap.org/relation/19901264. 2. Based on its `name:etymology:wikidata` tag, it downloaded Q850862. Since we’re not interested in what the namesakes are named after, it didn’t bother about P138. 3. Later it also discovered https://www.openstreetmap.org/way/156974362. 4. It saw that Q850862 has already been downloaded, so it skipped downloading it – and it also skipped downloading its namesake, Q44377. 5. Subsequently GeoJSONCommand couldn’t find `Q44377.json`. To fix this, simply don’t skip reading back the entity and trying to extract P138 from it. This also allows moving the `file_exists()` check to `save()`, reducing code duplication. This might result in a slight performance degradation (more local disk I/O if a street with its own Wikidata item consists of multiple ways not bundled in a relation), but no increase in HTTP requests (except for now doing the one or two HTTP requests per city that used to be erronously skipped), and correctness is worth that slight performance degradation. I confirmed that running on Budapest locally, the warning is printed without the fix, and not printed with the fix. I also confirmed that with the fix, `gender.csv` correctly includes `Q44377` for Budavári alagút. Please note that while the current example doesn’t involve people, so it may seem borderline out of scope, it’s very possible for similar cases to involve people: e.g. https://www.openstreetmap.org/way/335395340 (Flórián tér overpass) could have `name:etymology:wikidata=Q65215958` (Flórián tér) – I’m not 100% sure if this would be right, but arguably yes –, and that would potentially (depending on the processing order) break the yellow color of https://www.openstreetmap.org/way/908080555. [0] https://github.com/EqualStreetNames/equalstreetnames-budapest/actions/runs/19877172828/job/56967142726 --- Command/WikidataCommand.php | 43 +++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/Command/WikidataCommand.php b/Command/WikidataCommand.php index adcb56c..71f05b2 100644 --- a/Command/WikidataCommand.php +++ b/Command/WikidataCommand.php @@ -118,9 +118,7 @@ function ($element): bool { // Download Wikidata item $path = sprintf('%s/%s.json', $outputDir, $identifier); - if (!file_exists($path)) { - self::save($identifier, $element, $path, $warnings); - } + self::save($identifier, $element, $path, $warnings); } } @@ -134,27 +132,22 @@ function ($element): bool { // Download Wikidata item $path = sprintf('%s/%s.json', $outputDir, $wikidataTag); - if (!file_exists($path)) { - self::save($wikidataTag, $element, $path, $warnings); - - $wikiPath = sprintf('%s/%s.json', $outputDir, $wikidataTag); - $entity = Wikidata::read($wikiPath); - - $identifiers = Wikidata::extractNamedAfter($entity); - if (!is_null($identifiers)) { - foreach ($identifiers as $identifier) { - // Check that the value of the tag is a valid Wikidata item identifier - if (preg_match('/^Q[0-9]+$/', $identifier) !== 1) { - $warnings[] = sprintf('Format of `P138` (NamedAfter) property is invalid (%s) for in item "%s".', $identifier, $wikidataTag); - continue; - } - - // Download Wikidata item - $path = sprintf('%s/%s.json', $outputDir, $identifier); - if (!file_exists($path)) { - self::save($identifier, $element, $path, $warnings); - } + self::save($wikidataTag, $element, $path, $warnings); + + $entity = Wikidata::read($path); + + $identifiers = Wikidata::extractNamedAfter($entity); + if (!is_null($identifiers)) { + foreach ($identifiers as $identifier) { + // Check that the value of the tag is a valid Wikidata item identifier + if (preg_match('/^Q[0-9]+$/', $identifier) !== 1) { + $warnings[] = sprintf('Format of `P138` (NamedAfter) property is invalid (%s) for in item "%s".', $identifier, $wikidataTag); + continue; } + + // Download Wikidata item + $path = sprintf('%s/%s.json', $outputDir, $identifier); + self::save($identifier, $element, $path, $warnings); } } } @@ -189,6 +182,10 @@ function ($element): bool { */ private static function save(string $identifier, $element, string $path, array &$warnings = []): void { + if (file_exists($path)) { + return; + } + $url = sprintf('%s%s.json', self::URL, $identifier); $retryMiddleware = Middleware::retry(