From beedd24accdef8823a02af768f7199b4fbca81cd Mon Sep 17 00:00:00 2001 From: Antonio Contreras Date: Mon, 23 Feb 2026 19:10:29 +0100 Subject: [PATCH 1/4] work in progress --- functions/src/functions/scraping-functions.ts | 10 ++++ .../scraping-cuenca-segura/package.json | 18 +++++-- .../scraping-cuenca-segura/src/api/index.ts | 4 +- .../src/console-runner.ts | 11 ++-- .../scraping-cuenca-segura/src/index.ts | 2 +- .../scraping-cuenca-segura/src/integration.ts | 18 ++++--- .../src/scraper/business.ts | 18 +++---- .../src/scraper/index.ts | 4 +- .../src/scraper/mapper.ts | 2 +- .../scraping-cuenca-segura/tsconfig.json | 17 +++--- package-lock.json | 3 +- .../db/src/dals/embalses/embalses.mappers.ts | 14 +++++ .../src/dals/embalses/embalses.repository.ts | 52 ++++++++++++++++++- 13 files changed, 132 insertions(+), 41 deletions(-) diff --git a/functions/src/functions/scraping-functions.ts b/functions/src/functions/scraping-functions.ts index 75bd7bc..4623fa3 100644 --- a/functions/src/functions/scraping-functions.ts +++ b/functions/src/functions/scraping-functions.ts @@ -33,6 +33,8 @@ export async function scrapingsFunction( const responseCuencaJucar = await embalsesRepository.actualizarCuencaJucar(); + const responseCuencaSegura = await embalsesRepository.actualizarCuencaSegura(); + if (responseCuencaMediterranea) { context.log( "scrapings-function: Se han actualizado los embalses de la cuenca Mediterránea", @@ -81,6 +83,14 @@ export async function scrapingsFunction( ); } + if (responseCuencaSegura) { + context.log(`Se han actualizado los embalses de la cuenca Segura`); + } else { + context.log( + "No se han podido actualizar los embalses de la cuenca Segura" + ); + } + } catch (error) { context.error("scrapings-function: ERROR", error); throw error; diff --git a/integrations/scraping-cuenca-segura/package.json b/integrations/scraping-cuenca-segura/package.json index 35d5c5a..f6930ba 100644 --- a/integrations/scraping-cuenca-segura/package.json +++ b/integrations/scraping-cuenca-segura/package.json @@ -1,15 +1,27 @@ { - "name": "scraping-cuenca-segura", +"name": "scraping-cuenca-segura", "version": "1.0.0", "private": true, "type": "module", "exports": { - ".": "./src/index.ts" + ".": "./dist/index.js" }, + "main": "./dist/index.js", + "types": "./dist/index.d.ts", "scripts": { - "start": "tsx --watch src/console-runner.ts" + "start": "tsx --watch ./src/console-runner.ts", + "build": "run-p clean type-check build:scraping-cuenca-segura", + "build:scraping-cuenca-segura": "tsc", + "clean": "rimraf dist", + "type-check": "tsc --noEmit --preserveWatchOutput" }, "dependencies": { + "axios": "^1.11.0", + "cheerio": "^1.1.2", "db-model": "^1.0.0" + }, + "devDependencies": { + "ts-node": "^10.9.2", + "typescript": "^5.9.2" } } diff --git a/integrations/scraping-cuenca-segura/src/api/index.ts b/integrations/scraping-cuenca-segura/src/api/index.ts index 3d2942a..ea837ee 100644 --- a/integrations/scraping-cuenca-segura/src/api/index.ts +++ b/integrations/scraping-cuenca-segura/src/api/index.ts @@ -1,3 +1,3 @@ // Barrel file for API exports -export * from './cuenca.api'; -export * from './cuenca.model'; +export * from './cuenca.api.js'; +export * from './cuenca.model.js'; diff --git a/integrations/scraping-cuenca-segura/src/console-runner.ts b/integrations/scraping-cuenca-segura/src/console-runner.ts index 34aec92..07acda9 100644 --- a/integrations/scraping-cuenca-segura/src/console-runner.ts +++ b/integrations/scraping-cuenca-segura/src/console-runner.ts @@ -1,8 +1,7 @@ -import { scrapeCuencaSegura } from './integration'; -import { mapToEmbalseUpdateSAIH } from './scraper'; +import { scrapeCuencaSegura } from './integration.js'; +import { mapToEmbalseUpdateSAIH } from './scraper/index.js'; + -const URL = 'https://chsegura.es/es/cuenca/redes-de-control/estadisticas-hidrologicas/estado-de-embalses/'; console.log('Estado de la Cuenca Segura:'); -const scrapedCuencaSegura = await scrapeCuencaSegura(URL); -const result = mapToEmbalseUpdateSAIH(scrapedCuencaSegura) -console.log(result); +const scrapedCuencaSegura = await scrapeCuencaSegura(); +console.log(scrapedCuencaSegura); diff --git a/integrations/scraping-cuenca-segura/src/index.ts b/integrations/scraping-cuenca-segura/src/index.ts index 852c5e3..ba7a4ee 100644 --- a/integrations/scraping-cuenca-segura/src/index.ts +++ b/integrations/scraping-cuenca-segura/src/index.ts @@ -1 +1 @@ -export * from "./integration"; +export * from "./integration.js"; diff --git a/integrations/scraping-cuenca-segura/src/integration.ts b/integrations/scraping-cuenca-segura/src/integration.ts index e8291d8..35d983d 100644 --- a/integrations/scraping-cuenca-segura/src/integration.ts +++ b/integrations/scraping-cuenca-segura/src/integration.ts @@ -1,17 +1,23 @@ import * as cheerio from 'cheerio'; -import { getCuencaPageHTMLContent, EmbalsesSegura } from '@/api'; -import { extractReservoirsFromSeguraPage } from '@/scraper'; +import { getCuencaPageHTMLContent, EmbalsesSegura } from './api/index.js'; +import { extractReservoirsFromSeguraPage } from './scraper/index.js'; +import { mapToEmbalseUpdateSAIH } from "./scraper/mapper.js"; +import { EmbalseUpdateSAIHEntity } from 'db-model'; + +const URL = 'https://chsegura.es/es/cuenca/redes-de-control/estadisticas-hidrologicas/estado-de-embalses/'; /** * Scrapes Segura reservoir data and returns it as an array. * @param url - The URL to scrape the data from */ export async function scrapeCuencaSegura( - url: string -): Promise { - const html = await getCuencaPageHTMLContent(url); +): Promise { + + + const html = await getCuencaPageHTMLContent(URL); const $: cheerio.CheerioAPI = cheerio.load(html); // Extract and map reservoir data - return extractReservoirsFromSeguraPage($); + const embalses = extractReservoirsFromSeguraPage($); + return mapToEmbalseUpdateSAIH(embalses); } diff --git a/integrations/scraping-cuenca-segura/src/scraper/business.ts b/integrations/scraping-cuenca-segura/src/scraper/business.ts index e8d0ec9..d344265 100644 --- a/integrations/scraping-cuenca-segura/src/scraper/business.ts +++ b/integrations/scraping-cuenca-segura/src/scraper/business.ts @@ -1,27 +1,27 @@ import { CheerioAPI } from 'cheerio'; import type { Element } from 'domhandler'; -import { EmbalsesSegura } from '@/api'; -import { mapEmbalsesToEntities } from '@/scraper' +import { EmbalsesSegura } from '../api/index.js'; +import { mapEmbalsesToEntities } from './mapper.js' // Function to extract capacity data from main table function getReservoirCapacities($: CheerioAPI): Record { const capacityMap: Record = {}; - + $('#n0 tbody tr').each((_, row) => { const $row = $(row); const cols = $row.find('td'); if (cols.length !== 4) return; const embalse = $(cols[0]).text().trim(); - if (!embalse || - embalse.toLowerCase().includes('total') || - embalse.toLowerCase().includes('resto')) { + if (!embalse || + embalse.toLowerCase().includes('total') || + embalse.toLowerCase().includes('resto')) { return; } const capacidadTotalHm3 = Number($(cols[1]).text().trim()); const porcentajeActual = Number($(cols[3]).text().trim()); - + capacityMap[embalse] = { capacity: capacidadTotalHm3, percentage: porcentajeActual @@ -67,11 +67,11 @@ function parseAnnualStatsRow( export function extractReservoirsFromSeguraPage($: CheerioAPI): EmbalsesSegura[] { // Get capacity data from main table (#n0) const capacityMap = getReservoirCapacities($); - + // Get most recent monthly data from annual table (#n1) const reservoirs: EmbalsesSegura[] = []; const annualRows = extractAnnualStatsRows($); - + // Take only the LAST row (most recent month) if (annualRows.length > 0) { const lastRow = annualRows[annualRows.length - 1]; diff --git a/integrations/scraping-cuenca-segura/src/scraper/index.ts b/integrations/scraping-cuenca-segura/src/scraper/index.ts index bbb6227..4f25535 100644 --- a/integrations/scraping-cuenca-segura/src/scraper/index.ts +++ b/integrations/scraping-cuenca-segura/src/scraper/index.ts @@ -1,2 +1,2 @@ -export * from './business'; -export * from './mapper'; +export * from './business.js'; +export * from './mapper.js'; diff --git a/integrations/scraping-cuenca-segura/src/scraper/mapper.ts b/integrations/scraping-cuenca-segura/src/scraper/mapper.ts index 71d6618..7450490 100644 --- a/integrations/scraping-cuenca-segura/src/scraper/mapper.ts +++ b/integrations/scraping-cuenca-segura/src/scraper/mapper.ts @@ -1,5 +1,5 @@ import { EmbalseUpdateSAIHEntity } from 'db-model'; -import { EmbalsesSegura } from '@/api'; +import { EmbalsesSegura } from '../api/index.js'; // Province lookup for each reservoir const reservoirProvince: Record = { diff --git a/integrations/scraping-cuenca-segura/tsconfig.json b/integrations/scraping-cuenca-segura/tsconfig.json index fad30e0..ce7b1d4 100644 --- a/integrations/scraping-cuenca-segura/tsconfig.json +++ b/integrations/scraping-cuenca-segura/tsconfig.json @@ -1,17 +1,16 @@ { "compilerOptions": { "target": "ESNext", - "module": "ESNext", - "moduleResolution": "bundler", + "module": "nodenext", + "moduleResolution": "nodenext", + "outDir": "dist", "skipLibCheck": true, "isolatedModules": true, "esModuleInterop": true, - "baseUrl": "./", - "paths": { - "@/*": ["src/*"], - "@/api/*": ["src/api/*"], - "@/scraper/*": ["src/scraper/*"] - } + "verbatimModuleSyntax": false, + "declaration": true, + "baseUrl": "./" }, - "include": ["src"] + "include": ["src/**/*"], + "exclude": ["dist", "node_modules"] } diff --git a/package-lock.json b/package-lock.json index bdf99ea..5a4ade5 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5863,7 +5863,8 @@ "scraping-cuenca-duero": "*", "scraping-cuenca-guadalquivir": "*", "scraping-cuenca-jucar": "*", - "scraping-cuenca-mediterranea": "*" + "scraping-cuenca-mediterranea": "*", + "scraping-cuenca-segura": "*" } }, "packages/db-model": { diff --git a/packages/db/src/dals/embalses/embalses.mappers.ts b/packages/db/src/dals/embalses/embalses.mappers.ts index 09e4573..7035d10 100644 --- a/packages/db/src/dals/embalses/embalses.mappers.ts +++ b/packages/db/src/dals/embalses/embalses.mappers.ts @@ -200,3 +200,17 @@ export const mapperFromCuencasJucarToArcgis = new Map< [332, { nombre: "Guadalest", idArcgis: 153 }], [331, { nombre: "Amadorio", idArcgis: 23 }], ]); + +// Ojo Crevillente, Fuensanta, La Cierva, Puentes, Santomera, Taibilla, Valdeinfierno, Algeciras, +// Alfonso XIII, Anchuricas, Argos + +export const mapperFromCuencasSeguraToArcgis = new Map< + number, + InfoDestinoArcgis +>([ + [1, { nombre: "Fuensanta", idArcgis: 143 }], + [2, { nombre: "Talave", idArcgis: 304 }], + [3, { nombre: "Cenajo", idArcgis: 94 }], + [4, { nombre: "Camarillas", idArcgis: 72 }], + [5, { nombre: "La Pedrera", idArcgis: 180 }], +]); diff --git a/packages/db/src/dals/embalses/embalses.repository.ts b/packages/db/src/dals/embalses/embalses.repository.ts index af44fa2..1f45704 100644 --- a/packages/db/src/dals/embalses/embalses.repository.ts +++ b/packages/db/src/dals/embalses/embalses.repository.ts @@ -1,12 +1,13 @@ import { scrapeSeedEmbalses } from "arcgis"; import { getEmbalsesContext } from "./embalses.context.js"; -import { mapperFromCuencasMediterraneaToArcgis, mapperFromCuencasCantabricoToArcgis, mapperFromCuencasCatalanaToArcgis, mapperFromCuencasDueroToArcgis, mapperFromCuencasGuadalquivirToArcgis, mapperFromCuencasJucarToArcgis } from "./embalses.mappers.js"; +import { mapperFromCuencasMediterraneaToArcgis, mapperFromCuencasCantabricoToArcgis, mapperFromCuencasCatalanaToArcgis, mapperFromCuencasDueroToArcgis, mapperFromCuencasGuadalquivirToArcgis, mapperFromCuencasJucarToArcgis, mapperFromCuencasSeguraToArcgis } from "./embalses.mappers.js"; import { scrapeCuencaMediterranea } from "scraping-cuenca-mediterranea"; import { scrapeCuencaCantabrica } from 'scraping-cuenca-cantabrico'; import { integracionCuencaCatalana } from 'scraping-cuenca-catalana'; import { getEstadoCuencaDuero } from 'scraping-cuenca-duero'; import { scrapeCuencaGuadalquivir } from 'scraping-cuenca-guadalquivir'; import { scrapeCuencaJucar } from 'scraping-cuenca-jucar'; +import { scrapeCuencaSegura } from 'scraping-cuenca-segura'; import { parseDate } from "./embalses.helpers.js"; export const embalsesRepository = { @@ -337,6 +338,55 @@ export const embalsesRepository = { } } + return actualizados > 0; + }, + actualizarCuencaSegura: async (): Promise => { + const embalsesSegura = await scrapeCuencaSegura(); + + console.log( + `Se han scrapeado ${embalsesSegura.length} embalses de la Cuenca Segura` + ); + + let actualizados = 0; + let noEncontrados = 0; + let sinMapper = 0; + + for (const embalse of embalsesSegura) { + const infoDestino = mapperFromCuencasSeguraToArcgis.get(embalse.id); + + if (!infoDestino) { + sinMapper++; + console.warn(`Sin mapper para ID ${embalse.id} - ${embalse.nombre}`); + continue; + } + + console.log( + `🔍 Mapeando: ID scraping ${embalse.id} -> _id BD ${infoDestino.idArcgis} (${infoDestino.nombre})` + ); + + const { matchedCount } = await getEmbalsesContext().updateOne( + { _id: infoDestino.idArcgis.toString() }, + { + $set: { + aguaActualSAIH: embalse.aguaActualSAIH, + fechaMedidaAguaActualSAIH: parseDate(embalse.fechaMedidaSAIH), + }, + } + ); + + if (matchedCount > 0) { + actualizados++; + console.log( + `Actualizado: ${infoDestino.nombre} (_id: ${infoDestino.idArcgis}) -> ${embalse.aguaActualSAIH} hm³` + ); + } else { + noEncontrados++; + console.warn( + `No encontrado en BD: _id ${infoDestino.idArcgis} - ${infoDestino.nombre}` + ); + } + } + return actualizados > 0; } }; From 79325dd5b95cd270cf248f33b32c833c94d65c45 Mon Sep 17 00:00:00 2001 From: Antonio Contreras Date: Tue, 24 Feb 2026 11:45:55 +0100 Subject: [PATCH 2/4] fix: correct function signature for scrapeCuencaSegura --- integrations/scraping-cuenca-segura/src/integration.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/integrations/scraping-cuenca-segura/src/integration.ts b/integrations/scraping-cuenca-segura/src/integration.ts index 35d983d..5d6e6d4 100644 --- a/integrations/scraping-cuenca-segura/src/integration.ts +++ b/integrations/scraping-cuenca-segura/src/integration.ts @@ -10,8 +10,7 @@ const URL = 'https://chsegura.es/es/cuenca/redes-de-control/estadisticas-hidrolo * Scrapes Segura reservoir data and returns it as an array. * @param url - The URL to scrape the data from */ -export async function scrapeCuencaSegura( -): Promise { +export async function scrapeCuencaSegura(): Promise { const html = await getCuencaPageHTMLContent(URL); From 29500b4f280af202d8c3f96823c3460197a99eeb Mon Sep 17 00:00:00 2001 From: Antonio Contreras Date: Tue, 24 Feb 2026 11:53:23 +0100 Subject: [PATCH 3/4] refactor: remove unused import and clean up code in console-runner and integration modules --- integrations/scraping-cuenca-segura/src/console-runner.ts | 2 -- integrations/scraping-cuenca-segura/src/integration.ts | 4 +--- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/integrations/scraping-cuenca-segura/src/console-runner.ts b/integrations/scraping-cuenca-segura/src/console-runner.ts index 07acda9..83bd4e5 100644 --- a/integrations/scraping-cuenca-segura/src/console-runner.ts +++ b/integrations/scraping-cuenca-segura/src/console-runner.ts @@ -1,6 +1,4 @@ import { scrapeCuencaSegura } from './integration.js'; -import { mapToEmbalseUpdateSAIH } from './scraper/index.js'; - console.log('Estado de la Cuenca Segura:'); const scrapedCuencaSegura = await scrapeCuencaSegura(); diff --git a/integrations/scraping-cuenca-segura/src/integration.ts b/integrations/scraping-cuenca-segura/src/integration.ts index 5d6e6d4..9200d94 100644 --- a/integrations/scraping-cuenca-segura/src/integration.ts +++ b/integrations/scraping-cuenca-segura/src/integration.ts @@ -1,5 +1,5 @@ import * as cheerio from 'cheerio'; -import { getCuencaPageHTMLContent, EmbalsesSegura } from './api/index.js'; +import { getCuencaPageHTMLContent } from './api/index.js'; import { extractReservoirsFromSeguraPage } from './scraper/index.js'; import { mapToEmbalseUpdateSAIH } from "./scraper/mapper.js"; import { EmbalseUpdateSAIHEntity } from 'db-model'; @@ -11,8 +11,6 @@ const URL = 'https://chsegura.es/es/cuenca/redes-de-control/estadisticas-hidrolo * @param url - The URL to scrape the data from */ export async function scrapeCuencaSegura(): Promise { - - const html = await getCuencaPageHTMLContent(URL); const $: cheerio.CheerioAPI = cheerio.load(html); From 46cefde421ad1146a02c73e1e955ef8a521fbef1 Mon Sep 17 00:00:00 2001 From: Antonio Contreras Date: Tue, 24 Feb 2026 12:05:53 +0100 Subject: [PATCH 4/4] fix: add missing dependency for scraping-cuenca-segura in package.json --- packages/db/package.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/db/package.json b/packages/db/package.json index 67b1e94..c103c88 100644 --- a/packages/db/package.json +++ b/packages/db/package.json @@ -28,9 +28,9 @@ "scraping-cuenca-cantabrico": "*", "scraping-cuenca-catalana": "*", "scraping-cuenca-duero": "*", - "scraping-cuenca-jucar": "*", - "scraping-cuenca-mediterranea": "*" + "scraping-cuenca-mediterranea": "*", + "scraping-cuenca-segura": "*" }, "devDependencies": { "@types/prompts": "^2.4.9",