Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions functions/src/functions/scraping-functions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ export async function scrapingsFunction(

const responseCuencaJucar = await embalsesRepository.actualizarCuencaJucar();

const responseCuencaSegura = await embalsesRepository.actualizarCuencaSegura();

if (responseCuencaMediterranea) {
context.log(
"scrapings-function: Se han actualizado los embalses de la cuenca Mediterránea",
Expand Down Expand Up @@ -82,6 +84,14 @@ export async function scrapingsFunction(
);
}

if (responseCuencaSegura) {
context.log(`Se han actualizado los embalses de la cuenca Segura`);
} else {
context.log(
"No se han podido actualizar los embalses de la cuenca Segura"
);
}

} catch (error) {
context.error("scrapings-function: ERROR", error);
throw error;
Expand Down
18 changes: 15 additions & 3 deletions integrations/scraping-cuenca-segura/package.json
Original file line number Diff line number Diff line change
@@ -1,15 +1,27 @@
{
"name": "scraping-cuenca-segura",
"name": "scraping-cuenca-segura",
"version": "1.0.0",
"private": true,
"type": "module",
"exports": {
".": "./src/index.ts"
".": "./dist/index.js"
},
"main": "./dist/index.js",
"types": "./dist/index.d.ts",
"scripts": {
"start": "tsx --watch src/console-runner.ts"
"start": "tsx --watch ./src/console-runner.ts",
"build": "run-p clean type-check build:scraping-cuenca-segura",
"build:scraping-cuenca-segura": "tsc",
"clean": "rimraf dist",
"type-check": "tsc --noEmit --preserveWatchOutput"
},
"dependencies": {
"axios": "^1.11.0",
"cheerio": "^1.1.2",
"db-model": "^1.0.0"
},
"devDependencies": {
"ts-node": "^10.9.2",
"typescript": "^5.9.2"
}
}
4 changes: 2 additions & 2 deletions integrations/scraping-cuenca-segura/src/api/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
// Barrel file for API exports
export * from './cuenca.api';
export * from './cuenca.model';
export * from './cuenca.api.js';
export * from './cuenca.model.js';
9 changes: 3 additions & 6 deletions integrations/scraping-cuenca-segura/src/console-runner.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
import { scrapeCuencaSegura } from './integration';
import { mapToEmbalseUpdateSAIH } from './scraper';
import { scrapeCuencaSegura } from './integration.js';

const URL = 'https://chsegura.es/es/cuenca/redes-de-control/estadisticas-hidrologicas/estado-de-embalses/';
console.log('Estado de la Cuenca Segura:');
const scrapedCuencaSegura = await scrapeCuencaSegura(URL);
const result = mapToEmbalseUpdateSAIH(scrapedCuencaSegura)
console.log(result);
const scrapedCuencaSegura = await scrapeCuencaSegura();
console.log(scrapedCuencaSegura);
2 changes: 1 addition & 1 deletion integrations/scraping-cuenca-segura/src/index.ts
Original file line number Diff line number Diff line change
@@ -1 +1 @@
export * from "./integration";
export * from "./integration.js";
17 changes: 10 additions & 7 deletions integrations/scraping-cuenca-segura/src/integration.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
import * as cheerio from 'cheerio';
import { getCuencaPageHTMLContent, EmbalsesSegura } from '@/api';
import { extractReservoirsFromSeguraPage } from '@/scraper';
import { getCuencaPageHTMLContent } from './api/index.js';
import { extractReservoirsFromSeguraPage } from './scraper/index.js';
import { mapToEmbalseUpdateSAIH } from "./scraper/mapper.js";
import { EmbalseUpdateSAIHEntity } from 'db-model';

const URL = 'https://chsegura.es/es/cuenca/redes-de-control/estadisticas-hidrologicas/estado-de-embalses/';

/**
 * Scrapes the Segura basin reservoir page and returns the reservoirs already
 * mapped to the SAIH update entity.
 *
 * @param url - Page to scrape. Defaults to the module-level `URL` constant, so
 *   callers that invoke the function with no arguments keep working.
 * @returns The output of `mapToEmbalseUpdateSAIH` applied to the reservoirs
 *   extracted from the page.
 */
export async function scrapeCuencaSegura(
  url: string = URL
): Promise<EmbalseUpdateSAIHEntity[]> {
  const html = await getCuencaPageHTMLContent(url);
  const $: cheerio.CheerioAPI = cheerio.load(html);

  // Extract the raw reservoir rows first, then map them to the entity shape
  // that this function promises to return.
  const embalses = extractReservoirsFromSeguraPage($);
  return mapToEmbalseUpdateSAIH(embalses);
}
18 changes: 9 additions & 9 deletions integrations/scraping-cuenca-segura/src/scraper/business.ts
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
import { CheerioAPI } from 'cheerio';
import type { Element } from 'domhandler';
import { EmbalsesSegura } from '@/api';
import { mapEmbalsesToEntities } from '@/scraper'
import { EmbalsesSegura } from '../api/index.js';
import { mapEmbalsesToEntities } from './mapper.js'

// Function to extract capacity data from main table
function getReservoirCapacities($: CheerioAPI): Record<string, { capacity: number; percentage: number }> {
const capacityMap: Record<string, { capacity: number; percentage: number }> = {};

$('#n0 tbody tr').each((_, row) => {
const $row = $(row);
const cols = $row.find('td');
if (cols.length !== 4) return;

const embalse = $(cols[0]).text().trim();
if (!embalse ||
embalse.toLowerCase().includes('total') ||
embalse.toLowerCase().includes('resto')) {
if (!embalse ||
embalse.toLowerCase().includes('total') ||
embalse.toLowerCase().includes('resto')) {
return;
}

const capacidadTotalHm3 = Number($(cols[1]).text().trim());
const porcentajeActual = Number($(cols[3]).text().trim());

capacityMap[embalse] = {
capacity: capacidadTotalHm3,
percentage: porcentajeActual
Expand Down Expand Up @@ -67,11 +67,11 @@ function parseAnnualStatsRow(
export function extractReservoirsFromSeguraPage($: CheerioAPI): EmbalsesSegura[] {
// Get capacity data from main table (#n0)
const capacityMap = getReservoirCapacities($);

// Get most recent monthly data from annual table (#n1)
const reservoirs: EmbalsesSegura[] = [];
const annualRows = extractAnnualStatsRows($);

// Take only the LAST row (most recent month)
if (annualRows.length > 0) {
const lastRow = annualRows[annualRows.length - 1];
Expand Down
4 changes: 2 additions & 2 deletions integrations/scraping-cuenca-segura/src/scraper/index.ts
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
export * from './business';
export * from './mapper';
export * from './business.js';
export * from './mapper.js';
2 changes: 1 addition & 1 deletion integrations/scraping-cuenca-segura/src/scraper/mapper.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { EmbalseUpdateSAIHEntity } from 'db-model';
import { EmbalsesSegura } from '@/api';
import { EmbalsesSegura } from '../api/index.js';

// Province lookup for each reservoir
const reservoirProvince: Record<string, string> = {
Expand Down
17 changes: 8 additions & 9 deletions integrations/scraping-cuenca-segura/tsconfig.json
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
{
"compilerOptions": {
"target": "ESNext",
"module": "ESNext",
"moduleResolution": "bundler",
"module": "nodenext",
"moduleResolution": "nodenext",
"outDir": "dist",
"skipLibCheck": true,
"isolatedModules": true,
"esModuleInterop": true,
"baseUrl": "./",
"paths": {
"@/*": ["src/*"],
"@/api/*": ["src/api/*"],
"@/scraper/*": ["src/scraper/*"]
}
"verbatimModuleSyntax": false,
"declaration": true,
"baseUrl": "./"
},
"include": ["src"]
"include": ["src/**/*"],
"exclude": ["dist", "node_modules"]
}
3 changes: 2 additions & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions packages/db/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@
"scraping-cuenca-cantabrico": "*",
"scraping-cuenca-catalana": "*",
"scraping-cuenca-duero": "*",

"scraping-cuenca-jucar": "*",
"scraping-cuenca-mediterranea": "*"
"scraping-cuenca-mediterranea": "*",
"scraping-cuenca-segura": "*"
},
"devDependencies": {
"@types/prompts": "^2.4.9",
Expand Down
14 changes: 14 additions & 0 deletions packages/db/src/dals/embalses/embalses.mappers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -200,3 +200,17 @@ export const mapperFromCuencasJucarToArcgis = new Map<
[332, { nombre: "Guadalest", idArcgis: 153 }],
[331, { nombre: "Amadorio", idArcgis: 23 }],
]);

// NOTE(review): the original note ("Ojo" = "watch out") flags Crevillente, Fuensanta,
// La Cierva, Puentes, Santomera, Taibilla, Valdeinfierno, Algeciras, Alfonso XIII,
// Anchuricas and Argos — presumably reservoirs whose mapping still needs review;
// confirm with the author. Only five reservoirs are mapped below.

/**
 * Lookup table for the Segura basin: scraped reservoir id -> destination record.
 *
 * The repository's `actualizarCuencaSegura` reads it with `.get(embalse.id)` and
 * uses `idArcgis` (stringified) as the `_id` of the document to update; `nombre`
 * is only used in log messages there. Scraped ids with no entry here are skipped.
 */
export const mapperFromCuencasSeguraToArcgis = new Map<
number,
InfoDestinoArcgis
>([
[1, { nombre: "Fuensanta", idArcgis: 143 }],
[2, { nombre: "Talave", idArcgis: 304 }],
[3, { nombre: "Cenajo", idArcgis: 94 }],
[4, { nombre: "Camarillas", idArcgis: 72 }],
[5, { nombre: "La Pedrera", idArcgis: 180 }],
]);
52 changes: 51 additions & 1 deletion packages/db/src/dals/embalses/embalses.repository.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { scrapeSeedEmbalses } from "arcgis";
import { getEmbalsesContext } from "./embalses.context.js";
import { mapperFromCuencasMediterraneaToArcgis, mapperFromCuencasCantabricoToArcgis, mapperFromCuencasCatalanaToArcgis, mapperFromCuencasDueroToArcgis, mapperFromCuencasJucarToArcgis } from "./embalses.mappers.js";
import { mapperFromCuencasMediterraneaToArcgis, mapperFromCuencasCantabricoToArcgis, mapperFromCuencasCatalanaToArcgis, mapperFromCuencasDueroToArcgis, mapperFromCuencasJucarToArcgis, mapperFromCuencasSeguraToArcgis } from "./embalses.mappers.js";
import { scrapeCuencaMediterranea } from "scraping-cuenca-mediterranea";
import { scrapeCuencaCantabrica } from 'scraping-cuenca-cantabrico';
import { integracionCuencaCatalana } from 'scraping-cuenca-catalana';
Expand All @@ -9,6 +9,7 @@ import { getEstadoCuencaDuero } from 'scraping-cuenca-duero';
// available in Azure Functions. Needs rewrite to use axios/cheerio or similar.
// import { scrapeCuencaGuadalquivir } from 'scraping-cuenca-guadalquivir';
import { scrapeCuencaJucar } from 'scraping-cuenca-jucar';
import { scrapeCuencaSegura } from 'scraping-cuenca-segura';
import { parseDate } from "./embalses.helpers.js";

export const embalsesRepository = {
Expand Down Expand Up @@ -290,6 +291,55 @@ export const embalsesRepository = {
}
}

return actualizados > 0;
},
/**
 * Scrapes the Segura basin and stores the scraped SAIH reading of each mapped
 * reservoir through `getEmbalsesContext().updateOne`.
 *
 * For every scraped reservoir the id is translated with
 * `mapperFromCuencasSeguraToArcgis`; reservoirs without a mapping are skipped
 * with a warning. A reservoir counts as updated when the update filter matched
 * a document (`matchedCount > 0`), even if the stored values did not change.
 *
 * @returns `true` when at least one reservoir document was matched, `false`
 *   otherwise (including when nothing was scraped).
 */
actualizarCuencaSegura: async (): Promise<boolean> => {
  const embalsesSegura = await scrapeCuencaSegura();

  console.log(
    `Se han scrapeado ${embalsesSegura.length} embalses de la Cuenca Segura`
  );

  let actualizados = 0;
  let noEncontrados = 0;
  let sinMapper = 0;

  for (const embalse of embalsesSegura) {
    const infoDestino = mapperFromCuencasSeguraToArcgis.get(embalse.id);

    if (!infoDestino) {
      sinMapper++;
      console.warn(`Sin mapper para ID ${embalse.id} - ${embalse.nombre}`);
      continue;
    }

    console.log(
      `🔍 Mapeando: ID scraping ${embalse.id} -> _id BD ${infoDestino.idArcgis} (${infoDestino.nombre})`
    );

    // The filter uses the stringified idArcgis as the document _id.
    const { matchedCount } = await getEmbalsesContext().updateOne(
      { _id: infoDestino.idArcgis.toString() },
      {
        $set: {
          aguaActualSAIH: embalse.aguaActualSAIH,
          fechaMedidaAguaActualSAIH: parseDate(embalse.fechaMedidaSAIH),
        },
      }
    );

    if (matchedCount > 0) {
      actualizados++;
      console.log(
        `Actualizado: ${infoDestino.nombre} (_id: ${infoDestino.idArcgis}) -> ${embalse.aguaActualSAIH} hm³`
      );
    } else {
      noEncontrados++;
      console.warn(
        `No encontrado en BD: _id ${infoDestino.idArcgis} - ${infoDestino.nombre}`
      );
    }
  }

  // Surface the counters that were previously accumulated but never read, so a
  // partial run (missing mappers or missing documents) is visible in one line.
  console.log(
    `Resumen Cuenca Segura: ${actualizados} actualizados, ${noEncontrados} no encontrados en BD, ${sinMapper} sin mapper`
  );

  return actualizados > 0;
}
};