Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 0 additions & 44 deletions .github/workflows/claude-code-review.yml

This file was deleted.

50 changes: 0 additions & 50 deletions .github/workflows/claude.yml

This file was deleted.

27 changes: 27 additions & 0 deletions lib/FredyPipelineExecutioner.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class FredyPipelineExecutioner {
* @param {(raw:any)=>Listing} providerConfig.normalize Function to convert raw scraped data into a Listing shape.
* @param {(listing:Listing)=>boolean} providerConfig.filter Function to filter out unwanted listings.
* @param {(url:string, waitForSelector?:string)=>Promise<void>|Promise<Listing[]>} [providerConfig.getListings] Optional override to fetch listings.
* @param {(listing:Listing, browser:any)=>Promise<Listing>} [providerConfig.fetchDetails] Optional per-listing detail enrichment. Called in parallel for each new listing after deduplication. Receives the shared browser instance. Must always resolve (never reject).
* @param {Object} notificationConfig Notification configuration passed to notification adapters.
* @param {Object} spatialFilter Optional spatial filter configuration.
* @param {string} providerId The ID of the provider currently in use.
Expand Down Expand Up @@ -92,6 +93,7 @@ class FredyPipelineExecutioner {
.then(this._normalize.bind(this))
.then(this._filter.bind(this))
.then(this._findNew.bind(this))
.then(this._fetchDetails.bind(this))
.then(this._geocode.bind(this))
.then(this._save.bind(this))
.then(this._calculateDistance.bind(this))
Expand All @@ -101,6 +103,31 @@ class FredyPipelineExecutioner {
.catch(this._handleError.bind(this));
}

/**
* Optionally enrich new listings with data from their detail pages.
* Only called when the provider config defines a `fetchDetails` function.
* Runs all fetches in parallel. Each individual fetch must handle its own errors
* and always resolve (never reject) to avoid aborting other listings.
*
* @param {Listing[]} newListings New listings to enrich.
* @returns {Promise<Listing[]>} Resolves with enriched listings.
*/
async _fetchDetails(newListings) {
if (typeof this._providerConfig.fetchDetails !== 'function') {
return newListings;
}
const userId = getJob(this._jobKey)?.userId;
if (!userId || !getUserSettings(userId)?.provider_details) {
return newListings;
}
const listingsToEnrich = process.env.NODE_ENV === 'test' ? newListings.slice(0, 1) : newListings;
const enriched = [];
for (const listing of listingsToEnrich) {
enriched.push(await this._providerConfig.fetchDetails(listing, this._browser));
}
return enriched;
}

/**
* Geocode new listings.
*
Expand Down
8 changes: 4 additions & 4 deletions lib/api/routes/userSettingsRoute.js
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,9 @@ userSettingsRouter.post('/news-hash', async (req, res) => {
}
});

userSettingsRouter.post('/immoscout-details', async (req, res) => {
userSettingsRouter.post('/provider-details', async (req, res) => {
const userId = req.session.currentUser;
const { immoscout_details } = req.body;
const { provider_details } = req.body;

const globalSettings = await getSettings();
if (globalSettings.demoMode) {
Expand All @@ -109,10 +109,10 @@ userSettingsRouter.post('/immoscout-details', async (req, res) => {
}

try {
upsertSettings({ immoscout_details: !!immoscout_details }, userId);
upsertSettings({ provider_details: !!provider_details }, userId);
res.send({ success: true });
} catch (error) {
logger.error('Error updating immoscout details setting', error);
logger.error('Error updating provider details setting', error);
res.statusCode = 500;
res.send({ error: error.message });
}
Expand Down
69 changes: 59 additions & 10 deletions lib/provider/immobilienDe.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@

import { buildHash, isOneOf } from '../utils.js';
import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
import puppeteerExtractor from '../services/extractor/puppeteerExtractor.js';
import * as cheerio from 'cheerio';
import logger from '../services/logger.js';

let appliedBlackList = [];

Expand All @@ -18,15 +21,60 @@ function parseId(shortenedLink) {
return shortenedLink.substring(shortenedLink.lastIndexOf('/') + 1);
}

async function fetchDetails(listing, browser) {
try {
const html = await puppeteerExtractor(listing.link, null, { browser });
if (!html) return listing;

const $ = cheerio.load(html);

// Try JSON-LD first
let description = null;
let address = listing.address;
$('script[type="application/ld+json"]').each((_, el) => {
if (description) return;
try {
const data = JSON.parse($(el).text());
const nodes = Array.isArray(data) ? data : [data];
for (const node of nodes) {
if (node.description && !description) description = String(node.description).replace(/\s+/g, ' ').trim();
const addr = node.address || node?.mainEntity?.address;
if (addr && addr.streetAddress && address === listing.address) {
const parts = [addr.streetAddress, addr.postalCode, addr.addressLocality].filter(Boolean);
if (parts.length) address = parts.join(' ');
}
}
} catch {
// ignore malformed JSON-LD
}
});

// Fallback: common description selectors used by immobilien.de
if (!description) {
const sel = ['.beschreibung', '.freitext', '.objektbeschreibung', '.description'].find((s) => $(s).length > 0);
if (sel) description = $(sel).text().replace(/\s+/g, ' ').trim();
}

return {
...listing,
address,
description: description || listing.description,
};
} catch (error) {
logger.warn(`Could not fetch immobilien.de detail page for listing '${listing.id}'.`, error?.message || error);
return listing;
}
}

function normalize(o) {
const baseUrl = 'https://www.immobilien.de';
const size = o.size || null;
const price = o.price || null;
const title = o.title || 'No title available';
const address = o.address || null;
const shortLink = shortenLink(o.link);
const link = baseUrl + shortLink;
const image = baseUrl + o.image;
const link = shortLink ? (shortLink.startsWith('http') ? shortLink : baseUrl + shortLink) : baseUrl;
const image = o.image ? (o.image.startsWith('http') ? o.image : baseUrl + o.image) : null;
const id = buildHash(parseId(shortLink), o.price);
return Object.assign(o, { id, price, size, title, address, link, image });
}
Expand All @@ -39,21 +87,22 @@ function applyBlacklist(o) {

const config = {
url: null,
crawlContainer: 'a:has(div.list_entry)',
crawlContainer: 'a.lr-card',
sortByDateParam: 'sort_col=*created_ts&sort_dir=desc',
waitForSelector: 'body',
waitForSelector: null,
crawlFields: {
id: '@href', //will be transformed later
price: '.immo_preis .label_info',
size: '.flaeche .label_info | removeNewline | trim',
title: 'h3 span',
price: '.lr-card__price-amount | trim',
size: '.lr-card__fact:has(.lr-card__fact-label:contains("Fläche")) .lr-card__fact-value | trim',
title: '.lr-card__title | trim',
description: '.description | trim',
link: '@href',
address: '.place',
image: 'img@src',
address: '.lr-card__address span | trim',
image: 'img.lr-card__gallery-img@src',
},
normalize: normalize,
normalize,
filter: applyBlacklist,
fetchDetails,
activeTester: checkIfListingIsActive,
};
export const init = (sourceConfig, blacklist) => {
Expand Down
56 changes: 26 additions & 30 deletions lib/provider/immoscout.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,7 @@ import {
convertWebToMobile,
} from '../services/immoscout/immoscout-web-translator.js';
import logger from '../services/logger.js';
import { getUserSettings } from '../services/storage/settingsStorage.js';
let appliedBlackList = [];
let currentUserId = null;

async function getListings(url) {
const response = await fetch(url, {
Expand All @@ -68,42 +66,40 @@ async function getListings(url) {
}

const responseBody = await response.json();
return Promise.all(
responseBody.resultListItems
.filter((item) => item.type === 'EXPOSE_RESULT')
.map(async (expose) => {
const item = expose.item;
const [price, size] = item.attributes;
const image = item?.titlePicture?.full ?? item?.titlePicture?.preview ?? null;
let listing = {
id: item.id,
price: price?.value,
size: size?.value,
title: item.title,
link: `${metaInformation.baseUrl}expose/${item.id}`,
address: item.address?.line,
image,
};
if (currentUserId) {
const userSettings = getUserSettings(currentUserId);
if (userSettings.immoscout_details) {
return await pushDetails(listing);
}
}
return listing;
}),
);
return responseBody.resultListItems
.filter((item) => item.type === 'EXPOSE_RESULT')
.map((expose) => {
const item = expose.item;
const [price, size] = item.attributes;
const image = item?.titlePicture?.full ?? item?.titlePicture?.preview ?? null;
return {
id: item.id,
price: price?.value,
size: size?.value,
title: item.title,
link: `${metaInformation.baseUrl}expose/${item.id}`,
address: item.address?.line,
image,
};
});
}

async function fetchDetails(listing) {
return pushDetails(listing);
}

async function pushDetails(listing) {
const detailed = await fetch(`https://api.mobile.immobilienscout24.de/expose/${listing.id}`, {
const exposeId = listing.link?.split('/').pop();
const detailed = await fetch(`https://api.mobile.immobilienscout24.de/expose/${exposeId}`, {
headers: {
'User-Agent': 'ImmoScout_27.3_26.0_._',
'Content-Type': 'application/json',
},
});
if (!detailed.ok) {
logger.error('Error fetching listing details from ImmoScout Mobile API:', detailed.statusText);
logger.warn(
`Error fetching listing details from ImmoScout Mobile API for id: ${exposeId} Status: ${detailed.statusText}`,
);
return listing;
}
const detailBody = await detailed.json();
Expand Down Expand Up @@ -196,13 +192,13 @@ const config = {
normalize: normalize,
filter: applyBlacklist,
getListings: getListings,
fetchDetails: fetchDetails,
activeTester: isListingActive,
};
export const init = (sourceConfig, blacklist) => {
config.enabled = sourceConfig.enabled;
config.url = convertWebToMobile(sourceConfig.url);
appliedBlackList = blacklist || [];
currentUserId = sourceConfig.userId || null;
};
export const metaInformation = {
name: 'Immoscout',
Expand Down
Loading
Loading