Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions lib/api/routes/userSettingsRoute.js
Original file line number Diff line number Diff line change
Expand Up @@ -118,4 +118,25 @@ userSettingsRouter.post('/immoscout-details', async (req, res) => {
}
});

/**
 * POST /kleinanzeigen-details
 *
 * Stores the `kleinanzeigen_details` flag for the user found in the session.
 * The value from the request body is coerced to a boolean before it is saved.
 *
 * Responses:
 *  - 403 `{ error }` when the global settings report demo mode.
 *  - 500 `{ error }` when reading the request, loading the global settings or
 *    writing the user setting throws.
 *  - `{ success: true }` otherwise.
 */
userSettingsRouter.post('/kleinanzeigen-details', async (req, res) => {
  try {
    // Every fallible step lives inside the try block so that a failure is
    // answered with a 500 instead of surfacing as a rejected handler promise.
    const userId = req.session.currentUser;
    const { kleinanzeigen_details } = req.body;

    const globalSettings = await getSettings();
    if (globalSettings.demoMode) {
      res.statusCode = 403;
      res.send({ error: 'In demo mode, it is not allowed to change settings.' });
      return;
    }

    upsertSettings({ kleinanzeigen_details: !!kleinanzeigen_details }, userId);
    res.send({ success: true });
  } catch (error) {
    logger.error('Error updating kleinanzeigen details setting', error);
    res.statusCode = 500;
    res.send({ error: error.message });
  }
});

export { userSettingsRouter };
178 changes: 175 additions & 3 deletions lib/provider/kleinanzeigen.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,185 @@

import { buildHash, isOneOf } from '../utils.js';
import checkIfListingIsActive from '../services/listings/listingActiveTester.js';
import Extractor from '../services/extractor/extractor.js';
import logger from '../services/logger.js';
import { getUserSettings } from '../services/storage/settingsStorage.js';
import * as cheerio from 'cheerio';

let appliedBlackList = [];
let appliedBlacklistedDistricts = [];
let currentUserId = null;

/**
 * Turns a scraped listing link into an absolute Kleinanzeigen URL.
 *
 * @param {string} link - Link as scraped; absolute or a site-relative path.
 * @returns {string|null} `null` for a falsy link, the link unchanged when it
 *   already starts with "http", otherwise the link prefixed with the site origin.
 */
function toAbsoluteLink(link) {
  if (!link) {
    return null;
  }

  if (link.startsWith('http')) {
    return link;
  }

  return `https://www.kleinanzeigen.de${link}`;
}

/**
 * Converts an arbitrary value into a single-line plain-text string.
 *
 * @param {*} value - Value to clean; `null` and `undefined` yield an empty string.
 * @returns {string} The stringified value with anything that looks like an
 *   HTML tag replaced by a space, whitespace runs collapsed to one space and
 *   leading/trailing whitespace removed.
 */
function cleanText(value) {
  if (value === null || value === undefined) {
    return '';
  }

  const withoutTags = String(value).replace(/<[^>]*>/g, ' ');
  const collapsed = withoutTags.replace(/\s+/g, ' ');
  return collapsed.trim();
}

/**
 * Builds a one-line address from a JSON-LD address object.
 *
 * The result has the shape "<street>, <postalCode> <region> - <locality>";
 * any part that is empty after cleaning is left out together with its separator.
 *
 * @param {object} address - Object that may carry `streetAddress`,
 *   `postalCode`, `addressRegion` and `addressLocality`.
 * @returns {string|null} The assembled address, or `null` when `address` is
 *   not an object or every part is empty.
 */
function buildAddressFromJsonLd(address) {
  const isObject = typeof address === 'object' && address !== null;
  if (!isObject) {
    return null;
  }

  const joinNonEmpty = (parts, separator) => parts.filter(Boolean).join(separator);

  const street = cleanText(address.streetAddress);
  const zip = cleanText(address.postalCode);
  const region = cleanText(address.addressRegion);
  const locality = cleanText(address.addressLocality);

  // The city part is empty only when both region and locality are empty, so no
  // further fallback is needed before it is joined with the postal code.
  const city = joinNonEmpty([region, locality], ' - ');
  const zipAndCity = joinNonEmpty([zip, city], ' ');
  const assembled = joinNonEmpty([street, zipAndCity], ', ');

  return assembled === '' ? null : assembled;
}

/**
 * Recursively collects every object node of a parsed JSON-LD value into a flat list.
 *
 * Arrays are walked element by element. Each object is appended to `acc` and
 * then searched for nested nodes under `@graph` (when it is an array),
 * `mainEntity` and `itemOffered`. Primitives and `null`/`undefined` are ignored.
 *
 * @param {*} node - Parsed JSON-LD value (object, array or primitive).
 * @param {object[]} [acc=[]] - Accumulator that is appended to in place.
 * @returns {object[]} The same `acc` array that was passed in (or created).
 */
function flattenJsonLdNodes(node, acc = []) {
  if (node === null || node === undefined) {
    return acc;
  }

  if (Array.isArray(node)) {
    for (const entry of node) {
      flattenJsonLdNodes(entry, acc);
    }
    return acc;
  }

  if (typeof node !== 'object') {
    return acc;
  }

  acc.push(node);

  const graph = node['@graph'];
  if (Array.isArray(graph)) {
    for (const entry of graph) {
      flattenJsonLdNodes(entry, acc);
    }
  }

  // Single nested entities are followed in this fixed order.
  for (const key of ['mainEntity', 'itemOffered']) {
    const nested = node[key];
    if (nested) {
      flattenJsonLdNodes(nested, acc);
    }
  }

  return acc;
}

/**
 * Extracts an address and a description from the HTML of a listing detail page.
 *
 * The address is taken from the `#street-address` and `#viewad-locality`
 * elements when either has text; otherwise the first usable JSON-LD address is
 * used. The description always comes from the first JSON-LD node that has one.
 *
 * @param {string} html - Raw HTML of the detail page.
 * @returns {{detailAddress: (string|null), detailDescription: (string|null)}}
 *   Each field is `null` when nothing was found.
 */
function extractDetailFromHtml(html) {
  const $ = cheerio.load(html);
  const firstText = (selector) => cleanText($(selector).first().text());

  // Prefer the rendered postal address block from the detail page because
  // it contains the street line that is missing from list results.
  const domAddress = [firstText('#street-address'), firstText('#viewad-locality')].filter(Boolean).join(' ');

  const jsonLdNodes = [];
  $('script[type="application/ld+json"]').each((_, scriptElement) => {
    const rawJson = $(scriptElement).text();
    if (!rawJson) {
      return;
    }

    try {
      flattenJsonLdNodes(JSON.parse(rawJson), jsonLdNodes);
    } catch {
      // Ignore broken JSON-LD blocks from ads/trackers and keep trying others.
    }
  });

  let detailAddress = domAddress || null;
  let detailDescription = null;

  for (const node of jsonLdNodes) {
    if (!detailAddress) {
      const addressSource = node.address || node?.itemOffered?.address || node?.offers?.address;
      const addressCandidate = buildAddressFromJsonLd(addressSource);
      if (addressCandidate) {
        detailAddress = addressCandidate;
      }
    }

    if (!detailDescription) {
      const descriptionCandidate = cleanText(node.description || node?.itemOffered?.description);
      if (descriptionCandidate) {
        detailDescription = descriptionCandidate;
      }
    }

    // Stop scanning as soon as both values are known.
    if (detailAddress && detailDescription) {
      break;
    }
  }

  return { detailAddress, detailDescription };
}

/**
 * Fetches the detail page of a listing and overlays the address and
 * description found there onto the listing.
 *
 * The function never rejects because of the detail request: on a non-OK
 * response, a network error or a timeout it returns the listing with only its
 * link made absolute.
 *
 * @param {object} listing - Listing parsed from the result list; `link`,
 *   `id`, `address` and `description` are read.
 * @returns {Promise<object>} The original listing when it has no usable link,
 *   otherwise a shallow copy with an absolute `link` and, when available,
 *   the `address`/`description` from the detail page.
 */
async function enrichListingFromDetails(listing) {
  const absoluteLink = toAbsoluteLink(listing.link);
  if (!absoluteLink) return listing;

  // Upper bound for a single detail request (headers and body). Without it a
  // stalled connection would keep the awaiting caller waiting indefinitely.
  const detailFetchTimeoutMs = 15000;

  const withAbsoluteLink = {
    ...listing,
    link: absoluteLink,
  };

  try {
    const response = await fetch(absoluteLink, {
      headers: {
        'User-Agent':
          'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
      },
      signal: AbortSignal.timeout(detailFetchTimeoutMs),
    });

    if (!response.ok) {
      return withAbsoluteLink;
    }

    const html = await response.text();
    const { detailAddress, detailDescription } = extractDetailFromHtml(html);

    return {
      ...withAbsoluteLink,
      address: detailAddress || listing.address,
      description: detailDescription || listing.description,
    };
  } catch (error) {
    // Covers network failures and the timeout abort alike; enrichment is
    // best-effort, so the listing is still returned.
    logger.warn(`Could not fetch Kleinanzeigen detail page for listing '${listing.id}'.`, error?.message || error);
    return withAbsoluteLink;
  }
}

/**
 * Loads the result list for `url` and returns its listings with absolute links.
 *
 * When the current user has the `kleinanzeigen_details` setting enabled, each
 * listing is additionally enriched from its detail page.
 *
 * @param {string} url - Search URL to crawl.
 * @returns {Promise<object[]>} The parsed (and optionally enriched) listings;
 *   an empty array when nothing could be parsed.
 */
async function getListings(url) {
  const extractor = new Extractor();
  await extractor.execute(url, config.waitForSelector);

  const parsed = extractor.parseResponseText(config.crawlContainer, config.crawlFields, url);
  const listings = parsed || [];

  let shouldFetchDetails = false;
  if (currentUserId) {
    shouldFetchDetails = Boolean(getUserSettings(currentUserId).kleinanzeigen_details);
  }

  if (shouldFetchDetails) {
    const enriched = [];
    // Each detail request is awaited before the next one starts.
    for (const listing of listings) {
      const enrichedListing = await enrichListingFromDetails(listing);
      enriched.push(enrichedListing);
    }
    return enriched;
  }

  return listings.map((listing) => {
    const link = toAbsoluteLink(listing.link) || listing.link;
    return { ...listing, link };
  });
}

/**
 * Normalizes a raw listing in place.
 *
 * Replaces `id` with a hash built from the original id and the price, and
 * fills in a placeholder when no size was scraped. The link is deliberately
 * left untouched: `getListings` already returns absolute links, so prefixing
 * the site origin here again would produce a doubled origin.
 *
 * @param {object} o - Listing to normalize; it is mutated.
 * @returns {object} The same listing object with updated `id` and `size`.
 */
function normalize(o) {
  const size = o.size || '--- m²';
  const id = buildHash(o.id, o.price);
  return Object.assign(o, { id, size });
}

function applyBlacklist(o) {
Expand All @@ -40,12 +210,13 @@ const config = {
address: '.aditem-main--top--left | trim | removeNewline',
image: 'img@src',
},
getListings: getListings,
normalize: normalize,
filter: applyBlacklist,
activeTester: checkIfListingIsActive,
};
/**
 * Static description of this provider: display name, site base URL and the
 * provider id.
 */
export const metaInformation = {
  name: 'Kleinanzeigen',
  baseUrl: 'https://www.kleinanzeigen.de/',
  id: 'kleinanzeigen',
};
Expand All @@ -54,5 +225,6 @@ export const init = (sourceConfig, blacklist, blacklistedDistricts) => {
config.url = sourceConfig.url;
appliedBlacklistedDistricts = blacklistedDistricts || [];
appliedBlackList = blacklist || [];
currentUserId = sourceConfig.userId || null;
};
export { config };
14 changes: 14 additions & 0 deletions ui/src/services/state/store.js
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,20 @@ export const useFredyState = create(
throw Exception;
}
},
async setKleinanzeigenDetails(enabled) {
try {
await xhrPost('/api/user/settings/kleinanzeigen-details', { kleinanzeigen_details: enabled });
set((state) => ({
userSettings: {
...state.userSettings,
settings: { ...state.userSettings.settings, kleinanzeigen_details: enabled },
},
}));
} catch (Exception) {
console.error('Error while trying to update kleinanzeigen details setting. Error:', Exception);
throw Exception;
}
},
},
};

Expand Down
27 changes: 27 additions & 0 deletions ui/src/views/generalSettings/GeneralSettings.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ const GeneralSettings = function GeneralSettings() {
// User settings state
const homeAddress = useSelector((state) => state.userSettings.settings.home_address);
const immoscoutDetails = useSelector((state) => state.userSettings.settings.immoscout_details);
const kleinanzeigenDetails = useSelector((state) => state.userSettings.settings.kleinanzeigen_details);
const [address, setAddress] = useState(homeAddress?.address || '');
const [coords, setCoords] = useState(homeAddress?.coords || null);
const saving = useIsLoading(actions.userSettings.setHomeAddress);
Expand Down Expand Up @@ -465,6 +466,32 @@ const GeneralSettings = function GeneralSettings() {
</div>
</SegmentPart>

<SegmentPart
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you put this in both the General Settings AND the User Settings? I'd vote for just putting it into the UserSettings.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've just thought about it again. Wouldn't it make sense to make the additional details job based and put it in the job settings?
For some jobs, the exact location might be more important than the others.
Like finding real estate in rural areas vs. cities.

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I do agree and I like this thinking.

We would need to adjust the logic to make it more generic, and we must reduce it to a single setting.

Would you enable it by default or disable it as a default setting?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have not encountered any rate limiting on ImmoScout or Kleinanzeigen thus far when running jobs.
So if it were up to me, I would leave it on by default because I find the additional details to be extremely valuable. ^^

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do you want to go about this? Do you want me to merge your PR and then build on top of it?
Would that be ok with you?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you can spare the time, sure!

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah. I just want to make sure it's ok with you as this is your source/idea.
Of course I'll attribute your work as such :)

name="Kleinanzeigen Details"
helpText="Fetch the individual listing page for each Kleinanzeigen result to extract a more detailed address and description. Makes an extra request per listing."
>
<Banner
type="warning"
description="Enabling this significantly increases requests to Kleinanzeigen, raising the chance of rate limiting or blocking. Use at your own risk."
closeIcon={null}
style={{ marginBottom: 12 }}
/>
<div style={{ display: 'flex', alignItems: 'center', gap: 10 }}>
<Switch
checked={!!kleinanzeigenDetails}
onChange={async (checked) => {
// Persist the new toggle value through the user-settings action and report
// the outcome to the user as a toast.
try {
await actions.userSettings.setKleinanzeigenDetails(checked);
Toast.success('Kleinanzeigen details setting updated.');
} catch {
// The error object is not inspected here; only a generic failure toast is shown.
Toast.error('Failed to update setting.');
}
}}
/>
<Text>Fetch detailed Kleinanzeigen listings</Text>
</div>
</SegmentPart>

<div className="generalSettings__save-row">
<Button
icon={<IconSave />}
Expand Down
28 changes: 28 additions & 0 deletions ui/src/views/userSettings/UserSettings.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ const UserSettings = () => {
const actions = useActions();
const homeAddress = useSelector((state) => state.userSettings.settings.home_address);
const immoscoutDetails = useSelector((state) => state.userSettings.settings.immoscout_details);
const kleinanzeigenDetails = useSelector((state) => state.userSettings.settings.kleinanzeigen_details);
const [address, setAddress] = useState(homeAddress?.address || '');
const [coords, setCoords] = useState(homeAddress?.coords || null);
const saving = useIsLoading(actions.userSettings.setHomeAddress);
Expand Down Expand Up @@ -112,6 +113,33 @@ const UserSettings = () => {
</div>
</SegmentPart>
<Divider />
<SegmentPart
name="Kleinanzeigen Details"
Icon={IconSearch}
helpText="When enabled, Fredy will fetch the individual listing page for each Kleinanzeigen result to extract a more detailed address and description. This makes an extra request per listing."
>
<Banner
type="warning"
description="Enabling this feature significantly increases the number of requests to Kleinanzeigen. This raises the likelihood of being detected and rate-limited or blocked. Use at your own risk."
closeIcon={null}
style={{ marginBottom: '12px', maxWidth: '600px' }}
/>
<div style={{ display: 'flex', alignItems: 'center', gap: '12px' }}>
<Switch
checked={!!kleinanzeigenDetails}
onChange={async (checked) => {
// Persist the new toggle value through the user-settings action and report
// the outcome to the user as a toast.
try {
await actions.userSettings.setKleinanzeigenDetails(checked);
Toast.success('Kleinanzeigen details setting updated.');
} catch {
// The error object is not inspected here; only a generic failure toast is shown.
Toast.error('Failed to update setting.');
}
}}
/>
<span>Fetch detailed Kleinanzeigen listings</span>
</div>
</SegmentPart>
<Divider />
<div style={{ marginTop: '20px' }}>
<Button icon={<IconSave />} theme="solid" type="primary" onClick={handleSave} loading={saving}>
Save Settings
Expand Down
Loading