From 0013139079ec06d072b3c81f31ad480bc9a7d4d8 Mon Sep 17 00:00:00 2001 From: CodingPF Date: Sat, 20 Dec 2025 17:09:31 +0100 Subject: [PATCH 01/23] ARD Grouping Structure test --- .../mserver/crawler/ard/ArdCrawler.java | 27 +++++++--- .../ard/json/ArdTeasersDeserializer.java | 30 +++++++---- .../ard/json/ArdTopicGroupsDeserializer.java | 52 +++++++++++++++++++ .../ard/json/ArdTopicsLetterDeserializer.java | 20 ++----- .../json/ArdVideoInfoJsonDeserializer.java | 2 +- .../crawler/ard/tasks/ArdFilmDetailTask.java | 4 +- .../crawler/ard/tasks/ArdTaskBase.java | 1 + .../crawler/ard/tasks/ArdTopicGroupsTask.java | 43 +++++++++++++++ .../crawler/ard/tasks/ArdTopicPageTask.java | 4 +- 9 files changed, 144 insertions(+), 39 deletions(-) create mode 100644 src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdTopicGroupsDeserializer.java create mode 100644 src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicGroupsTask.java diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java index 079e085b1..e95bc01dd 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java @@ -57,12 +57,13 @@ private Queue createDayUrlsToCrawl() { @Override protected RecursiveTask> createCrawlerTask() { - + ConcurrentLinkedQueue test = new ConcurrentLinkedQueue<>(); try { final ForkJoinTask> dayTask = forkJoinPool.submit(new ArdDayPageTask(this, createDayUrlsToCrawl())); final Set shows = dayTask.get(); + shows.clear(); printMessage( ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), shows.size()); @@ -74,8 +75,18 @@ protected RecursiveTask> createCrawlerTask() { senderTopicUrls.addAll(senderTopicTask.get()); } LOG.debug("sender topic tasks: {}", senderTopicUrls.size()); + final ArdTopicGroupsTask groupsToAsset = new ArdTopicGroupsTask(this, new 
ConcurrentLinkedQueue<>(senderTopicUrls)); + final Set assitUrls = new HashSet<>(); + assitUrls.addAll(forkJoinPool.submit(groupsToAsset).get()); + LOG.debug("sender group assit tasks: {}", assitUrls.size()); + + //test.add(new CrawlerUrlDTO("https://api.ardmediathek.de/page-gateway/widgets/swr/asset/Y3JpZDovL3N3ci5kZS8yNDEwMzY1MA?pageNumber=0&pageSize=48&embedded=true&seasoned=false&seasonNumber=&withAudiodescription=false&withOriginalWithSubtitle=false&withOriginalversion=false&single=false")); + test.add(new CrawlerUrlDTO("https://api.ardmediathek.de/page-gateway/widgets/wdr/asset/Y3JpZDovL3dkci5kZS93ZXN0cG9s?pageNumber=0&pageSize=48&embedded=true&seasoned=false&seasonNumber=&withAudiodescription=false&withOriginalWithSubtitle=false&withOriginalversion=false&single=false")); + final ArdTopicPageTask topicTask = - new ArdTopicPageTask(this, new ConcurrentLinkedQueue<>(senderTopicUrls)); + new ArdTopicPageTask(this, new ConcurrentLinkedQueue<>(assitUrls)); + //new ArdTopicPageTask(this, new ConcurrentLinkedQueue<>(test)); + final int showsCountBefore = shows.size(); shows.addAll(forkJoinPool.submit(topicTask).get()); LOG.debug( @@ -113,14 +124,14 @@ private Set>> createSenderTopicTasks() { } private ForkJoinTask> getTopicEntriesBySender(final String sender) throws ExecutionException, InterruptedException { - Set senderTopics = forkJoinPool.submit( - new ArdTopicsTask(this, sender, createTopicsOverviewUrl(sender))).get(); + Set senderSingleLetterUrls = forkJoinPool.submit( + new ArdTopicsTask(this, sender, CreateLetterUrlQuery(sender))).get(); - LOG.debug("topics task result {}", senderTopics.size()); - return forkJoinPool.submit(new ArdTopicsLetterTask(this, sender, new ConcurrentLinkedQueue<>(senderTopics))); + LOG.debug("topics task result {}", senderSingleLetterUrls.size()); + return forkJoinPool.submit(new ArdTopicsLetterTask(this, sender, new ConcurrentLinkedQueue<>(senderSingleLetterUrls))); } - private Queue createTopicsOverviewUrl(final String client) { + 
private Queue CreateLetterUrlQuery(final String client) { final Queue urls = new ConcurrentLinkedQueue<>(); final String url = String.format(ArdConstants.TOPICS_URL, client); @@ -128,4 +139,4 @@ private Queue createTopicsOverviewUrl(final String client) { return urls; } -} +} \ No newline at end of file diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdTeasersDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdTeasersDeserializer.java index 27d4e42b9..1cde6222d 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdTeasersDeserializer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdTeasersDeserializer.java @@ -56,21 +56,31 @@ private Optional toId(final JsonObject teaserObject) { } private ArdFilmInfoDto createFilmInfo(final String id, final int numberOfClips) { - final String url = String.format(ArdConstants.ITEM_URL, id); + String refId = id; + if(id.contains(":")) { + refId = id.replace(":", "%3A"); + } + + final String url = String.format(ArdConstants.ITEM_URL, refId); + + + + if (id.contains("a04c5a47-0801-40e5-b530-b7f9a4312be9:6898178275329995836") + || id.contains("Y3JpZDovL25kci5kZS9wcm9wbGFuXzE5NjM4MTA5N19nYW56ZVNlbmR1bmc") + || id.contains("1TDLUvc8cVEtcSb9GGsOnt:6898178275329995836") + || id.contains("6b64fc2c-4bd7-47ae-af6c-680e65b53b89") + ) { + System.out.println("stop"); + } + return new ArdFilmInfoDto(id, url, numberOfClips); } private boolean isRelevant(final JsonObject teaserObject) { - if (teaserObject.has(ELEMENT_PUBLICATION_SERVICE)) { - final JsonObject publicationService = - teaserObject.get(ELEMENT_PUBLICATION_SERVICE).getAsJsonObject(); - final Optional attributeAsString = - JsonUtils.getAttributeAsString(publicationService, ATTRIBUTE_PARTNER); - if (attributeAsString.isPresent()) { - return ArdConstants.PARTNER_TO_SENDER.get(attributeAsString.get()) != null; - } + Optional partner = JsonUtils.getElementValueAsString(teaserObject, 
ELEMENT_PUBLICATION_SERVICE, ATTRIBUTE_PARTNER); + if (partner.isPresent()) { + return ArdConstants.PARTNER_TO_SENDER.get(partner.get()) != null; } - return true; } } diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdTopicGroupsDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdTopicGroupsDeserializer.java new file mode 100644 index 000000000..f405e66ed --- /dev/null +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdTopicGroupsDeserializer.java @@ -0,0 +1,52 @@ +package de.mediathekview.mserver.crawler.ard.json; + +import com.google.gson.JsonArray; +import com.google.gson.JsonDeserializationContext; +import com.google.gson.JsonDeserializer; +import com.google.gson.JsonElement; +import de.mediathekview.mserver.base.utils.JsonUtils; +import de.mediathekview.mserver.crawler.ard.ArdConstants; +import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO; +import java.lang.reflect.Type; +import java.util.*; + +public class ArdTopicGroupsDeserializer implements JsonDeserializer> { + private static final String ELEMENT_WIDGETS = "widgets"; + private static final String ELEMENT_LINKS = "links"; + private static final String ELEMENT_TARGET = "self"; + private static final String ELEMENT_HREF = "href"; + private final int maxPageSize = ArdConstants.TOPICS_COMPILATION_PAGE_SIZE; + + + @Override + public Set deserialize( + JsonElement jsonElement, Type type, JsonDeserializationContext jsonDeserializationContext) { + final Set result = new HashSet<>(); + + if (JsonUtils.hasElements(jsonElement, ELEMENT_WIDGETS)) { + final JsonArray widgets = jsonElement.getAsJsonObject().getAsJsonArray(ELEMENT_WIDGETS); + widgets.forEach(widget -> parseWidget(widget.getAsJsonObject()).ifPresent(result::add)); + } + + return result; + } + + private Optional parseWidget(final JsonElement compilation) { + Optional totalElements = JsonUtils.getElementValueAsString(compilation, "pagination", "totalElements"); + if 
(totalElements.isEmpty() || totalElements.get() == null || totalElements.get().trim().length() == 0 || totalElements.get().trim().equalsIgnoreCase("0")) { + return Optional.empty(); + } + if (JsonUtils.hasElements(compilation, ELEMENT_LINKS)) { + final JsonElement selfLink = + compilation.getAsJsonObject().get(ELEMENT_LINKS).getAsJsonObject().get(ELEMENT_TARGET); + final Optional url = JsonUtils.getElementValueAsString(selfLink, ELEMENT_HREF); + + if (url.isPresent()) { + String x = url.get().replaceAll("pageSize=\\d+", "pageSize="+this.maxPageSize); + return Optional.of(new CrawlerUrlDTO(x)); + } + } + + return Optional.empty(); + } +} diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdTopicsLetterDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdTopicsLetterDeserializer.java index fc55746eb..07b8d0c6d 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdTopicsLetterDeserializer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdTopicsLetterDeserializer.java @@ -71,24 +71,10 @@ private int getJsonElementAsIntOrNullIfNotExist(final JsonElement element) { private Set parseTeaser(final JsonObject teaserObject) { final Set results = new HashSet<>(); - final Optional id; - - if (JsonUtils.checkTreePath(teaserObject, null, ELEMENT_LINKS, ELEMENT_TARGET)) { - final JsonObject targetObject = - teaserObject.get(ELEMENT_LINKS).getAsJsonObject().get(ELEMENT_TARGET).getAsJsonObject(); - id = JsonUtils.getAttributeAsString(targetObject, ATTRIBUTE_ID); - } else { - id = JsonUtils.getAttributeAsString(teaserObject, ATTRIBUTE_ID); - } - if (isRelevant(teaserObject)) { - id.ifPresent( - nonNullId -> - results.add( - new CrawlerUrlDTO( - String.format( - ArdConstants.TOPIC_URL, nonNullId, ArdConstants.TOPIC_PAGE_SIZE)))); + final Optional urlToGroup = JsonUtils.getElementValueAsString(teaserObject, ELEMENT_LINKS, ELEMENT_TARGET, "href"); + if (isRelevant(teaserObject) && urlToGroup.isPresent()) 
{ + results.add(new CrawlerUrlDTO(urlToGroup.get())); } - return results; } diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdVideoInfoJsonDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdVideoInfoJsonDeserializer.java index 6b0b0d8ef..b0fbe33bc 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdVideoInfoJsonDeserializer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdVideoInfoJsonDeserializer.java @@ -121,7 +121,7 @@ public Map loadM3U8(URL m3u8File) { if (UrlUtils.getProtocol(videoUrl).isEmpty()) { videoUrl = baseUrl + videoUrl; } - urls.put(resolution.get(), URI.create(videoUrl).toURL()); + urls.put(resolution.get(), new URL(videoUrl)); } catch (final MalformedURLException malformedURLException) { LOG.error( "ArdVideoInfoJsonDeserializer: invalid url {}", diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdFilmDetailTask.java b/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdFilmDetailTask.java index 9b6d3f896..5aafd594f 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdFilmDetailTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdFilmDetailTask.java @@ -57,7 +57,7 @@ protected void processRestTarget(final ArdFilmInfoDto aDTO, final WebTarget aTar taskResults.add(result); if (aDTO.getNumberOfClips() > 1) { - processRelatedFilms(filmDto.getRelatedFilms()); + //processRelatedFilms(filmDto.getRelatedFilms()); } } crawler.incrementAndGetActualCount(); @@ -83,7 +83,7 @@ private void processRelatedFilms(final Set relatedFilms) { private Optional getWebsiteUrl(final ArdFilmInfoDto aDTO) { final String url = String.format(ArdConstants.WEBSITE_URL, aDTO.getId()); try { - return Optional.of(URI.create(url).toURL()); + return Optional.of(new URL(url)); } catch (final MalformedURLException e) { LOG.error(e); } diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTaskBase.java 
b/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTaskBase.java index c9a3d09ba..ccab6a077 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTaskBase.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTaskBase.java @@ -101,6 +101,7 @@ private Response executeRequest(final WebTarget aTarget) { } return request + .header("Accept-Encoding", "br, gzip, deflate, zstd") .header(HEADER_ACCEPT, APPLICATION_JSON) .header(HEADER_CONTENT_TYPE, APPLICATION_JSON) .get(); diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicGroupsTask.java b/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicGroupsTask.java new file mode 100644 index 000000000..741dc6f62 --- /dev/null +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicGroupsTask.java @@ -0,0 +1,43 @@ +package de.mediathekview.mserver.crawler.ard.tasks; + +import com.google.gson.reflect.TypeToken; +import de.mediathekview.mserver.crawler.ard.PaginationUrlDto; +import de.mediathekview.mserver.crawler.ard.json.ArdTopicGroupsDeserializer; +import de.mediathekview.mserver.crawler.basic.AbstractCrawler; +import de.mediathekview.mserver.crawler.basic.AbstractRecursiveConverterTask; +import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO; +import jakarta.ws.rs.client.WebTarget; +import java.lang.reflect.Type; +import java.util.HashSet; +import java.util.Queue; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +public class ArdTopicGroupsTask extends ArdTaskBase { + private static final Logger LOG = LogManager.getLogger(ArdTopicGroupsTask.class); + private static final String PAGE_NUMBER = "pageNumber"; + private static final String URL_PAGE_NUMBER_REPLACE_REGEX = PAGE_NUMBER + "=\\d+"; + private static final String PAGE_NUMBER_URL_ENCODED = PAGE_NUMBER + "="; + private static final Type DTO_TYPE_TOKEN = + new TypeToken>() {}.getType(); + + public ArdTopicGroupsTask( + final 
AbstractCrawler crawler, + final Queue urlToCrawlDtos) { + super(crawler, urlToCrawlDtos); + registerJsonDeserializer(DTO_TYPE_TOKEN, new ArdTopicGroupsDeserializer()); + } + + @Override + protected AbstractRecursiveConverterTask createNewOwnInstance( + final Queue aElementsToProcess) { + return new ArdTopicGroupsTask(crawler, aElementsToProcess); + } + + @Override + protected void processRestTarget(final CrawlerUrlDTO aDTO, final WebTarget aTarget) { + taskResults.addAll(deserialize(aTarget, DTO_TYPE_TOKEN, aDTO)); + } + + +} diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicPageTask.java b/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicPageTask.java index 53472d506..95020d2ca 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicPageTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicPageTask.java @@ -78,7 +78,9 @@ private String changePageNumber(final WebTarget aTarget, final int newPageNumber .getUri() .getRawQuery() .replaceAll( - URL_PAGE_NUMBER_REPLACE_REGEX, PAGE_NUMBER_URL_ENCODED + newPageNumber)) + URL_PAGE_NUMBER_REPLACE_REGEX, PAGE_NUMBER_URL_ENCODED + newPageNumber) + .replaceAll( + "pageNumber=\\d+", "pageNumber=" + newPageNumber)) .build() .toString() : aTarget.queryParam(PAGE_NUMBER, newPageNumber).getUri().toString(); From d829e47a4dfcf44e80af28f0d529fa7dc301b7f5 Mon Sep 17 00:00:00 2001 From: CodingPF Date: Tue, 23 Dec 2025 12:45:50 +0100 Subject: [PATCH 02/23] init 2.0 --- .../base/utils/CheckUrlAvailability.java | 16 +- .../mserver/base/utils/FilmDBService.java | 303 ++++++++++++++++++ .../base/utils/GPDataSourceProvider.java | 51 +++ .../mserver/crawler/CrawlerManager.java | 15 +- .../mserver/crawler/ard/ArdCrawler.java | 10 +- .../crawler/ard/json/ArdFilmDeserializer.java | 6 +- .../mserver/crawler/arte/ArteCrawler.java | 12 +- .../crawler/arte/json/ArteVideoInfoDto.java | 4 +- .../arte/tasks/ArteDtoVideo2FilmTask.java | 1 + 
.../crawler/basic/AbstractCrawler.java | 18 +- .../mserver/crawler/dw/DWTaskBase.java | 2 +- .../mserver/crawler/dw/DwCrawler.java | 14 +- .../parser/DWSendungOverviewDeserializer.java | 26 +- .../dw/parser/DwFilmDetailDeserializer.java | 1 + .../crawler/dw/tasks/DWOverviewTask.java | 15 +- .../crawler/dw/tasks/DwFilmDetailTask.java | 11 +- .../mserver/crawler/kika/KikaApiCrawler.java | 8 +- .../crawler/kika/tasks/KikaApiFilmTask.java | 1 + .../mserver/crawler/orfon/OrfOnCrawler.java | 4 +- .../crawler/orfon/task/OrfOnEpisodeTask.java | 1 + .../crawler/phoenix/PhoenixCrawler.java | 7 +- .../phoenix/tasks/PhoenixFilmDetailTask.java | 1 + .../mserver/crawler/sr/SrCrawler.java | 8 +- .../crawler/sr/tasks/SrFilmDetailTask.java | 2 +- .../mserver/crawler/srf/SrfCrawler.java | 8 +- .../srf/parser/SrfFilmJsonDeserializer.java | 1 + .../crawler/zdf/AbstractZdfCrawler.java | 11 +- .../mserver/crawler/zdf/ZdfCrawler.java | 13 +- .../mserver/crawler/zdf/ZdfFilmDto.java | 16 +- .../zdf/json/ZdfDayPageDeserializer.java | 17 +- .../crawler/zdf/json/ZdfDayPageDto.java | 8 +- .../zdf/json/ZdfFilmDetailDeserializer.java | 11 +- .../crawler/zdf/json/ZdfTopicBaseClass.java | 14 +- .../crawler/zdf/tasks/ZdfFilmDetailTask.java | 2 +- .../crawler/zdf/tasks/ZdfFilmTask.java | 4 +- .../crawler/zdf/tasks/ZdfTaskBase.java | 3 + .../mserver/daten/AbstractMediaResource.java | 17 +- .../de/mediathekview/mserver/daten/Film.java | 20 ++ .../mserver/ui/config/MServerCommandLine.java | 62 ++++ .../mserver/ui/config/MServerConfigUI.java | 102 +++--- .../ui/config/MServerExecutionFlow.java | 122 +++++++ .../arte/tasks/ArteVideoInfoTaskTest.java | 2 +- .../dw/tasks/DWOverviewDeserializerTest.java | 3 +- .../json/ZdfTopicSeasonDeserializerTest.java | 16 +- 44 files changed, 832 insertions(+), 157 deletions(-) create mode 100644 src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java create mode 100644 src/main/java/de/mediathekview/mserver/base/utils/GPDataSourceProvider.java create mode 
100644 src/main/java/de/mediathekview/mserver/ui/config/MServerCommandLine.java create mode 100644 src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java diff --git a/src/main/java/de/mediathekview/mserver/base/utils/CheckUrlAvailability.java b/src/main/java/de/mediathekview/mserver/base/utils/CheckUrlAvailability.java index 42cedf74b..7fd17d335 100644 --- a/src/main/java/de/mediathekview/mserver/base/utils/CheckUrlAvailability.java +++ b/src/main/java/de/mediathekview/mserver/base/utils/CheckUrlAvailability.java @@ -32,8 +32,11 @@ public CheckUrlAvailability(final long minFileSize, final long timeoutInSec, fin fsd = new FileSizeDeterminer(30L, 30L, numberOfThreads); } - public Filmlist getAvaiableFilmlist(final Filmlist importList) { - LOG.debug("start getAvaiableFilmlist(minSize {} byte, timeout {} sec)", this.minFileSize, (this.timeoutInMS/1000)); + public Filmlist getAvailableFilmlist(final Filmlist importList) { + return getAvailableFilmlist(importList, true); + } + public Filmlist getAvailableFilmlist(final Filmlist importList, final boolean available) { + LOG.debug("start getAvailableFilmlist(minSize {} byte, timeout {} sec)", this.minFileSize, (this.timeoutInMS/1000)); start = System.currentTimeMillis(); Filmlist filteredFilmlist = new Filmlist(); filteredFilmlist.setCreationDate(importList.getCreationDate()); @@ -41,10 +44,11 @@ public Filmlist getAvaiableFilmlist(final Filmlist importList) { // ForkJoinPool customThreadPool = new ForkJoinPool(numberOfThreads); customThreadPool.submit(() -> importList.getFilms().values().parallelStream() - .filter(this::isAvailable) + .filter(film -> isAvailable(film) == available) .forEach(filteredFilmlist::add)) .join(); customThreadPool.shutdown(); + customThreadPool.close(); // LOG.debug("checked {} urls and removed {} in {} sec and timeout was reached: {}", importList.getFilms().size(), removedCounter.get(), ((System.currentTimeMillis()-start)/1000), timeout.get()); return filteredFilmlist; @@ 
-55,8 +59,10 @@ private boolean isAvailable(Film pFilm) { timeout.set(true); return true; } - - String normalUrl = pFilm.getUrl(Resolution.NORMAL).getUrl().toString(); + if(pFilm.getDefaultUrl().isEmpty()) { + System.out.println("asdf"); + } + String normalUrl = pFilm.getDefaultUrl().get().getUrl().toString(); ResponseInfo ri = fsd.getRequestInfo(normalUrl); if (pFilm.getThema().equalsIgnoreCase("Livestream")) { diff --git a/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java new file mode 100644 index 000000000..682aabf52 --- /dev/null +++ b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java @@ -0,0 +1,303 @@ +package de.mediathekview.mserver.base.utils; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; + +import de.mediathekview.mserver.daten.Film; +import de.mediathekview.mserver.daten.Filmlist; +import de.mediathekview.mserver.daten.GsonDurationAdapter; +import de.mediathekview.mserver.daten.GsonLocalDateTimeAdapter; + +import javax.sql.DataSource; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.io.IOException; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.time.Duration; +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Function; +import java.util.stream.Collectors; + +/** + * Service zum Speichern einzelner Filme aus einer Filmlist in die DB. Nutzt + * Batch-UPSERT und einen vorhandenen ExecutorService für Parallelität. 
+ */ +public class FilmDBService { + private static final Logger LOG = LogManager.getLogger(FilmDBService.class); + private final DataSource dataSource; + private final Gson gson; + private final ExecutorService executorService; + private final int batchSize; + + public FilmDBService(DataSource dataSource, ExecutorService executorService, int batchSize) { + this.dataSource = dataSource; + this.executorService = executorService; + this.batchSize = batchSize; + + this.gson = new GsonBuilder().registerTypeAdapter(LocalDateTime.class, new GsonLocalDateTimeAdapter()) + .registerTypeAdapter(Duration.class, new GsonDurationAdapter()).create(); + } + + + public void update(String sql) { + try (Connection con = dataSource.getConnection(); PreparedStatement ps = con.prepareStatement(sql)) { + LOG.debug("updated {} rows", ps.executeUpdate()); + } catch (Exception e) { + LOG.error(e); + } + } + + ///////////////////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////////////// + + public void deleteFilms(Collection abandonedFilmlist) { + try { + List>> futures = new ArrayList<>(); + List allVideos = abandonedFilmlist.stream() + .sorted(Comparator.comparing(Film::getId)) + .toList(); + for (int i = 0; i < allVideos.size(); i += batchSize) { + int from = i; + int to = Math.min(i + batchSize, allVideos.size()); + List batch = allVideos.subList(from, to); + futures.add(executorService.submit(() -> { + List newVideos = new ArrayList<>(); + String sql = "DELETE FROM filme WHERE id = ?"; + try (Connection con = dataSource.getConnection(); PreparedStatement ps = con.prepareStatement(sql)) { + for (Film video : batch) { + ps.setString(1, video.getId()); + ps.addBatch(); + } + ps.executeBatch(); + } catch (SQLException e) { + LOG.error(e); + } + return newVideos; + })); + } + List result = new ArrayList<>(); + for (Future> f : futures) { + result.addAll(f.get()); + } + LOG.debug("deleted 
{}", abandonedFilmlist.size()); + + } catch (Exception e) { + LOG.error(e); + } + } + + ///////////////////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////////////// + + public Optional readFilmlistFromDB() { + return readFilmlistFromDB(""); + } + + public Optional readFilmlistFromDB(String where) { + LOG.debug("fetch data from DB"); + int readCounter = 0; + Filmlist list = new Filmlist(); + try (Connection con = dataSource.getConnection(); + PreparedStatement ps = con.prepareStatement("SELECT data FROM filme " + where + " ORDER BY data ->> 'sender', data ->> 'thema', data ->> 'titel'"); + ) { + ps.setFetchSize(50000); + try (ResultSet rs = ps.executeQuery()) { + while (rs.next()) { + String json = rs.getString("data"); + list.add(gson.fromJson(json, Film.class)); + readCounter++; + } + } + LOG.debug("Filmlist read {} records and imported {} records", readCounter, list.getFilms().size()); + return Optional.of(list); + } catch (Exception e) { + LOG.error(e); + } + return Optional.empty(); + } + + ///////////////////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////////////// + + public List filterNewVideos(List videos, Function idExtractor) { + try { + List>> futures = new ArrayList<>(); + + List allVideos = videos.stream() + .sorted(Comparator.comparing(idExtractor)) + .toList(); + + for (int i = 0; i < allVideos.size(); i += batchSize) { + int from = i; + int to = Math.min(i + batchSize, allVideos.size()); + List batch = allVideos.subList(from, to); + + futures.add(executorService.submit(() -> { + List newVideos = new ArrayList<>(); + + String sql = "UPDATE filme SET last_update = now() WHERE id = ?"; + + try (Connection con = dataSource.getConnection(); PreparedStatement ps = con.prepareStatement(sql)) { + + for (T video : batch) { + String id = 
idExtractor.apply(video); + if (id != null) { + ps.setString(1, id); + ps.addBatch(); + } else { + LOG.error("filterNewVideos - Missing ID for Film {}", video); + } + } + int[] rs = ps.executeBatch(); + for (int rsIndex = 0; rsIndex < rs.length; rsIndex++) { + if (rs[rsIndex] == 0) { + newVideos.add(batch.get(rsIndex)); + } + } + + } catch (SQLException e) { + LOG.error(e); + } + return newVideos; + })); + } + List result = new ArrayList<>(); + for (Future> f : futures) { + result.addAll(f.get()); + } + LOG.debug("Filtered {} (in {} out {})",(videos.size()-result.size()), videos.size(), result.size()); + return result; + } catch (Exception e) { + return videos; + } + } + + + + //////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////// + + public HashSet getAllVideoUrls() { + HashSet allVideoUrls = new HashSet(); + String sql = """ + SELECT + data -> 'urls' -> 'SMALL' ->> 'url' aSmall, + data -> 'urls' -> 'NORMAL' ->> 'url' aNormal, + data -> 'urls' -> 'HD' ->> 'url' aHD + FROM filme + """; + try (Connection con = dataSource.getConnection(); PreparedStatement ps = con.prepareStatement(sql)) { + try (ResultSet rs = ps.executeQuery()) { + while (rs.next()) { + allVideoUrls.add(rs.getString(1)); + allVideoUrls.add(rs.getString(2)); + allVideoUrls.add(rs.getString(3)); + } + } + } catch (SQLException e) { + LOG.error("getAllVideoUrls failed", e); + } + return allVideoUrls; + } + + //////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////// + + /** + * Speichert alle Filme einer Filmlist parallel in der DB. 
+ */ + public void saveAll(Filmlist filmlist) throws Exception { + // Map in List konvertieren + List films = new ArrayList<>(filmlist.getFilms().values()); + films = makeUniqueIds(films); + AtomicInteger successCounter = new AtomicInteger(0); + List> futures = new ArrayList<>(); + + for (int i = 0; i < films.size(); i += batchSize) { + int from = i; + int to = Math.min(i + batchSize, films.size()); + List batch = films.subList(from, to); + + futures.add(executorService.submit(() -> { + try { + successCounter.addAndGet(saveBatch(batch)); + } catch (SQLException | IOException e) { + LOG.error(e); + } + })); + } + + for (Future f : futures) { + f.get(); + } + + LOG.info("Stored {} films in DB", successCounter.get()); + } + + /** + * Speichert einen Batch von Filmen als Upsert in der DB. + */ + private int saveBatch(List films) throws SQLException, IOException { + int successCounter = 0; + + String sql = """ + INSERT INTO filme (id, data, created_at, last_update) + VALUES (?, ?::jsonb, now(), now()) + ON CONFLICT (id) DO UPDATE + SET data = EXCLUDED.data, + last_update = now(), + created_at = filme.created_at + """; + + try (Connection con = dataSource.getConnection(); PreparedStatement ps = con.prepareStatement(sql)) { + + for (Film film : films) { + if(film.getId() != null) { + ps.setString(1, film.getId()); + ps.setString(2, gson.toJson(film)); + ps.addBatch(); + successCounter++; + } else { + LOG.error("saveBatch - Missing ID for film {}", film); + } + } + + ps.executeBatch(); + } + return successCounter; + } + + private static List makeUniqueIds(List films) { + Map idCount = new HashMap<>(); + + return films.stream().map(film -> { + String originalId = film.getId(); + AtomicInteger count = idCount.computeIfAbsent(originalId, k -> new AtomicInteger(0)); + + int c = count.getAndIncrement(); + if (c == 0) { + return film; // erste ID bleibt unverändert + } else { + // Duplikat → neue ID mit Suffix #1, #2 ... 
+ film.setId(originalId + "#" + c); + return film; + } + }).collect(Collectors.toList()); + } +} diff --git a/src/main/java/de/mediathekview/mserver/base/utils/GPDataSourceProvider.java b/src/main/java/de/mediathekview/mserver/base/utils/GPDataSourceProvider.java new file mode 100644 index 000000000..3e2883194 --- /dev/null +++ b/src/main/java/de/mediathekview/mserver/base/utils/GPDataSourceProvider.java @@ -0,0 +1,51 @@ +package de.mediathekview.mserver.base.utils; + +import com.zaxxer.hikari.HikariConfig; +import com.zaxxer.hikari.HikariDataSource; + +import javax.sql.DataSource; + +public final class GPDataSourceProvider { + + private static final HikariDataSource DATA_SOURCE; + + static { + HikariConfig cfg = new HikariConfig(); + + // === JDBC === + cfg.setJdbcUrl("jdbc:postgresql://OscarDS:55432/crawler"); + cfg.setUsername("crawler"); + cfg.setPassword("secret"); + + // === Pool Sizing (wichtig!) === + cfg.setMaximumPoolSize(16); // Sweet Spot für 10k+/min + cfg.setMinimumIdle(4); + + // === Performance === + cfg.setAutoCommit(true); + cfg.setConnectionTimeout(3000); + cfg.setIdleTimeout(600_000); + cfg.setMaxLifetime(1_800_000); + + // === PostgreSQL Optimierungen === + cfg.addDataSourceProperty("reWriteBatchedInserts", "true"); + cfg.addDataSourceProperty("stringtype", "unspecified"); + + // === Debug (optional) === + cfg.setPoolName("CrawlerPool"); + + DATA_SOURCE = new HikariDataSource(cfg); + } + + private GPDataSourceProvider() { + // no instances + } + + public static DataSource get() { + return DATA_SOURCE; + } + + public static void shutdown() { + DATA_SOURCE.close(); + } +} diff --git a/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java b/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java index 3f3d5f87f..1f97bc961 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java +++ b/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java @@ -15,6 +15,8 @@ import 
de.mediathekview.mserver.base.uploader.copy.FileCopyTarget; import de.mediathekview.mserver.base.uploader.copy.FileCopyTask; import de.mediathekview.mserver.base.utils.CheckUrlAvailability; +import de.mediathekview.mserver.base.utils.FilmDBService; +import de.mediathekview.mserver.base.utils.GPDataSourceProvider; import de.mediathekview.mserver.crawler.ard.ArdCrawler; import de.mediathekview.mserver.crawler.arte.ArteCrawler; import de.mediathekview.mserver.crawler.arte.ArteCrawler_EN; @@ -44,6 +46,7 @@ import java.util.*; import java.util.Map.Entry; import java.util.concurrent.*; +import javax.sql.DataSource; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.NotNull; @@ -92,6 +95,16 @@ public CrawlerManager(MServerConfigManager aMServerConfigManager) { public MServerConfigManager getConfigManager() { return rootConfig; } + + public void storeFilmsToDB() { + DataSource ds = GPDataSourceProvider.get(); + FilmDBService filmDBService = new FilmDBService(ds, executorService, 200); + try { + filmDBService.saveAll(filmlist); + } catch (Exception e) { + LOG.error(e); + } + } public void copyFilmlist() { final MServerCopySettings copySettings = config.getCopySettings(); @@ -200,7 +213,7 @@ public void importFilmlist(final ImportFilmlistConfiguration importFilmlistConfi config.getCheckImportListUrlMinSize(), config.getCheckImportListUrlTimeoutInSec(), config.getMaximumCpuThreads()) - .getAvaiableFilmlist(importedFilmlist.get()) + .getAvailableFilmlist(importedFilmlist.get()) ); } // diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java index 079e085b1..54c7cc922 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java @@ -81,11 +81,13 @@ protected RecursiveTask> createCrawlerTask() { LOG.debug( "ARD crawler found {} 
topics for all sub-sender.", shows.size() - showsCountBefore); } - + // + final Queue showsFiltered = this.filterExistingFilms(shows, ArdFilmInfoDto::getId); + // printMessage( - ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), shows.size()); - getAndSetMaxCount(shows.size()); - return new ArdFilmDetailTask(this, new ConcurrentLinkedQueue<>(shows)); + ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), showsFiltered.size()); + getAndSetMaxCount(showsFiltered.size()); + return new ArdFilmDetailTask(this, new ConcurrentLinkedQueue<>(showsFiltered)); } catch (final InterruptedException ex) { LOG.fatal("Exception in ARD crawler.", ex); Thread.currentThread().interrupt(); diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java index eb182a208..1a3d214fd 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java @@ -168,6 +168,7 @@ public List deserialize( final JsonObject itemObject = widgets.get(0).getAsJsonObject(); final Optional topic = parseTopic(itemObject); + Optional id = JsonUtils.getAttributeAsString(itemObject, "id"); Optional titleOriginal = JsonUtils.getAttributeAsString(itemObject, ATTRIBUTE_TITLE); final Optional title = parseTitle(itemObject); final Optional description = JsonUtils.getAttributeAsString(itemObject, ATTRIBUTE_SYNOPSIS); @@ -197,6 +198,7 @@ public List deserialize( final ArdFilmDto filmDto = new ArdFilmDto( createFilm( + id.get(), sender, topic.get(), title.get(), @@ -217,6 +219,7 @@ public List deserialize( final ArdFilmDto filmDtoOV = new ArdFilmDto( createFilm( + id.get(), sender, topic.get(), title.get() + " (Originalversion)", @@ -301,6 +304,7 @@ private void parseRelatedFilms(final ArdFilmDto filmDto, final JsonObject player } private Film createFilm( + 
final String id, final Sender sender, final String topic, final String title, @@ -319,7 +323,7 @@ private Film createFilm( duration == null ? Duration.ofSeconds(0) : duration); Optional.ofNullable(description).ifPresent(film::setBeschreibung); - + film.setId(id); film.setGeoLocations(GeoLocationGuesser.getGeoLocations(Sender.ARD, videoInfo.getDefaultVideoUrl())); if (!videoInfo.getSubtitleUrl().isEmpty()) { diff --git a/src/main/java/de/mediathekview/mserver/crawler/arte/ArteCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/arte/ArteCrawler.java index 6aa194e11..7d060001d 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/arte/ArteCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/arte/ArteCrawler.java @@ -62,16 +62,18 @@ protected RecursiveTask> createCrawlerTask() { final ArteVideoInfoTask aArteRestVideoInfoTask; // DO NOT overload - maximumUrlsPerTask used to reduce threads to 4 aArteRestVideoInfoTask = new ArteVideoInfoTask(this, videoUrls, getMaxPagesForOverview(getLanguage().toString().toLowerCase())); - final Queue videos = new ConcurrentLinkedQueue<>(); - videos.addAll(aArteRestVideoInfoTask.fork().join()); + final Queue videosRaw = new ConcurrentLinkedQueue<>(); + videosRaw.addAll(aArteRestVideoInfoTask.fork().join()); + // + final Queue videosFiltered = this.filterExistingFilms(videosRaw, ArteVideoInfoDto::getId); // printMessage( - ServerMessages.DEBUG_ALL_SENDUNG_COUNT, getSender().getName(), videos.size()); - getAndSetMaxCount(videos.size()); + ServerMessages.DEBUG_ALL_SENDUNG_COUNT, getSender().getName(), videosFiltered.size()); + getAndSetMaxCount(videosFiltered.size()); updateProgress(); // final Queue videosWithLink = new ConcurrentLinkedQueue<>(); - final ArteVideoLinkTask aArteRestVideosTask = new ArteVideoLinkTask(this, videos); + final ArteVideoLinkTask aArteRestVideosTask = new ArteVideoLinkTask(this, videosFiltered); videosWithLink.addAll(aArteRestVideosTask.fork().join()); // printMessage( diff --git 
a/src/main/java/de/mediathekview/mserver/crawler/arte/json/ArteVideoInfoDto.java b/src/main/java/de/mediathekview/mserver/crawler/arte/json/ArteVideoInfoDto.java index ca8ca4940..b58ab6e18 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/arte/json/ArteVideoInfoDto.java +++ b/src/main/java/de/mediathekview/mserver/crawler/arte/json/ArteVideoInfoDto.java @@ -85,8 +85,8 @@ public ArteVideoInfoDto(Optional firstBroadcastDate, Optional id public Optional getFirstBroadcastDate() { return firstBroadcastDate; } - public Optional getId() { - return id; + public String getId() { + return id.orElse(""); } public Optional getProgramId() { return programId; diff --git a/src/main/java/de/mediathekview/mserver/crawler/arte/tasks/ArteDtoVideo2FilmTask.java b/src/main/java/de/mediathekview/mserver/crawler/arte/tasks/ArteDtoVideo2FilmTask.java index b8a14ca7e..a46272013 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/arte/tasks/ArteDtoVideo2FilmTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/arte/tasks/ArteDtoVideo2FilmTask.java @@ -105,6 +105,7 @@ protected Film buildFilmBody(ArteVideoInfoDto aElement) { buildAired(aElement), buildDuration(aElement) ); + film.setId(aElement.getId()); film.addGeolocation(buildGeoLocation(aElement)); film.setBeschreibung(buildDescription(aElement)); film.setWebsite(buildWebsite(aElement)); diff --git a/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractCrawler.java index 21ee96aa2..f7365d9bc 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractCrawler.java @@ -5,6 +5,8 @@ import de.mediathekview.mserver.base.messages.Message; import de.mediathekview.mserver.base.messages.listener.MessageListener; import de.mediathekview.mserver.base.progress.Progress; +import de.mediathekview.mserver.base.utils.FilmDBService; +import 
de.mediathekview.mserver.base.utils.GPDataSourceProvider; import de.mediathekview.mserver.base.config.MServerBasicConfigDTO; import de.mediathekview.mserver.base.config.MServerConfigDTO; import de.mediathekview.mserver.base.config.MServerConfigManager; @@ -20,13 +22,17 @@ import java.io.IOException; import java.time.Duration; import java.time.LocalDateTime; +import java.util.ArrayDeque; +import java.util.ArrayList; import java.util.Collection; +import java.util.Queue; import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ForkJoinPool; import java.util.concurrent.RecursiveTask; import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Function; /** A basic crawler task. */ public abstract class AbstractCrawler implements Callable> { @@ -44,6 +50,7 @@ public abstract class AbstractCrawler implements Callable> { private LocalDateTime startTime; protected JsoupConnection jsoupConnection; protected RateLimiter rateLimiter; + protected FilmDBService filmDBService; protected AbstractCrawler( final ForkJoinPool aForkJoinPool, @@ -65,10 +72,19 @@ protected AbstractCrawler( rootConfig.getSenderConfig(getSender()).getSocketTimeoutInSeconds(), runtimeConfig.getMaximumCpuThreads()); rateLimiter = RateLimiter.create(rootConfig.getSenderConfig(getSender()).getMaximumRequestsPerSecond()); - + filmDBService = new FilmDBService(GPDataSourceProvider.get(), forkJoinPool, 200); films = ConcurrentHashMap.newKeySet(); } + public Queue filterExistingFilms(Collection input, Function idExtractor) { + return new ArrayDeque<>( + filmDBService.filterNewVideos( + new ArrayList<>(input), + idExtractor + ) + ); + } + @Override public Set call() { final TimeoutTask timeoutRunner = diff --git a/src/main/java/de/mediathekview/mserver/crawler/dw/DWTaskBase.java b/src/main/java/de/mediathekview/mserver/crawler/dw/DWTaskBase.java index 7cc07c7c7..1c04f95aa 100644 --- 
a/src/main/java/de/mediathekview/mserver/crawler/dw/DWTaskBase.java +++ b/src/main/java/de/mediathekview/mserver/crawler/dw/DWTaskBase.java @@ -26,7 +26,7 @@ public abstract class DWTaskBase extends AbstractRes protected DWTaskBase( final AbstractCrawler aCrawler, final Queue aUrlToCrawlDtos, final String authKey) { super(aCrawler, aUrlToCrawlDtos, authKey); - gsonBuilder = new GsonBuilder();aCrawler.getSender(); + gsonBuilder = new GsonBuilder(); } protected void registerJsonDeserializer(final Type aType, final Object aDeserializer) { diff --git a/src/main/java/de/mediathekview/mserver/crawler/dw/DwCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/dw/DwCrawler.java index 707c91653..a42900809 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/dw/DwCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/dw/DwCrawler.java @@ -7,6 +7,7 @@ import de.mediathekview.mserver.base.messages.ServerMessages; import de.mediathekview.mserver.crawler.basic.AbstractCrawler; import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO; +import de.mediathekview.mserver.crawler.basic.TopicUrlDTO; import de.mediathekview.mserver.crawler.dw.tasks.DWOverviewTask; import de.mediathekview.mserver.crawler.dw.tasks.DwFilmDetailTask; import de.mediathekview.mserver.progress.listeners.SenderProgressListener; @@ -41,15 +42,14 @@ public Sender getSender() { @Override protected RecursiveTask> createCrawlerTask() { - Queue shows =new ConcurrentLinkedQueue<>(); + Queue shows = new ConcurrentLinkedQueue<>(); try { shows.addAll(getShows()); + Queue showsFiltered = this.filterExistingFilms(shows, TopicUrlDTO::getTopic); printMessage( - ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), shows.size()); - getAndSetMaxCount(shows.size()); - - return new DwFilmDetailTask(this,shows); - + ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), showsFiltered.size()); + getAndSetMaxCount(showsFiltered.size()); + return new 
DwFilmDetailTask(this,showsFiltered); } catch (final InterruptedException ex) { LOG.debug("{} crawler interrupted.", getSender().getName(), ex); Thread.currentThread().interrupt(); @@ -59,7 +59,7 @@ protected RecursiveTask> createCrawlerTask() { return null; } - private Collection getShows() throws ExecutionException, InterruptedException { + private Collection getShows() throws ExecutionException, InterruptedException { final CrawlerUrlDTO url = new CrawlerUrlDTO(DwConstants.URL_BASE + DwConstants.URL_OVERVIEW); final Queue startUrl = new ConcurrentLinkedQueue<>(); diff --git a/src/main/java/de/mediathekview/mserver/crawler/dw/parser/DWSendungOverviewDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/dw/parser/DWSendungOverviewDeserializer.java index a8b93688b..b0b5f4e60 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/dw/parser/DWSendungOverviewDeserializer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/dw/parser/DWSendungOverviewDeserializer.java @@ -4,6 +4,7 @@ import de.mediathekview.mserver.base.utils.JsonUtils; import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO; import de.mediathekview.mserver.crawler.basic.PagedElementListDTO; +import de.mediathekview.mserver.crawler.basic.TopicUrlDTO; import java.lang.reflect.Type; import java.util.HashSet; @@ -11,12 +12,13 @@ import java.util.Set; public class DWSendungOverviewDeserializer - implements JsonDeserializer>> { + implements JsonDeserializer>> { private static final String ELEMENT_ITEMS = "items"; private static final String ELEMENT_REFERENCE = "reference"; private static final String ELEMENT_REFERENCE_URL = "url"; private static final String ELEMENT_REFERENCE_TYPE = "type"; + private static final String ELEMENT_REFERENCE_ID = "id"; private static final String ELEMENT_PAGINATION = "paginationInfo"; private static final String ELEMENT_PAGINATION_NEXT = "nextPageUrl"; @@ -30,20 +32,18 @@ private static Optional parseNextUrl(final JsonObject contentObject) { return 
JsonUtils.getAttributeAsString(paginationElement.getAsJsonObject(), ELEMENT_PAGINATION_NEXT); } - private static Set parseItems(final JsonObject aContentObject) { - final Set items = new HashSet<>(); + private static Set parseItems(final JsonObject aContentObject) { + final Set items = new HashSet<>(); if (aContentObject.has(ELEMENT_ITEMS)) { final JsonArray itemArray = aContentObject.get(ELEMENT_ITEMS).getAsJsonArray(); for (final JsonElement itemElement : itemArray) { - final Optional reference = - Optional.of(itemElement.getAsJsonObject().get(ELEMENT_REFERENCE).getAsJsonObject()); - final Optional url = - JsonUtils.getAttributeAsString(reference.get(), ELEMENT_REFERENCE_URL); - final Optional type = - JsonUtils.getAttributeAsString(reference.get(), ELEMENT_REFERENCE_TYPE); + final Optional reference = Optional.of(itemElement.getAsJsonObject().get(ELEMENT_REFERENCE).getAsJsonObject()); + final Optional url = JsonUtils.getAttributeAsString(reference.get(), ELEMENT_REFERENCE_URL); + final Optional type = JsonUtils.getAttributeAsString(reference.get(), ELEMENT_REFERENCE_TYPE); if (url.isPresent() && !url.get().isEmpty() && type.orElse("empty").equalsIgnoreCase("VideoRef")) { - items.add(new CrawlerUrlDTO(url.get())); + final Optional id = JsonUtils.getAttributeAsString(reference.get(), ELEMENT_REFERENCE_ID); + items.add(new TopicUrlDTO(id.get(), url.get())); } } } @@ -52,7 +52,7 @@ private static Set parseItems(final JsonObject aContentObject) { } @Override - public Optional> deserialize( + public Optional> deserialize( final JsonElement aJsonElement, final Type aType, final JsonDeserializationContext aContext) { final JsonObject jsonObject = aJsonElement.getAsJsonObject(); @@ -60,10 +60,10 @@ public Optional> deserialize( return Optional.empty(); } - final Set itemIds = parseItems(jsonObject); + final Set itemIds = parseItems(jsonObject); final Optional nextUrl = parseNextUrl(jsonObject); - final PagedElementListDTO dto = new PagedElementListDTO<>(); + final 
PagedElementListDTO dto = new PagedElementListDTO<>(); dto.setNextPage(nextUrl); dto.addElements(itemIds); return Optional.of(dto); diff --git a/src/main/java/de/mediathekview/mserver/crawler/dw/parser/DwFilmDetailDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/dw/parser/DwFilmDetailDeserializer.java index 82524e15c..cda5be96c 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/dw/parser/DwFilmDetailDeserializer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/dw/parser/DwFilmDetailDeserializer.java @@ -119,6 +119,7 @@ public Optional deserialize( topic.get(), getAiredDate(thisPageUrl.get(), jsonObject), getDuration(thisPageUrl.get(), jsonObjectMainContent)); + film.setId(videoId.get()); // final Optional description = JsonUtils.getAttributeAsString(jsonObject, ELEMENT_TEASER); description.ifPresent(film::setBeschreibung); diff --git a/src/main/java/de/mediathekview/mserver/crawler/dw/tasks/DWOverviewTask.java b/src/main/java/de/mediathekview/mserver/crawler/dw/tasks/DWOverviewTask.java index 62c1c80fa..83e9e5c22 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/dw/tasks/DWOverviewTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/dw/tasks/DWOverviewTask.java @@ -5,6 +5,7 @@ import de.mediathekview.mserver.crawler.basic.AbstractRecursiveConverterTask; import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO; import de.mediathekview.mserver.crawler.basic.PagedElementListDTO; +import de.mediathekview.mserver.crawler.basic.TopicUrlDTO; import de.mediathekview.mserver.crawler.dw.DWTaskBase; import de.mediathekview.mserver.crawler.dw.parser.DWSendungOverviewDeserializer; import jakarta.ws.rs.client.WebTarget; @@ -14,7 +15,7 @@ import java.util.Queue; import java.util.concurrent.ConcurrentLinkedQueue; -public class DWOverviewTask extends DWTaskBase { +public class DWOverviewTask extends DWTaskBase { private static final Type OPTIONAL_OVERVIEW_DTO_TYPE_TOKEN = new TypeToken>>() {}.getType(); @@ -33,7 +34,7 @@ 
public DWOverviewTask( @Override protected void processRestTarget(final CrawlerUrlDTO aDTO, final WebTarget aTarget) { - final Optional> overviewDtoOptional = + final Optional> overviewDtoOptional = deserializeOptional(aTarget, OPTIONAL_OVERVIEW_DTO_TYPE_TOKEN); if (overviewDtoOptional.isEmpty()) { crawler.incrementAndGetErrorCount(); @@ -41,7 +42,7 @@ protected void processRestTarget(final CrawlerUrlDTO aDTO, final WebTarget aTarg return; } - final PagedElementListDTO overviewDto = overviewDtoOptional.get(); + final PagedElementListDTO overviewDto = overviewDtoOptional.get(); addResults(overviewDto.getElements()); final Optional optionalNextPage = overviewDto.getNextPage(); @@ -55,14 +56,14 @@ protected void processRestTarget(final CrawlerUrlDTO aDTO, final WebTarget aTarg } } - private void addResults(final Collection aUrls) { - for (final CrawlerUrlDTO url : aUrls) { - taskResults.add(new CrawlerUrlDTO(url.getUrl())); + private void addResults(final Collection aUrls) { + for (final TopicUrlDTO url : aUrls) { + taskResults.add(new TopicUrlDTO(url.getTopic(),url.getUrl())); } } @Override - protected AbstractRecursiveConverterTask createNewOwnInstance( + protected AbstractRecursiveConverterTask createNewOwnInstance( final Queue aElementsToProcess) { return new DWOverviewTask(crawler, aElementsToProcess, subpage + 1); } diff --git a/src/main/java/de/mediathekview/mserver/crawler/dw/tasks/DwFilmDetailTask.java b/src/main/java/de/mediathekview/mserver/crawler/dw/tasks/DwFilmDetailTask.java index 5389ca9f1..a8058796d 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/dw/tasks/DwFilmDetailTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/dw/tasks/DwFilmDetailTask.java @@ -5,6 +5,7 @@ import de.mediathekview.mserver.crawler.basic.AbstractCrawler; import de.mediathekview.mserver.crawler.basic.AbstractRecursiveConverterTask; import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO; +import de.mediathekview.mserver.crawler.basic.TopicUrlDTO; import 
de.mediathekview.mserver.crawler.dw.DWTaskBase; import de.mediathekview.mserver.crawler.dw.parser.DwFilmDetailDeserializer; import jakarta.ws.rs.client.WebTarget; @@ -17,14 +18,14 @@ import org.apache.logging.log4j.Logger; @SuppressWarnings("serial") -public class DwFilmDetailTask extends DWTaskBase { +public class DwFilmDetailTask extends DWTaskBase { private static final Logger LOG = LogManager.getLogger(DwFilmDetailTask.class); private static final Type OPTIONAL_FILM_DETAIL_DTO_TYPE_TOKEN = new TypeToken>() {}.getType(); public DwFilmDetailTask( final AbstractCrawler aCrawler, - final Queue aUrlToCrawlDTOs) { + final Queue aUrlToCrawlDTOs) { super(aCrawler, aUrlToCrawlDTOs, null); registerJsonDeserializer( @@ -32,13 +33,13 @@ public DwFilmDetailTask( } @Override - protected AbstractRecursiveConverterTask createNewOwnInstance( - final Queue aElementsToProcess) { + protected AbstractRecursiveConverterTask createNewOwnInstance( + final Queue aElementsToProcess) { return new DwFilmDetailTask(crawler, aElementsToProcess); } @Override - protected void processRestTarget(final CrawlerUrlDTO aDTO, final WebTarget aTarget) { + protected void processRestTarget(final TopicUrlDTO aDTO, final WebTarget aTarget) { Optional filmDetailDtoOptional = Optional.empty(); try { filmDetailDtoOptional = deserializeOptional(aTarget, OPTIONAL_FILM_DETAIL_DTO_TYPE_TOKEN); diff --git a/src/main/java/de/mediathekview/mserver/crawler/kika/KikaApiCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/kika/KikaApiCrawler.java index c9280c17d..b524ee56c 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/kika/KikaApiCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/kika/KikaApiCrawler.java @@ -47,11 +47,13 @@ protected RecursiveTask> createCrawlerTask() { final Queue videos = new ConcurrentLinkedQueue<>(); videos.addAll(aKikaApiTopicOverviewTask.fork().join()); // - printMessage(ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), videos.size()); - 
getAndSetMaxCount(videos.size()); + final Queue videosFiltered = this.filterExistingFilms(videos, v -> v.getId().get() ); + // + printMessage(ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), videosFiltered.size()); + getAndSetMaxCount(videosFiltered.size()); // // get all video urls for this episode - return new KikaApiFilmTask(this, videos); + return new KikaApiFilmTask(this, videosFiltered); } catch (final Exception ex) { LOG.fatal("Exception in KIKA crawler.", ex); } diff --git a/src/main/java/de/mediathekview/mserver/crawler/kika/tasks/KikaApiFilmTask.java b/src/main/java/de/mediathekview/mserver/crawler/kika/tasks/KikaApiFilmTask.java index a3da20273..7460fd79d 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/kika/tasks/KikaApiFilmTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/kika/tasks/KikaApiFilmTask.java @@ -112,6 +112,7 @@ protected void postProcessing(KikaApiVideoInfoDto aResponseObj, KikaApiFilmDto a if (aDTO.getDescription().isPresent()) { aFilm.setBeschreibung(aDTO.getDescription().get()); } + aFilm.setId(aDTO.getId().get()); getGeo(aDTO).ifPresent(aFilm::setGeoLocations); getWebsite(aDTO).ifPresent(aFilm::setWebsite); aFilm.setUrls(getVideoUrls(aResponseObj, aDTO)); diff --git a/src/main/java/de/mediathekview/mserver/crawler/orfon/OrfOnCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/orfon/OrfOnCrawler.java index 54f5a62a7..ec5e6968c 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/orfon/OrfOnCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/orfon/OrfOnCrawler.java @@ -75,7 +75,9 @@ protected RecursiveTask> createCrawlerTask() { getAndSetMaxCount(allVideos.size()); } // - return new OrfOnEpisodeTask(this, new ConcurrentLinkedQueue<>(allVideos)); + Queue allVideosFiltered = this.filterExistingFilms(allVideos, v-> v.getUrl().substring(v.getUrl().lastIndexOf("/")+1)); + // + return new OrfOnEpisodeTask(this, new ConcurrentLinkedQueue<>(allVideosFiltered)); } catch 
(final Exception ex) {
       LOG.fatal("Exception in ORFON crawler.", ex);
       Thread.currentThread().interrupt();
diff --git a/src/main/java/de/mediathekview/mserver/crawler/orfon/task/OrfOnEpisodeTask.java b/src/main/java/de/mediathekview/mserver/crawler/orfon/task/OrfOnEpisodeTask.java
index ba399d1e9..45cac6e2d 100644
--- a/src/main/java/de/mediathekview/mserver/crawler/orfon/task/OrfOnEpisodeTask.java
+++ b/src/main/java/de/mediathekview/mserver/crawler/orfon/task/OrfOnEpisodeTask.java
@@ -84,6 +84,7 @@ protected void postProcessing(OrfOnVideoInfoDTO aResponseObj, OrfOnBreadCrumsUrl
         aResponseObj.getAired().orElse(LocalDateTime.of(1970,1,1,00,00,00)),
         aResponseObj.getDuration().orElse(Duration.ofMinutes(0L))
     );
+    aResponseObj.getId().ifPresent(aFilm::setId);
     aResponseObj.getGeorestriction().ifPresent(aFilm::addAllGeoLocations);
     aResponseObj.getDescription().ifPresent(aFilm::setBeschreibung);
     aResponseObj.getVideoUrls().ifPresent(aFilm::setUrls);
diff --git a/src/main/java/de/mediathekview/mserver/crawler/phoenix/PhoenixCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/phoenix/PhoenixCrawler.java
index 6e43e0aac..8b165e3af 100644
--- a/src/main/java/de/mediathekview/mserver/crawler/phoenix/PhoenixCrawler.java
+++ b/src/main/java/de/mediathekview/mserver/crawler/phoenix/PhoenixCrawler.java
@@ -45,12 +45,15 @@ protected RecursiveTask> createCrawlerTask() {
     try {
       shows.addAll(getShows());
+      //
+      Queue showsFiltered = this.filterExistingFilms(shows, v -> v.getUrl().substring(v.getUrl().lastIndexOf("/")+1));
+      // Report and count only the entries that survived the filter and will actually be crawled.
       printMessage(
-          ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), shows.size());
-      getAndSetMaxCount(shows.size());
+          ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), showsFiltered.size());
+      getAndSetMaxCount(showsFiltered.size());
       return new PhoenixFilmDetailTask(
-          this, shows, null, PhoenixConstants.URL_BASE);
+          this, showsFiltered, null, PhoenixConstants.URL_BASE);
     } catch (final ExecutionException executionException) {
       LOG.fatal("Exception in Phönix crawler.", executionException);
     } catch (final
InterruptedException interruptedException) { diff --git a/src/main/java/de/mediathekview/mserver/crawler/phoenix/tasks/PhoenixFilmDetailTask.java b/src/main/java/de/mediathekview/mserver/crawler/phoenix/tasks/PhoenixFilmDetailTask.java index 3fa072cdf..ce720f74a 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/phoenix/tasks/PhoenixFilmDetailTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/phoenix/tasks/PhoenixFilmDetailTask.java @@ -77,6 +77,7 @@ protected void processRestTarget(final CrawlerUrlDTO aDTO, final WebTarget aTarg final Set films = zdfFilmDetailTask.invoke(); films.forEach( film -> { + film.setId(aDTO.getUrl().substring(aDTO.getUrl().lastIndexOf("/")+1)); film.setThema(filmDetailDto.getTopic()); film.setTitel(filmDetailDto.getTitle()); if (filmDetailDto.getWebsite().isPresent()) { diff --git a/src/main/java/de/mediathekview/mserver/crawler/sr/SrCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/sr/SrCrawler.java index 0bd3233d6..f9b71d042 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/sr/SrCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/sr/SrCrawler.java @@ -54,11 +54,13 @@ protected RecursiveTask> createCrawlerTask() { filmDtos.addAll(forkJoinPool.submit(archiveTask).get()); } + final Queue filmDtosFiltered = this.filterExistingFilms(filmDtos, v->v.getUrl().substring(v.getUrl().lastIndexOf("id=")+3)); + printMessage( - ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), filmDtos.size()); - getAndSetMaxCount(filmDtos.size()); + ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), filmDtosFiltered.size()); + getAndSetMaxCount(filmDtosFiltered.size()); - return new SrFilmDetailTask(this, filmDtos); + return new SrFilmDetailTask(this, filmDtosFiltered); } catch (final InterruptedException ex) { LOG.debug("{} crawler interrupted.", getSender().getName(), ex); Thread.currentThread().interrupt(); diff --git 
a/src/main/java/de/mediathekview/mserver/crawler/sr/tasks/SrFilmDetailTask.java b/src/main/java/de/mediathekview/mserver/crawler/sr/tasks/SrFilmDetailTask.java index c8b29ef37..f1f8b1d2e 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/sr/tasks/SrFilmDetailTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/sr/tasks/SrFilmDetailTask.java @@ -145,7 +145,7 @@ protected void processDocument(final SrTopicUrlDTO aUrlDTO, final Document aDocu aUrlDTO.getTheme(), time.orElse(LocalDateTime.now()), duration.orElse(Duration.ZERO)); - + film.setId(aUrlDTO.getUrl().substring(aUrlDTO.getUrl().lastIndexOf("id=")+3)); film.setWebsite(URI.create(aUrlDTO.getUrl()).toURL()); description.ifPresent(film::setBeschreibung); diff --git a/src/main/java/de/mediathekview/mserver/crawler/srf/SrfCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/srf/SrfCrawler.java index fb35b46af..d8b10e410 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/srf/SrfCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/srf/SrfCrawler.java @@ -74,11 +74,13 @@ protected RecursiveTask> createCrawlerTask() { dtos.addAll(topicSearchUrls); } // + final Queue topicsUrlsFiltered = this.filterExistingFilms(dtos, v-> v.getUrl().substring(v.getUrl().lastIndexOf("/")+1)); + // printMessage( - ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), dtos.size()); - getAndSetMaxCount(dtos.size()); + ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), topicsUrlsFiltered.size()); + getAndSetMaxCount(topicsUrlsFiltered.size()); - return new SrfFilmDetailTask(this, new ConcurrentLinkedQueue<>(dtos)); + return new SrfFilmDetailTask(this, new ConcurrentLinkedQueue<>(topicsUrlsFiltered)); } catch (final InterruptedException ex) { LOG.debug("{} crawler interrupted.", getSender().getName(), ex); diff --git a/src/main/java/de/mediathekview/mserver/crawler/srf/parser/SrfFilmJsonDeserializer.java 
b/src/main/java/de/mediathekview/mserver/crawler/srf/parser/SrfFilmJsonDeserializer.java index 29b002e00..90c8955e6 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/srf/parser/SrfFilmJsonDeserializer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/srf/parser/SrfFilmJsonDeserializer.java @@ -264,6 +264,7 @@ public Optional deserialize( isAudioDescription ? theme.replace(TEXT_AUDIO_DESCRIPTION, "").trim() : theme, episodeData.publishDate, chapterList.duration); + film.setId(chapterList.urn); film.setBeschreibung(chapterList.description); film.setWebsite(buildWebsiteUrl(chapterList.id, chapterList.urn, episodeData.title, theme).orElse(null)); addUrls(videoUrls, film, isAudioDescription); diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/AbstractZdfCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/AbstractZdfCrawler.java index f6cd6495d..242b4fa03 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/AbstractZdfCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/AbstractZdfCrawler.java @@ -18,6 +18,7 @@ import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; import java.time.temporal.ChronoUnit; +import java.util.ArrayDeque; import java.util.Collection; import java.util.HashSet; import java.util.Map; @@ -48,23 +49,21 @@ protected RecursiveTask> createCrawlerTask() { try { Set shows = new HashSet<>(); - + Queue showsFiltered = new ArrayDeque<>(); final ZdfConfiguration configuration = loadConfiguration(); if (configuration.getSearchAuthKey().isPresent() && configuration.getVideoAuthKey().isPresent()) { - shows = new HashSet<>(getDaysEntries(configuration)); - if (Boolean.TRUE.equals(crawlerConfig.getTopicsSearchEnabled())) { shows.addAll(getTopicsEntries()); } - - getAndSetMaxCount(shows.size()); + showsFiltered = this.filterExistingFilms(shows, v-> v.getUrl().substring(v.getUrl().lastIndexOf("/")+1).replace(".json", "") ); + getAndSetMaxCount(showsFiltered.size()); } 
return new ZdfFilmDetailTask( this, getApiUrlBase(), - new ConcurrentLinkedQueue<>(shows), + new ConcurrentLinkedQueue<>(showsFiltered), configuration.getVideoAuthKey().orElse(null), partner2Sender); } catch (final InterruptedException ex) { diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfCrawler.java index b8ef8114e..fe09a4643 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfCrawler.java @@ -7,6 +7,7 @@ import de.mediathekview.mserver.base.messages.ServerMessages; import de.mediathekview.mserver.crawler.basic.AbstractCrawler; import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO; +import de.mediathekview.mserver.crawler.basic.TopicUrlDTO; import de.mediathekview.mserver.crawler.zdf.tasks.*; import de.mediathekview.mserver.progress.listeners.SenderProgressListener; @@ -78,23 +79,25 @@ protected RecursiveTask> createCrawlerTask() { ZdfTopicSeasonTask topicSeasonTask = new ZdfTopicSeasonTask(this, new ConcurrentLinkedQueue<>(topicUrls), AUTH_KEY); shows.addAll(forkJoinPool.submit(topicSeasonTask).get()); + + final Queue showsFiltered = this.filterExistingFilms(shows, ZdfFilmDto::getId); printMessage( - ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), shows.size()); + ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), showsFiltered.size()); - return new ZdfFilmTask(this, new ConcurrentLinkedQueue<>(shows), AUTH_KEY); + return new ZdfFilmTask(this, new ConcurrentLinkedQueue<>(showsFiltered), AUTH_KEY); } else { final ZdfConfiguration configuration = loadConfiguration(); if (configuration.getSearchAuthKey().isPresent() && configuration.getVideoAuthKey().isPresent()) { - Set shows = new HashSet<>(getDaysEntries(configuration)); + Queue shows = new ArrayDeque<>(getDaysEntries(configuration)); printMessage( 
ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), shows.size()); - + final Queue showsFiltered = this.filterExistingFilms(shows, v-> ((TopicUrlDTO)v).getTopic()); return new ZdfFilmDetailTask( this, getApiUrlBase(), - new ConcurrentLinkedQueue<>(shows), + new ConcurrentLinkedQueue<>(showsFiltered), configuration.getVideoAuthKey().orElse(""), ZdfConstants.PARTNER_TO_SENDER); } } diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfFilmDto.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfFilmDto.java index ccf467ce6..52116e1b7 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfFilmDto.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfFilmDto.java @@ -14,6 +14,8 @@ public class ZdfFilmDto extends CrawlerUrlDTO { private final LocalDateTime time; private final String videoType; private String topic; + private String id; + private String canonical; public ZdfFilmDto( Sender sender, @@ -22,7 +24,9 @@ public ZdfFilmDto( String website, LocalDateTime time, String videoType, - String downloadUrl) { + String downloadUrl, + String id, + String canonical) { super(downloadUrl); this.topic = ""; this.title = title; @@ -31,6 +35,8 @@ public ZdfFilmDto( this.website = website; this.time = time; this.videoType = videoType; + this.id = id; + this.canonical = canonical; } public String getTitle() { @@ -52,6 +58,14 @@ public String getWebsite() { public LocalDateTime getTime() { return time; } + + public String getId() { + return id; + } + + public String getCanonical() { + return canonical; + } @Override public boolean equals(Object o) { diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDeserializer.java index 7c5219623..40ebb92ec 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDeserializer.java +++ 
b/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDeserializer.java @@ -7,6 +7,8 @@ import com.google.gson.JsonObject; import de.mediathekview.mserver.base.utils.UrlUtils; import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO; +import de.mediathekview.mserver.crawler.basic.TopicUrlDTO; + import java.lang.reflect.Type; import java.util.Optional; @@ -61,14 +63,14 @@ private void parseSearchEntries(final ZdfDayPageDto aDayPageDto, final JsonObjec final JsonArray resultsArray = resultsElement.getAsJsonArray(); resultsArray.forEach( result -> { - final Optional dto = parseSearchEntry(result.getAsJsonObject()); + final Optional dto = parseSearchEntry(result.getAsJsonObject()); dto.ifPresent(aDayPageDto::addEntry); }); } } } - private Optional parseSearchEntry(final JsonObject aResultObject) { + private Optional parseSearchEntry(final JsonObject aResultObject) { if (!aResultObject.has(JSON_ELEMENT_TARGET)) { return Optional.empty(); } @@ -85,10 +87,17 @@ private Optional parseSearchEntry(final JsonObject aResultObject) if (target.has(JSON_ATTRIBUTE_CANONICAL)) { String canonical = target.get(JSON_ATTRIBUTE_CANONICAL).getAsString(); - + String id = aResultObject.get("id").getAsString().replace("SCMS_", ""); canonical = UrlUtils.addDomainIfMissing(canonical, apiUrlBase); + if(id.contains("video_artede") + || id.contains("video-ard") + || id.contains("video-kika") + || id.contains("video_phoenix") + ) { + return Optional.empty(); + } - final CrawlerUrlDTO dto = new CrawlerUrlDTO(canonical); + final TopicUrlDTO dto = new TopicUrlDTO(id, canonical); return Optional.of(dto); } diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDto.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDto.java index 663847986..d0e12cbcd 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDto.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDto.java @@ -1,13 +1,15 @@ 
package de.mediathekview.mserver.crawler.zdf.json; import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO; +import de.mediathekview.mserver.crawler.basic.TopicUrlDTO; + import java.util.ArrayList; import java.util.Collection; import java.util.Optional; public class ZdfDayPageDto { - private final Collection entries; + private final Collection entries; private Optional nextPageUrl; public ZdfDayPageDto() { @@ -15,11 +17,11 @@ public ZdfDayPageDto() { nextPageUrl = Optional.empty(); } - public void addEntry(CrawlerUrlDTO entry) { + public void addEntry(TopicUrlDTO entry) { entries.add(entry); } - public Collection getEntries() { + public Collection getEntries() { return entries; } diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializer.java index 82f1c28e6..7d745f288 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializer.java @@ -110,8 +110,14 @@ public Optional deserialize( final Map downloadUrl = parseDownloadUrls(mainVideoTarget); if (title.isPresent()) { + final Optional id = JsonUtils.getElementValueAsString(aJsonObject, "id"); + final Optional selfId = JsonUtils.getElementValueAsString(aJsonObject, "self"); + if(id.isEmpty() && selfId.isEmpty()) { + System.out.println("check"); + } + final Optional film = - createFilm(partner2Sender.get(tvService.orElse("EMPTY")), topic, title.get(), description, website, time, duration); + createFilm(id.orElse(selfId.get()), partner2Sender.get(tvService.orElse("EMPTY")), topic, title.get(), description, website, time, duration); return Optional.of(new ZdfFilmDtoOld(film, downloadUrl.get(DOWNLOAD_URL_DEFAULT), downloadUrl.get(DOWNLOAD_URL_DGS))); } else { LOG.error("ZdfFilmDetailDeserializer: no title found"); @@ -158,6 +164,7 @@ private String 
finalizeDownloadUrl(final String url) { } private Optional createFilm( + final String id, final Sender sender, final Optional aTopic, final String aTitle, @@ -175,7 +182,7 @@ private Optional createFilm( aTopic.orElse(aTitle), aTime.orElse(LocalDateTime.now()), aDuration.orElse(Duration.ZERO)); - + film.setId(id); if (aWebsite.isPresent()) { film.setWebsite(URI.create(aWebsite.get()).toURL()); } diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfTopicBaseClass.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfTopicBaseClass.java index 51e9a53b6..500fa18b4 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfTopicBaseClass.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfTopicBaseClass.java @@ -40,6 +40,8 @@ protected Set deserializeMovie(JsonElement episode) { final Optional time = parseDate(episodeObject); final Optional description = JsonUtils.getAttributeAsString(episodeObject.getAsJsonObject("teaser"), "description"); + final Optional id = JsonUtils.getAttributeAsString(episodeObject, "id"); + final Optional canonical = JsonUtils.getAttributeAsString(episodeObject, "canonical"); final Optional sender = parseSender(episodeObject); // streamingoptions relevant, um zu erkennen ob uhd/dgs/ad/ov...? 
@@ -65,7 +67,9 @@ protected Set deserializeMovie(JsonElement episode) { description, website, time, - downloadUrls); + downloadUrls, + id.get(), + canonical.get()); } } else { LOG.error("ZdfTopicSeasonDeserializer: no title found"); @@ -172,7 +176,9 @@ private Set createFilm( final Optional aDescription, final Optional aWebsite, final Optional aTime, - final Map downloadUrls) { + final Map downloadUrls, + final String id, + final String canonical) { Set films = new HashSet<>(); if (!downloadUrls.isEmpty()) { @@ -186,7 +192,9 @@ private Set createFilm( aWebsite.orElse(""), aTime.orElse(LocalDateTime.now()), key.toLowerCase(), - url))); + url, + id, + canonical))); } else { LOG.error("ZdfTopicSeasonDeserializer: no video found for {}: {}", sender, aTitle); } diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmDetailTask.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmDetailTask.java index 42d0e06d0..e8d567e46 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmDetailTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmDetailTask.java @@ -59,7 +59,7 @@ private static Film clone(final Film aFilm, final String aLanguage) { aFilm.getThema(), aFilm.getTime(), aFilm.getDuration()); - + film.setId(aFilm.getId()); film.setBeschreibung(aFilm.getBeschreibung()); film.setWebsite(aFilm.getWebsite().orElse(null)); diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmTask.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmTask.java index a45ad763b..b60472f6c 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmTask.java @@ -45,7 +45,7 @@ private static Film createFilm(final ZdfFilmDto aFilm, final DownloadDto downloa aFilm.getTopic(), aFilm.getTime(), downloadDto.getDuration().orElse(Duration.ZERO)); - + film.setId(aFilm.getId()); 
film.setBeschreibung(aFilm.getDescription()); film.setWebsite(URI.create(aFilm.getWebsite()).toURL()); @@ -78,7 +78,7 @@ private static Film clone(final Film aFilm, final String aLanguage) { aFilm.getThema(), aFilm.getTime(), aFilm.getDuration()); - + film.setId(aFilm.getId()); film.setBeschreibung(aFilm.getBeschreibung()); film.setWebsite(aFilm.getWebsite().orElse(null)); diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfTaskBase.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfTaskBase.java index 0c511fe19..0cdda34d2 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfTaskBase.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfTaskBase.java @@ -36,6 +36,9 @@ protected Optional deserializeOptional(final WebTarget aTarget, final Typ final Response response = executeRequest(aTarget); if (response.getStatus() == 200) { final String jsonOutput = response.readEntity(String.class); + if(jsonOutput.length() == 0) { + return Optional.empty(); // PHONIX CONTENT-LENGTH 0 + } return gson.fromJson(jsonOutput, aType); } else { LOG.error( diff --git a/src/main/java/de/mediathekview/mserver/daten/AbstractMediaResource.java b/src/main/java/de/mediathekview/mserver/daten/AbstractMediaResource.java index 6049ea671..99d2e6f94 100644 --- a/src/main/java/de/mediathekview/mserver/daten/AbstractMediaResource.java +++ b/src/main/java/de/mediathekview/mserver/daten/AbstractMediaResource.java @@ -19,12 +19,14 @@ public abstract class AbstractMediaResource implements S private String thema; private String beschreibung; private URL website; + private String id; /** DON'T USE! - ONLY FOR GSON! 
*/ AbstractMediaResource() { geoLocations = new ArrayList<>(); urls = new EnumMap<>(Resolution.class); uuid = null; + id = null; sender = null; time = null; website = null; @@ -40,6 +42,7 @@ protected AbstractMediaResource( geoLocations = new ArrayList<>(); urls = new EnumMap<>(Resolution.class); uuid = aUuid; + id = null; if (aSender == null) { throw new IllegalArgumentException("The sender can't be null!"); } @@ -63,6 +66,7 @@ protected AbstractMediaResource(final AbstractMediaResource copyObj) { time = copyObj.time; beschreibung = copyObj.beschreibung; website = copyObj.website; + id = copyObj.id; } public AbstractMediaResource merge(final AbstractMediaResource objToMergeWith) { @@ -139,13 +143,20 @@ public void setBeschreibungRaw(final String aBeschreibung) { beschreibung = aBeschreibung; } + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + public Optional getDefaultUrl() { if (urls.containsKey(Resolution.NORMAL)) { return Optional.of(getUrl(Resolution.NORMAL)); } - final Iterator> entryIterator = urls.entrySet().iterator(); - if (entryIterator.hasNext()) { - return Optional.of(entryIterator.next().getValue()); + if(!urls.isEmpty()) { + return urls.values().stream().findFirst(); } return Optional.empty(); } diff --git a/src/main/java/de/mediathekview/mserver/daten/Film.java b/src/main/java/de/mediathekview/mserver/daten/Film.java index 5f0bd4958..c5c7b3f69 100644 --- a/src/main/java/de/mediathekview/mserver/daten/Film.java +++ b/src/main/java/de/mediathekview/mserver/daten/Film.java @@ -147,6 +147,26 @@ public boolean hasUT() { return !subtitles.isEmpty(); } + public Optional getDefaultUrl() { + Optional defaultFilmUrl = super.getDefaultUrl(); + if (defaultFilmUrl.isPresent()) { + return defaultFilmUrl; + } + if (audioDescriptions.containsKey(Resolution.NORMAL)) { + return Optional.of(audioDescriptions.get(Resolution.NORMAL)); + } + if(!audioDescriptions.isEmpty()) { + return 
audioDescriptions.values().stream().findFirst(); + } + if (signLanguages.containsKey(Resolution.NORMAL)) { + return Optional.of(signLanguages.get(Resolution.NORMAL)); + } + if(!signLanguages.isEmpty()) { + return signLanguages.values().stream().findFirst(); + } + return Optional.empty(); + } + public static void addAllToFilmlist(final Filmlist source,final Filmlist target) { target.addAllFilms(source.getFilms().values()); target.addAllLivestreams(source.getLivestreams().values()); diff --git a/src/main/java/de/mediathekview/mserver/ui/config/MServerCommandLine.java b/src/main/java/de/mediathekview/mserver/ui/config/MServerCommandLine.java new file mode 100644 index 000000000..091262faf --- /dev/null +++ b/src/main/java/de/mediathekview/mserver/ui/config/MServerCommandLine.java @@ -0,0 +1,62 @@ +package de.mediathekview.mserver.ui.config; + +import java.util.HashMap; +import java.util.Map; + +public class MServerCommandLine { + public enum CMDARG { + config, + gconf, + flow, + topicsSearchEnabled, + invalid; + + public static CMDARG from(String key) { + try { + return CMDARG.valueOf(key); + } catch (IllegalArgumentException e) { + return CMDARG.invalid; + } + } + } + + public static void print() { + System.err.println("Call --config abc.yaml --gconf --flow abc,def,ghi "); + } + static boolean validateArgs(String[] args) { + if(!args[0].startsWith("--")) { + System.err.println("must start with --"); + return false; + } + for (int index = 0; index < args.length; index++) { + if(!args[index].startsWith("--") && args.length > index+1 && !args[index+1].startsWith("--")) { + return false; + } + } + Map enumArgs = parseArgs(args); + if (enumArgs.containsKey(CMDARG.invalid)) { + return false; + } + return true; + } + + + // TODO: replace me with Apache Commons CLI + // Usage aba.jar --input data.json --limit 50 --verbose + static Map parseArgs(String[] args) { + Map map = new HashMap<>(); + for (int i = 0; i < args.length; i++) { + if (args[i].startsWith("--")) { + String 
key = args[i].substring(2); + CMDARG enumKey = CMDARG.from(key); + if (i + 1 < args.length && !args[i + 1].startsWith("--")) { + map.put(enumKey, args[++i]); + } else { + map.put(enumKey, "true"); + } + } + } + return map; + } + +} diff --git a/src/main/java/de/mediathekview/mserver/ui/config/MServerConfigUI.java b/src/main/java/de/mediathekview/mserver/ui/config/MServerConfigUI.java index ec43a0e41..01b91ce21 100644 --- a/src/main/java/de/mediathekview/mserver/ui/config/MServerConfigUI.java +++ b/src/main/java/de/mediathekview/mserver/ui/config/MServerConfigUI.java @@ -8,6 +8,8 @@ import de.mediathekview.mserver.base.messages.ServerMessages; import de.mediathekview.mserver.crawler.CrawlerManager; import de.mediathekview.mserver.progress.listeners.ProgressLogMessageListener; +import de.mediathekview.mserver.ui.config.MServerCommandLine.CMDARG; + import org.apache.logging.log4j.Level; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -24,14 +26,15 @@ import java.nio.file.StandardCopyOption; import java.util.ArrayList; import java.util.List; +import java.util.Map; public final class MServerConfigUI { // logger setup in start private Logger LOG = null; private static final String CONFIG_FILE_NAME = "MServer-Config.yaml"; - private static final String ARGUMENT_GCONF = "-gconf"; private LogMessageListener logMessageListener; private CrawlerManager manager; + private MServerExecutionFlow execution; public MServerConfigUI() { super(); @@ -64,19 +67,22 @@ private void generateDefaultConfiguration() { configFilePath.toAbsolutePath().toString()); } } - - private boolean interpretProgramArguments(final String[] aProgramAgruments) { - if (aProgramAgruments != null && aProgramAgruments.length > 0) { - if (aProgramAgruments.length > 1) { - logMessageListener.consumeMessage(ServerMessages.UI_TO_MANY_ARGUMENTS); - } - - if (ARGUMENT_GCONF.equals(aProgramAgruments[0])) { - generateDefaultConfiguration(); + + private String 
generateCustomsConfiguration(String sourcefile) { + String configFileName = sourcefile; + if (configFileName.startsWith("http")) { + URL fileUrl; + try { + // get a copy of this file to use it as configuration file + fileUrl = URI.create(configFileName).toURL(); + String filename = Paths.get(fileUrl.getPath()).getFileName().toString(); + MServerConfigUI.getRemoteFileToLocal(configFileName, filename); + configFileName = filename; + } catch (MalformedURLException e) { + e.printStackTrace(); } } - - return true; + return configFileName; } private boolean logLevelInfoOrLower(final Level configLevel) { @@ -85,59 +91,41 @@ private boolean logLevelInfoOrLower(final Level configLevel) { || Level.ALL.equals(configLevel); } - void start() { - try { - manager.start(); - manager.importFilmlist(); - manager.importLivestreamFilmlist(); - } finally { - manager.filterFilmlist(); - manager.saveFilmlist(); - manager.saveDifferenceFilmlist(); - manager.writeHashFile(); - manager.writeIdFile(); - manager.copyFilmlist(); - manager.stop(); - } - } + void start(final String[] aProgramAgruments) { - MServerConfigManager aMServerConfigManager = null; - if (aProgramAgruments.length > 0 && !ARGUMENT_GCONF.equals(aProgramAgruments[0])) { - String configFileName = aProgramAgruments[0]; - if (configFileName.startsWith("http")) { - URL fileUrl; - try { - // get a copy of this file to use it as configuration file - fileUrl = URI.create(configFileName).toURL(); - String filename = Paths.get(fileUrl.getPath()).getFileName().toString(); - MServerConfigUI.getRemoteFileToLocal(configFileName, filename); - configFileName = filename; - } catch (MalformedURLException e) { - e.printStackTrace(); - } - } - aMServerConfigManager = new MServerConfigManager(configFileName); - } else { - aMServerConfigManager = new MServerConfigManager(MServerConfigManager.DEFAULT_CONFIG_FILE); + if (!MServerCommandLine.validateArgs(aProgramAgruments)) { + MServerCommandLine.print(); + return; + } + Map cmd = 
MServerCommandLine.parseArgs(aProgramAgruments); + // config + String configFileName = MServerConfigManager.DEFAULT_CONFIG_FILE; + if (cmd.containsKey(CMDARG.gconf)) { + generateDefaultConfiguration(); + } + if (cmd.containsKey(CMDARG.config)) { + configFileName = generateCustomsConfiguration(cmd.get(CMDARG.config)); } + final MServerConfigManager aMServerConfigManager = new MServerConfigManager(configFileName); // here we set the correct configManager for all log4logger // logsettings are stored static in our factory new Log4JConfigurationFactory(aMServerConfigManager.getConfig().getLogSettings()); LOG = LogManager.getLogger(MServerConfigUI.class); logMessageListener = new LogMessageListener(); - - if (interpretProgramArguments(aProgramAgruments)) { - manager = new CrawlerManager(aMServerConfigManager); - final MServerLogSettingsDTO logSettings = aMServerConfigManager.getConfig().getLogSettings(); - logSettings.setLogActivateConsole(true); - final Level configLevel = logSettings.getLogLevelConsole(); - if (configLevel == null || !logLevelInfoOrLower(configLevel)) { - logSettings.setLogLevelConsole(Level.INFO); - } - addListeners(); - start(); + // + manager = new CrawlerManager(aMServerConfigManager); + final MServerLogSettingsDTO logSettings = aMServerConfigManager.getConfig().getLogSettings(); + logSettings.setLogActivateConsole(true); + final Level configLevel = logSettings.getLogLevelConsole(); + if (configLevel == null || !logLevelInfoOrLower(configLevel)) { + logSettings.setLogLevelConsole(Level.INFO); } + addListeners(); // REQUIRES MANAGER!!! 
+ // + execution = new MServerExecutionFlow(manager, cmd); + execution.start(); + } public static void getRemoteFileToLocal(String source, String target) { @@ -151,4 +139,6 @@ public static void getRemoteFileToLocal(String source, String target) { e.printStackTrace(); // we do not have a logger yet } } + + } diff --git a/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java b/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java new file mode 100644 index 000000000..a1f8f8fa2 --- /dev/null +++ b/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java @@ -0,0 +1,122 @@ +package de.mediathekview.mserver.ui.config; + +import java.util.HashSet; +import java.util.Map; +import java.util.Optional; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import de.mediathekview.mserver.base.utils.CheckUrlAvailability; +import de.mediathekview.mserver.base.utils.FilmDBService; +import de.mediathekview.mserver.base.utils.GPDataSourceProvider; +import de.mediathekview.mserver.crawler.CrawlerManager; +import de.mediathekview.mserver.daten.Filmlist; +import de.mediathekview.mserver.ui.config.MServerCommandLine.CMDARG; + +public class MServerExecutionFlow { + private static final Logger LOG = LogManager.getLogger(MServerExecutionFlow.class); + private CrawlerManager manager; + private Map cmd; + + public MServerExecutionFlow(CrawlerManager manager, Map cmd) { + this.manager = manager; + this.cmd = cmd; + } + + void start() { + if(cmd.containsKey(CMDARG.topicsSearchEnabled)) { + if(cmd.get(CMDARG.topicsSearchEnabled).equalsIgnoreCase("true")) { + manager.getConfigManager().getConfig().setTopicsSearchEnabled(true); + } else { + manager.getConfigManager().getConfig().setTopicsSearchEnabled(false); + } + } + // + if (cmd.containsKey(CMDARG.flow) && cmd.get(CMDARG.flow).equalsIgnoreCase("importFilmlistIntoDB")) { + importFilmlistIntoDB(); + } else if (cmd.containsKey(CMDARG.flow) && 
cmd.get(CMDARG.flow).equalsIgnoreCase("exportFilmListFromDB")) { + exportFilmListFromDB(); + } else if (cmd.containsKey(CMDARG.flow) && cmd.get(CMDARG.flow).equalsIgnoreCase("checkAvailability")) { + checkAvailability(); + } else { + startCrawlerFlow(); + } + } + + void startCrawlerFlow() { + try { + manager.start(); + manager.importFilmlist(); + manager.importLivestreamFilmlist(); + } finally { + manager.filterFilmlist(); + manager.saveFilmlist(); + manager.saveDifferenceFilmlist(); + manager.writeHashFile(); + manager.writeIdFile(); + manager.copyFilmlist(); + // + manager.storeFilmsToDB(); + // + manager.stop(); + } + } + + void exportFilmListFromDB() { + try { + FilmDBService filmDBService = new FilmDBService(GPDataSourceProvider.get(), manager.getExecutorService(), 2000); + Optional dbFilmlist = filmDBService.readFilmlistFromDB(); + dbFilmlist.ifPresent(filmlist -> manager.getFilmlist().addAllFilms(filmlist.getFilms().values())); + // + manager.importLivestreamFilmlist(); + } finally { + manager.filterFilmlist(); + manager.saveFilmlist(); + manager.saveDifferenceFilmlist(); + manager.writeHashFile(); + manager.writeIdFile(); + manager.copyFilmlist(); + manager.stop(); + } + + } + void importFilmlistIntoDB() { + manager.importFilmlist(); + FilmDBService filmDBService = new FilmDBService(GPDataSourceProvider.get(), manager.getExecutorService(), 2000); + HashSet allVideoUrls = filmDBService.getAllVideoUrls(); + LOG.debug("allVideoUrls loaded {} entries", allVideoUrls.size()); + manager.getFilmlist().getFilms().entrySet().parallelStream() + .forEach(entry -> { + if (allVideoUrls.contains(entry.getValue().getDefaultUrl().get().getUrl().toString())) { + manager.getFilmlist().getFilms().remove(entry.getKey()); + } + }); + LOG.debug("removed to {} entries", manager.getFilmlist().getFilms().entrySet().size()); + //manager.getFilmlist().getFilms().entrySet().removeIf(entry -> filmDBService.videoExistsByUrl(entry.getValue())); + 
manager.getFilmlist().getFilms().entrySet().forEach(entry -> { + var film = entry.getValue(); + if (film.getId() == null || film.getId().isBlank()) { + film.setId(film.getUuid().toString()); + } + }); + LOG.debug("updated id for old films"); + manager.storeFilmsToDB(); + LOG.debug("data stored"); + manager.stop(); + } + void checkAvailability() { + FilmDBService filmDBService = new FilmDBService(GPDataSourceProvider.get(), manager.getExecutorService(), 2000); + String condition = "where last_url_check IS NULL OR last_url_check < NOW() - INTERVAL '1 DAY'"; + Optional dbFilmlist = filmDBService.readFilmlistFromDB(condition); + dbFilmlist.ifPresent(filmlist -> manager.getFilmlist().addAllFilms(filmlist.getFilms().values())); + CheckUrlAvailability checkUrlAvailability = new CheckUrlAvailability( + manager.getConfigManager().getConfig().getCheckImportListUrlMinSize(), + manager.getConfigManager().getConfig().getCheckImportListUrlTimeoutInSec(), + manager.getConfigManager().getConfig().getMaximumCpuThreads()); + Filmlist abonednedList = checkUrlAvailability.getAvailableFilmlist(dbFilmlist.get(), false); + filmDBService.deleteFilms(abonednedList.getFilms().values()); + filmDBService.update("UPDATE filme SET last_url_check = NOW() " + condition); + manager.stop(); + } +} diff --git a/src/test/java/de/mediathekview/mserver/crawler/arte/tasks/ArteVideoInfoTaskTest.java b/src/test/java/de/mediathekview/mserver/crawler/arte/tasks/ArteVideoInfoTaskTest.java index f7db35a72..6e34d12ae 100644 --- a/src/test/java/de/mediathekview/mserver/crawler/arte/tasks/ArteVideoInfoTaskTest.java +++ b/src/test/java/de/mediathekview/mserver/crawler/arte/tasks/ArteVideoInfoTaskTest.java @@ -25,7 +25,7 @@ public void test() { List>> expectedResult = generateExpectedResult(); for(List> entry : expectedResult) { Optional act = result.stream() - .filter(item -> item.getId().get().equalsIgnoreCase(entry.get(0).get())) + .filter(item -> item.getId().equalsIgnoreCase(entry.get(0).get())) .findFirst(); 
assertTrue(act.isPresent()); assertEntry(act.get(), entry.toArray(new Optional[0])); diff --git a/src/test/java/de/mediathekview/mserver/crawler/dw/tasks/DWOverviewDeserializerTest.java b/src/test/java/de/mediathekview/mserver/crawler/dw/tasks/DWOverviewDeserializerTest.java index a1113a1e3..922b792d9 100644 --- a/src/test/java/de/mediathekview/mserver/crawler/dw/tasks/DWOverviewDeserializerTest.java +++ b/src/test/java/de/mediathekview/mserver/crawler/dw/tasks/DWOverviewDeserializerTest.java @@ -4,6 +4,7 @@ import de.mediathekview.mserver.base.webaccess.JsoupConnection; import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO; import de.mediathekview.mserver.crawler.basic.PagedElementListDTO; +import de.mediathekview.mserver.crawler.basic.TopicUrlDTO; import de.mediathekview.mserver.crawler.dw.parser.DWSendungOverviewDeserializer; import de.mediathekview.mserver.testhelper.JsonFileReader; import org.junit.Before; @@ -76,7 +77,7 @@ public void setUp() { public void test() throws IOException { final JsonElement jsonElement = JsonFileReader.readJson(responseAsFile); final DWSendungOverviewDeserializer target = new DWSendungOverviewDeserializer(); - final Optional> actual = target.deserialize(jsonElement, null, null); + final Optional> actual = target.deserialize(jsonElement, null, null); // assertThat(actual.isPresent(), equalTo(true)); assertThat(actual.get().getNextPage().isPresent(), equalTo(hasNext)); diff --git a/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfTopicSeasonDeserializerTest.java b/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfTopicSeasonDeserializerTest.java index 970e9621b..c9a569718 100644 --- a/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfTopicSeasonDeserializerTest.java +++ b/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfTopicSeasonDeserializerTest.java @@ -34,7 +34,9 @@ void deserializeValidJson() { LocalDateTime.of(2024, 3, 31, 18, 15, 0), // LocalDateTime.of(2024, 3, 31, 20, 15, 0), 
"default", - "https://api.zdf.de/tmd/2/android_native_5/vod/ptmd/mediathek/240331_2015_sendung_trs/5"), + "https://api.zdf.de/tmd/2/android_native_5/vod/ptmd/mediathek/240331_2015_sendung_trs/5", + "", + ""), new ZdfFilmDto( Sender.ZDF, "Nusantara (S36/E03)", @@ -43,7 +45,9 @@ void deserializeValidJson() { LocalDateTime.of(2024, 12, 2, 1, 45, 0), // LocalDateTime.of(2024, 1, 1, 20, 15, 0), "default", - "https://api.zdf.de/tmd/2/android_native_5/vod/ptmd/mediathek/240101_2015_sendung_trs/8"))); + "https://api.zdf.de/tmd/2/android_native_5/vod/ptmd/mediathek/240101_2015_sendung_trs/8", + "", + ""))); } @Test @@ -63,7 +67,9 @@ void deserializeJsonWithDgs() { "https://www.zdf.de/video/dokus/terra-x-unter-dinos-geheimnisse-der-urzeit-dokureihe-100/terra-x-unter-dinos-lusotitan-old-grande-die-insel-der-giganten-doku-100", LocalDateTime.of(2025, 8, 20, 4, 0, 0), "dgs", - "https://api.zdf.de/tmd/2/android_native_5/vod/ptmd/mediathek/250914_dk_dinos_insel_giganten_tex_dgs/2?caption_source=250914_dk_dinos_insel_giganten_tex%2F5"), + "https://api.zdf.de/tmd/2/android_native_5/vod/ptmd/mediathek/250914_dk_dinos_insel_giganten_tex_dgs/2?caption_source=250914_dk_dinos_insel_giganten_tex%2F5", + "", + ""), new ZdfFilmDto( Sender.ZDF, "Die Insel der Giganten (S01/E04)", @@ -71,7 +77,9 @@ void deserializeJsonWithDgs() { "https://www.zdf.de/video/dokus/terra-x-unter-dinos-geheimnisse-der-urzeit-dokureihe-100/terra-x-unter-dinos-lusotitan-old-grande-die-insel-der-giganten-doku-100", LocalDateTime.of(2025, 8, 20, 4, 0, 0), "default", - "https://api.zdf.de/tmd/2/android_native_5/vod/ptmd/mediathek/250914_dk_dinos_insel_giganten_tex/5"))); + "https://api.zdf.de/tmd/2/android_native_5/vod/ptmd/mediathek/250914_dk_dinos_insel_giganten_tex/5", + "", + ""))); } From 12627e2ac3df1e6603094fc395ad81cb00e2c8e6 Mon Sep 17 00:00:00 2001 From: CodingPF Date: Tue, 23 Dec 2025 12:46:11 +0100 Subject: [PATCH 03/23] init 2.0 dependency --- pom.xml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) 
diff --git a/pom.xml b/pom.xml index e4e6d34dc..929189801 100644 --- a/pom.xml +++ b/pom.xml @@ -125,6 +125,20 @@ + + + org.postgresql + postgresql + 42.7.4 + + + + + com.zaxxer + HikariCP + 5.1.0 + + org.apache.commons commons-compress From c939829fee075c9be1f174925a2b5fa2ae42a170 Mon Sep 17 00:00:00 2001 From: CodingPF Date: Wed, 24 Dec 2025 00:42:32 +0100 Subject: [PATCH 04/23] update datasource and config --- .../mserver/base/config/MServerConfigDTO.java | 14 ++++- .../mserver/base/config/MServerDBConfig.java | 59 ++++++++++++++++++ .../mserver/base/utils/FilmDBService.java | 11 +++- .../base/utils/GPDataSourceProvider.java | 51 ---------------- .../utils/PostgreSQLDataSourceProvider.java | 61 +++++++++++++++++++ .../mserver/crawler/CrawlerManager.java | 5 +- .../crawler/basic/AbstractCrawler.java | 3 +- .../mserver/ui/config/MServerConfigUI.java | 3 + .../ui/config/MServerExecutionFlow.java | 7 +-- src/main/resources/MServer-Config.yaml | 6 ++ 10 files changed, 154 insertions(+), 66 deletions(-) create mode 100644 src/main/java/de/mediathekview/mserver/base/config/MServerDBConfig.java delete mode 100644 src/main/java/de/mediathekview/mserver/base/utils/GPDataSourceProvider.java create mode 100644 src/main/java/de/mediathekview/mserver/base/utils/PostgreSQLDataSourceProvider.java diff --git a/src/main/java/de/mediathekview/mserver/base/config/MServerConfigDTO.java b/src/main/java/de/mediathekview/mserver/base/config/MServerConfigDTO.java index 921c0f61a..65562c0e6 100644 --- a/src/main/java/de/mediathekview/mserver/base/config/MServerConfigDTO.java +++ b/src/main/java/de/mediathekview/mserver/base/config/MServerConfigDTO.java @@ -13,6 +13,7 @@ public class MServerConfigDTO extends MServerBasicConfigDTO implements ConfigDTO private final String filmlistHashFilePath; private final Boolean writeFilmlistIdFileEnabled; private final String filmlistIdFilePath; + private final MServerDBConfig mServerDBConfig; /** ignore certain film by title **/ private String 
ignoreFilmlistPath; /** add livestreams from external list **/ @@ -48,6 +49,7 @@ public MServerConfigDTO() { filmlistSavePaths = new EnumMap<>(FilmlistFormats.class); filmlistDiffSavePaths = new EnumMap<>(FilmlistFormats.class); copySettings = new MServerCopySettings(); + mServerDBConfig = new MServerDBConfig(); logSettings = new MServerLogSettingsDTO(); crawlerURLs = new EnumMap<>(CrawlerUrlType.class); @@ -242,7 +244,11 @@ public ImportLivestreamConfiguration getImportLivestreamConfiguration() { public List getImportFilmlistConfigurations() { return importFilmlistConfigurations; } - + + public MServerDBConfig getMServerDBConfig() { + return mServerDBConfig; + } + /** * Loads the {@link Sender} specific configuration and if it not exist creates one. * @@ -286,7 +292,8 @@ public boolean equals(final Object o) { && Objects.equals(getFilmlistIdFilePath(), that.getFilmlistIdFilePath()) && Objects.equals(getIgnoreFilmslistPath(), that.getIgnoreFilmslistPath()) && Objects.equals(getImportLivestreamConfiguration(), that.getImportLivestreamConfiguration()) - && Objects.equals(getImportFilmlistConfigurations(), that.getImportFilmlistConfigurations()); + && Objects.equals(getImportFilmlistConfigurations(), that.getImportFilmlistConfigurations()) + && Objects.equals(getMServerDBConfig(), that.getMServerDBConfig()); } @Override @@ -313,7 +320,8 @@ public int hashCode() { getFilmlistIdFilePath(), getIgnoreFilmslistPath(), getImportLivestreamConfiguration(), - getImportFilmlistConfigurations()); + getImportFilmlistConfigurations(), + getMServerDBConfig()); } public void initializeSenderConfigurations() { diff --git a/src/main/java/de/mediathekview/mserver/base/config/MServerDBConfig.java b/src/main/java/de/mediathekview/mserver/base/config/MServerDBConfig.java new file mode 100644 index 000000000..2953ec72e --- /dev/null +++ b/src/main/java/de/mediathekview/mserver/base/config/MServerDBConfig.java @@ -0,0 +1,59 @@ +package de.mediathekview.mserver.base.config; + +import 
java.util.Objects; + +public class MServerDBConfig { + private final Boolean active; + private final String url; + private final String username; + private final String password; + + public MServerDBConfig() { + super(); + active = true; + url = "jdbc:postgresql://OscarDS:55432/crawler"; + username = "crawler"; + password = "secret"; + } + + public MServerDBConfig(Boolean active, String url, String username, String password) { + super(); + this.active = active; + this.url = url; + this.username = username; + this.password = password; + } + + + + public Boolean getActive() { + return active; + } + public String getUrl() { + return url; + } + public String getUsername() { + return username; + } + public String getPassword() { + return password; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + MServerDBConfig that = (MServerDBConfig) o; + + return Objects.equals(active, that.active) + && Objects.equals(url, that.url) + && Objects.equals(username, that.username) + && Objects.equals(password, that.password); + } + + @Override + public int hashCode() { + return Objects.hash(active, url, username, password); + } +} diff --git a/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java index 682aabf52..9fd47c0a0 100644 --- a/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java +++ b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java @@ -45,8 +45,8 @@ public class FilmDBService { private final ExecutorService executorService; private final int batchSize; - public FilmDBService(DataSource dataSource, ExecutorService executorService, int batchSize) { - this.dataSource = dataSource; + public FilmDBService(ExecutorService executorService, int batchSize) { + this.dataSource = PostgreSQLDataSourceProvider.get(); this.executorService = executorService; this.batchSize = 
batchSize; @@ -136,6 +136,9 @@ public Optional readFilmlistFromDB(String where) { ///////////////////////////////////////////////////////////////////////////////////////// public List filterNewVideos(List videos, Function idExtractor) { + if(!PostgreSQLDataSourceProvider.isEnabled()) { + return videos; + } try { List>> futures = new ArrayList<>(); @@ -184,6 +187,7 @@ public List filterNewVideos(List videos, Function idExtract LOG.debug("Filtered {} (in {} out {})",(videos.size()-result.size()), videos.size(), result.size()); return result; } catch (Exception e) { + LOG.error("{}", e); return videos; } } @@ -223,6 +227,9 @@ public HashSet getAllVideoUrls() { * Speichert alle Filme einer Filmlist parallel in der DB. */ public void saveAll(Filmlist filmlist) throws Exception { + if(!PostgreSQLDataSourceProvider.isEnabled()) { + return; + } // Map in List konvertieren List films = new ArrayList<>(filmlist.getFilms().values()); films = makeUniqueIds(films); diff --git a/src/main/java/de/mediathekview/mserver/base/utils/GPDataSourceProvider.java b/src/main/java/de/mediathekview/mserver/base/utils/GPDataSourceProvider.java deleted file mode 100644 index 3e2883194..000000000 --- a/src/main/java/de/mediathekview/mserver/base/utils/GPDataSourceProvider.java +++ /dev/null @@ -1,51 +0,0 @@ -package de.mediathekview.mserver.base.utils; - -import com.zaxxer.hikari.HikariConfig; -import com.zaxxer.hikari.HikariDataSource; - -import javax.sql.DataSource; - -public final class GPDataSourceProvider { - - private static final HikariDataSource DATA_SOURCE; - - static { - HikariConfig cfg = new HikariConfig(); - - // === JDBC === - cfg.setJdbcUrl("jdbc:postgresql://OscarDS:55432/crawler"); - cfg.setUsername("crawler"); - cfg.setPassword("secret"); - - // === Pool Sizing (wichtig!) 
=== - cfg.setMaximumPoolSize(16); // Sweet Spot für 10k+/min - cfg.setMinimumIdle(4); - - // === Performance === - cfg.setAutoCommit(true); - cfg.setConnectionTimeout(3000); - cfg.setIdleTimeout(600_000); - cfg.setMaxLifetime(1_800_000); - - // === PostgreSQL Optimierungen === - cfg.addDataSourceProperty("reWriteBatchedInserts", "true"); - cfg.addDataSourceProperty("stringtype", "unspecified"); - - // === Debug (optional) === - cfg.setPoolName("CrawlerPool"); - - DATA_SOURCE = new HikariDataSource(cfg); - } - - private GPDataSourceProvider() { - // no instances - } - - public static DataSource get() { - return DATA_SOURCE; - } - - public static void shutdown() { - DATA_SOURCE.close(); - } -} diff --git a/src/main/java/de/mediathekview/mserver/base/utils/PostgreSQLDataSourceProvider.java b/src/main/java/de/mediathekview/mserver/base/utils/PostgreSQLDataSourceProvider.java new file mode 100644 index 000000000..e98d7c1c3 --- /dev/null +++ b/src/main/java/de/mediathekview/mserver/base/utils/PostgreSQLDataSourceProvider.java @@ -0,0 +1,61 @@ +package de.mediathekview.mserver.base.utils; + +import com.zaxxer.hikari.HikariConfig; +import com.zaxxer.hikari.HikariDataSource; + +import de.mediathekview.mserver.base.config.MServerConfigManager; + +import javax.sql.DataSource; + +public final class PostgreSQLDataSourceProvider { + private static HikariDataSource DATA_SOURCE; + private static Boolean enabled = false; + private MServerConfigManager aMServerConfigManager; + + public PostgreSQLDataSourceProvider(MServerConfigManager aMServerConfigManager) { + this.aMServerConfigManager = aMServerConfigManager; + init(); + } + + public static boolean isEnabled() { + return enabled; + } + + public static DataSource get() { + return DATA_SOURCE; + } + + public static void shutdown() { + DATA_SOURCE.close(); + } + + private void init() { + HikariConfig cfg = new HikariConfig(); + enabled = aMServerConfigManager.getConfig().getMServerDBConfig().getActive(); + if(!enabled) { + return; + 
} + cfg.setJdbcUrl(aMServerConfigManager.getConfig().getMServerDBConfig().getUrl()); + cfg.setUsername(aMServerConfigManager.getConfig().getMServerDBConfig().getUsername()); + cfg.setPassword(aMServerConfigManager.getConfig().getMServerDBConfig().getPassword()); + + // === Pool Sizing (wichtig!) === + cfg.setMaximumPoolSize(16); // Sweet Spot für 10k+/min + cfg.setMinimumIdle(4); + + // === Performance === + cfg.setAutoCommit(true); + cfg.setConnectionTimeout(3000); + cfg.setIdleTimeout(600_000); + cfg.setMaxLifetime(1_800_000); + + // === PostgreSQL Optimierungen === + cfg.addDataSourceProperty("reWriteBatchedInserts", "true"); + cfg.addDataSourceProperty("stringtype", "unspecified"); + + // === Debug (optional) === + cfg.setPoolName("CrawlerPool"); + + DATA_SOURCE = new HikariDataSource(cfg); + } +} diff --git a/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java b/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java index 1f97bc961..90f8c7dae 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java +++ b/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java @@ -16,7 +16,6 @@ import de.mediathekview.mserver.base.uploader.copy.FileCopyTask; import de.mediathekview.mserver.base.utils.CheckUrlAvailability; import de.mediathekview.mserver.base.utils.FilmDBService; -import de.mediathekview.mserver.base.utils.GPDataSourceProvider; import de.mediathekview.mserver.crawler.ard.ArdCrawler; import de.mediathekview.mserver.crawler.arte.ArteCrawler; import de.mediathekview.mserver.crawler.arte.ArteCrawler_EN; @@ -46,7 +45,6 @@ import java.util.*; import java.util.Map.Entry; import java.util.concurrent.*; -import javax.sql.DataSource; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.NotNull; @@ -97,8 +95,7 @@ public MServerConfigManager getConfigManager() { } public void storeFilmsToDB() { - DataSource ds = GPDataSourceProvider.get(); - 
FilmDBService filmDBService = new FilmDBService(ds, executorService, 200); + FilmDBService filmDBService = new FilmDBService(executorService, 200); try { filmDBService.saveAll(filmlist); } catch (Exception e) { diff --git a/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractCrawler.java index f7365d9bc..ab089a43c 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractCrawler.java @@ -6,7 +6,6 @@ import de.mediathekview.mserver.base.messages.listener.MessageListener; import de.mediathekview.mserver.base.progress.Progress; import de.mediathekview.mserver.base.utils.FilmDBService; -import de.mediathekview.mserver.base.utils.GPDataSourceProvider; import de.mediathekview.mserver.base.config.MServerBasicConfigDTO; import de.mediathekview.mserver.base.config.MServerConfigDTO; import de.mediathekview.mserver.base.config.MServerConfigManager; @@ -72,7 +71,7 @@ protected AbstractCrawler( rootConfig.getSenderConfig(getSender()).getSocketTimeoutInSeconds(), runtimeConfig.getMaximumCpuThreads()); rateLimiter = RateLimiter.create(rootConfig.getSenderConfig(getSender()).getMaximumRequestsPerSecond()); - filmDBService = new FilmDBService(GPDataSourceProvider.get(), forkJoinPool, 200); + filmDBService = new FilmDBService(forkJoinPool, 200); films = ConcurrentHashMap.newKeySet(); } diff --git a/src/main/java/de/mediathekview/mserver/ui/config/MServerConfigUI.java b/src/main/java/de/mediathekview/mserver/ui/config/MServerConfigUI.java index 01b91ce21..14497703e 100644 --- a/src/main/java/de/mediathekview/mserver/ui/config/MServerConfigUI.java +++ b/src/main/java/de/mediathekview/mserver/ui/config/MServerConfigUI.java @@ -2,6 +2,7 @@ import de.mediathekview.mserver.base.messages.listener.LogMessageListener; import de.mediathekview.mserver.base.messages.listener.MessageListener; +import 
de.mediathekview.mserver.base.utils.PostgreSQLDataSourceProvider; import de.mediathekview.mserver.base.config.Log4JConfigurationFactory; import de.mediathekview.mserver.base.config.MServerConfigManager; import de.mediathekview.mserver.base.config.MServerLogSettingsDTO; @@ -114,6 +115,8 @@ void start(final String[] aProgramAgruments) { LOG = LogManager.getLogger(MServerConfigUI.class); logMessageListener = new LogMessageListener(); // + new PostgreSQLDataSourceProvider(aMServerConfigManager); // init singleton + // manager = new CrawlerManager(aMServerConfigManager); final MServerLogSettingsDTO logSettings = aMServerConfigManager.getConfig().getLogSettings(); logSettings.setLogActivateConsole(true); diff --git a/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java b/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java index a1f8f8fa2..20350f125 100644 --- a/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java +++ b/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java @@ -9,7 +9,6 @@ import de.mediathekview.mserver.base.utils.CheckUrlAvailability; import de.mediathekview.mserver.base.utils.FilmDBService; -import de.mediathekview.mserver.base.utils.GPDataSourceProvider; import de.mediathekview.mserver.crawler.CrawlerManager; import de.mediathekview.mserver.daten.Filmlist; import de.mediathekview.mserver.ui.config.MServerCommandLine.CMDARG; @@ -65,7 +64,7 @@ void startCrawlerFlow() { void exportFilmListFromDB() { try { - FilmDBService filmDBService = new FilmDBService(GPDataSourceProvider.get(), manager.getExecutorService(), 2000); + FilmDBService filmDBService = new FilmDBService(manager.getExecutorService(), 2000); Optional dbFilmlist = filmDBService.readFilmlistFromDB(); dbFilmlist.ifPresent(filmlist -> manager.getFilmlist().addAllFilms(filmlist.getFilms().values())); // @@ -83,7 +82,7 @@ void exportFilmListFromDB() { } void importFilmlistIntoDB() { manager.importFilmlist(); - 
FilmDBService filmDBService = new FilmDBService(GPDataSourceProvider.get(), manager.getExecutorService(), 2000); + FilmDBService filmDBService = new FilmDBService(manager.getExecutorService(), 2000); HashSet allVideoUrls = filmDBService.getAllVideoUrls(); LOG.debug("allVideoUrls loaded {} entries", allVideoUrls.size()); manager.getFilmlist().getFilms().entrySet().parallelStream() @@ -106,7 +105,7 @@ void importFilmlistIntoDB() { manager.stop(); } void checkAvailability() { - FilmDBService filmDBService = new FilmDBService(GPDataSourceProvider.get(), manager.getExecutorService(), 2000); + FilmDBService filmDBService = new FilmDBService(manager.getExecutorService(), 2000); String condition = "where last_url_check IS NULL OR last_url_check < NOW() - INTERVAL '1 DAY'"; Optional dbFilmlist = filmDBService.readFilmlistFromDB(condition); dbFilmlist.ifPresent(filmlist -> manager.getFilmlist().addAllFilms(filmlist.getFilms().values())); diff --git a/src/main/resources/MServer-Config.yaml b/src/main/resources/MServer-Config.yaml index 4ae683329..aea212a75 100644 --- a/src/main/resources/MServer-Config.yaml +++ b/src/main/resources/MServer-Config.yaml @@ -188,6 +188,12 @@ copySettings: # JSON_COMPRESSED: /var/www/mediathekview/filmlisten/filmliste_diff.json.xz OLD_JSON_COMPRESSED_XZ: copyTarget/filmliste_old_diff.json.xz +mServerDBConfig: + active: false + url: "url" + username: "username" + password: "password" + #### Logging #### logSettings: # The log level for the console. 
From 14891e4ba180c963d6cdd3149a3ac92cc5684b6c Mon Sep 17 00:00:00 2001 From: CodingPF Date: Mon, 29 Dec 2025 19:32:40 +0100 Subject: [PATCH 05/23] update --- MServer-Config.yaml | 40 ++++++++---- pom.xml | 8 +-- src/main/docker/docker-compose.yml | 61 +++++++++++++++++++ src/main/docker/runDocker | 18 ++++++ .../mserver/base/config/ConfigManager.java | 5 +- .../mserver/base/config/MServerConfigDTO.java | 17 ++++-- .../mserver/base/config/MServerDBConfig.java | 29 ++++++--- .../base/uploader/copy/FileCopyTask.java | 31 ++++++++-- .../base/utils/CheckUrlAvailability.java | 3 - .../mserver/base/utils/FilmDBService.java | 14 +++-- .../utils/PostgreSQLDataSourceProvider.java | 8 +-- .../mserver/crawler/CrawlerManager.java | 10 ++- .../crawler/ard/json/ArdFilmDeserializer.java | 7 +++ .../crawler/phoenix/PhoenixCrawler.java | 2 +- .../zdf/json/ZdfDayPageDeserializer.java | 21 ++++--- .../zdf/json/ZdfFilmDetailDeserializer.java | 5 +- .../ui/config/MServerExecutionFlow.java | 9 ++- src/main/resources/MServer-Config.yaml | 10 +-- 18 files changed, 227 insertions(+), 71 deletions(-) create mode 100644 src/main/docker/docker-compose.yml create mode 100644 src/main/docker/runDocker diff --git a/MServer-Config.yaml b/MServer-Config.yaml index 93925bd50..a9e6f2604 100644 --- a/MServer-Config.yaml +++ b/MServer-Config.yaml @@ -1,5 +1,12 @@ #### Server configurations #### +# Film DB +databaseConfig: + active: false + url: "jdbc:postgresql://localhost:55432/crawler" + username: "crawler" + password: "secret" + # The maximum amount of cpu threads to be used. maximumCpuThreads: 10 @@ -18,20 +25,19 @@ maximumRequestsPerSecond: 999.0 # If set only these Sender will be crawled all other will be ignored. 
senderIncluded: #- ARD - - ARTE_DE - - ARTE_FR - - ARTE_PL - - ARTE_IT - - ARTE_ES - - ARTE_EN + #- ARTE_DE + #- ARTE_FR + #- ARTE_PL + #- ARTE_IT + #- ARTE_ES + #- ARTE_EN #- DREISAT - #- FUNK #- KIKA - # - DW + #- DW #- ORF #- PHOENIX #- SRF - #- SR + - SR #- ZDF #SRF,SR,PHONIX,ORF,KIKA,DW,3SAT< @@ -113,7 +119,17 @@ importFilmlistConfigurations : path: "https://verteiler1.mediathekview.de/filme-org.xz" format: OLD_JSON_COMPRESSED_XZ createDiff: true - checkImportListUrl: true + checkImportListUrl: true + - active: false + path: "https://verteiler1.mediathekview.de/Filmliste-akt.xz" + format: OLD_JSON_COMPRESSED_XZ + createDiff: false + checkImportListUrl: false + - active: true + path: jdbc + format: OLD_JSON + createDiff: false + checkImportListUrl: false # film url is consider invalid if the size is below the minSize checkImportListUrlMinSize: 5012 @@ -171,7 +187,7 @@ senderConfigurations: ARTE_ES: maximumSubpages: 6 KIKA: - maximumSubpages: 2 + maximumSubpages: 4 maximumRequestsPerSecond: 8.0 ZDF: maximumDaysForSendungVerpasstSection: 21 @@ -206,6 +222,8 @@ copySettings: # JSON_COMPRESSED: /var/www/mediathekview/filmlisten/filmliste_diff.json.xz OLD_JSON_COMPRESSED_XZ: copyTarget/filmliste_old_diff.json.xz + + #### Logging #### logSettings: # The log level for the console. 
diff --git a/pom.xml b/pom.xml index 929189801..c56c407ac 100644 --- a/pom.xml +++ b/pom.xml @@ -93,7 +93,7 @@ 2.35.2 1.10 0.9.2 - 0.40.2 + 0.48.0 3.2.0 @@ -500,16 +500,14 @@ - io.fabric8 docker-maven-plugin ${docker-maven-plugin.version} - + mediathekview/mserver:${project.version} - eclipse-temurin:${maven.compiler.target} Nicklas Wiegandt <nicklas@wiegandt.eu> @@ -517,7 +515,7 @@ docker-assembly.xml - -Xmx4G + -Xmx8G config.yaml diff --git a/src/main/docker/docker-compose.yml b/src/main/docker/docker-compose.yml new file mode 100644 index 000000000..15a9879f0 --- /dev/null +++ b/src/main/docker/docker-compose.yml @@ -0,0 +1,61 @@ + +configs: + init_sql: + content: | + CREATE TABLE IF NOT EXISTS filme ( + id TEXT PRIMARY KEY, -- eindeutige Film-ID + data JSONB NOT NULL, -- JSON-Daten des Films + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + last_update TIMESTAMPTZ NOT NULL DEFAULT now(), + last_url_check TIMESTAMPTZ NOT NULL DEFAULT now() + ); + +services: + + postgresMV: + image: postgres:18 + container_name: crawler-postgres + environment: + POSTGRES_DB: crawler + POSTGRES_USER: crawler + POSTGRES_PASSWORD: secret + TZ: Europe/Berlin + ports: + - "55432:5432" + volumes: + # Persistente Daten + - c:/tmp/pgdata:/var/lib/postgresql + configs: + - source: init_sql + target: /docker-entrypoint-initdb.d/init.sql + restart: unless-stopped + + mserver-r1: + image: mediathekview/mserver:4.0.1-SNAPSHOT + environment: + MSERVER_OPTS: --config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R1.yaml + volumes: + - c:/tmp/filmlists:/filmlists + - c:/tmp/logs:/logs + - c:/tmp/hist:/hist + - c:/tmp/config:/config + + mserver-r2: + image: mediathekview/mserver:4.0.1-SNAPSHOT + environment: + MSERVER_OPTS: --config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R2.yaml + volumes: + - c:/tmp/filmlists:/filmlists + - c:/tmp/logs:/logs + - c:/tmp/hist:/hist + - c:/tmp/config:/config + + mserver-checkUrls: + image: 
mediathekview/mserver:4.0.1-SNAPSHOT + environment: + MSERVER_OPTS: --config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R2.yaml --flow checkAvailability + volumes: + - /volume1/docker/Projekt-mv2/filmlists:/filmlists + - /volume1/docker/Projekt-mv2/logs:/logs + - /volume1/docker/Projekt-mv2/hist:/hist + - /volume1/docker/Projekt-mv2/config:/config \ No newline at end of file diff --git a/src/main/docker/runDocker b/src/main/docker/runDocker new file mode 100644 index 000000000..c1e452092 --- /dev/null +++ b/src/main/docker/runDocker @@ -0,0 +1,18 @@ +# all docker command + +# run PG +docker compose up -d postgresMV + +# cron 01 01 long run +docker compose run -d --rm -e MSERVER_OPTS="--config /config/MServer-Config-R1.yaml" mserver-r1 + +# cron 06-22 1,31 short run +docker compose run -d --rm -e MSERVER_OPTS="--config /config/MServer-Config-R2.yaml" mserver-r2 + +# cron 22 55 url check +docker compose run -d --rm -e MSERVER_OPTS="--config /config/MServer-Config-R2.yaml --flow checkAvailability" mserver-checkUrls + +# on demand - do not run this unless you know what you are doing! +docker compose run -d --rm -e MSERVER_OPTS="--config /config/MServer-Config-R2.yaml --flow importFilmlistIntoDB" mserver-r3 + + diff --git a/src/main/java/de/mediathekview/mserver/base/config/ConfigManager.java b/src/main/java/de/mediathekview/mserver/base/config/ConfigManager.java index 6aa3a6de5..12ebbaef9 100644 --- a/src/main/java/de/mediathekview/mserver/base/config/ConfigManager.java +++ b/src/main/java/de/mediathekview/mserver/base/config/ConfigManager.java @@ -11,7 +11,7 @@ /** A manager to load configurations. 
*/ public abstract class ConfigManager { private T config; - private static final Logger LOG = LogManager.getLogger(ConfigManager.class); + //private static final Logger LOG = LogManager.getLogger(ConfigManager.class); protected abstract String getConfigFileName(); @@ -58,7 +58,8 @@ public String getResourcePath(String resourceName) { } } } catch(Exception e) { - LOG.debug(e); + //LOG.debug(e); + e.printStackTrace(); } return null; } diff --git a/src/main/java/de/mediathekview/mserver/base/config/MServerConfigDTO.java b/src/main/java/de/mediathekview/mserver/base/config/MServerConfigDTO.java index 65562c0e6..e004a81e3 100644 --- a/src/main/java/de/mediathekview/mserver/base/config/MServerConfigDTO.java +++ b/src/main/java/de/mediathekview/mserver/base/config/MServerConfigDTO.java @@ -8,12 +8,13 @@ /** A POJO with the configs for MServer. */ public class MServerConfigDTO extends MServerBasicConfigDTO implements ConfigDTO { + private MServerDBConfig databaseConfig; private final MServerCopySettings copySettings; private final Boolean writeFilmlistHashFileEnabled; private final String filmlistHashFilePath; private final Boolean writeFilmlistIdFileEnabled; private final String filmlistIdFilePath; - private final MServerDBConfig mServerDBConfig; + /** ignore certain film by title **/ private String ignoreFilmlistPath; /** add livestreams from external list **/ @@ -49,7 +50,7 @@ public MServerConfigDTO() { filmlistSavePaths = new EnumMap<>(FilmlistFormats.class); filmlistDiffSavePaths = new EnumMap<>(FilmlistFormats.class); copySettings = new MServerCopySettings(); - mServerDBConfig = new MServerDBConfig(); + databaseConfig = new MServerDBConfig(); logSettings = new MServerLogSettingsDTO(); crawlerURLs = new EnumMap<>(CrawlerUrlType.class); @@ -245,8 +246,12 @@ public List getImportFilmlistConfigurations() { return importFilmlistConfigurations; } - public MServerDBConfig getMServerDBConfig() { - return mServerDBConfig; + public MServerDBConfig getDatabaseConfig() { + 
return databaseConfig; + } + + public void setDatabaseConfig(MServerDBConfig databaseConfig) { + this.databaseConfig = databaseConfig; } /** @@ -293,7 +298,7 @@ public boolean equals(final Object o) { && Objects.equals(getIgnoreFilmslistPath(), that.getIgnoreFilmslistPath()) && Objects.equals(getImportLivestreamConfiguration(), that.getImportLivestreamConfiguration()) && Objects.equals(getImportFilmlistConfigurations(), that.getImportFilmlistConfigurations()) - && Objects.equals(getMServerDBConfig(), that.getMServerDBConfig()); + && Objects.equals(getDatabaseConfig(), that.getDatabaseConfig()); } @Override @@ -321,7 +326,7 @@ public int hashCode() { getIgnoreFilmslistPath(), getImportLivestreamConfiguration(), getImportFilmlistConfigurations(), - getMServerDBConfig()); + getDatabaseConfig()); } public void initializeSenderConfigurations() { diff --git a/src/main/java/de/mediathekview/mserver/base/config/MServerDBConfig.java b/src/main/java/de/mediathekview/mserver/base/config/MServerDBConfig.java index 2953ec72e..7e6766ad2 100644 --- a/src/main/java/de/mediathekview/mserver/base/config/MServerDBConfig.java +++ b/src/main/java/de/mediathekview/mserver/base/config/MServerDBConfig.java @@ -3,21 +3,19 @@ import java.util.Objects; public class MServerDBConfig { - private final Boolean active; - private final String url; - private final String username; - private final String password; + private boolean active; + private String url; + private String username; + private String password; public MServerDBConfig() { - super(); active = true; - url = "jdbc:postgresql://OscarDS:55432/crawler"; + url = "jdbc:postgresql://postgresMV:55432/crawler"; username = "crawler"; password = "secret"; } public MServerDBConfig(Boolean active, String url, String username, String password) { - super(); this.active = active; this.url = url; this.username = username; @@ -38,6 +36,23 @@ public String getUsername() { public String getPassword() { return password; } + + + public void 
setActive(Boolean active) { + this.active = active; + } + + public void setUrl(String url) { + this.url = url; + } + + public void setUsername(String username) { + this.username = username; + } + + public void setPassword(String password) { + this.password = password; + } @Override public boolean equals(Object o) { diff --git a/src/main/java/de/mediathekview/mserver/base/uploader/copy/FileCopyTask.java b/src/main/java/de/mediathekview/mserver/base/uploader/copy/FileCopyTask.java index 7dc5927cd..2a69689dc 100644 --- a/src/main/java/de/mediathekview/mserver/base/uploader/copy/FileCopyTask.java +++ b/src/main/java/de/mediathekview/mserver/base/uploader/copy/FileCopyTask.java @@ -10,6 +10,8 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardCopyOption; +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; public class FileCopyTask extends UploadTask { private static final Logger LOG = LogManager.getLogger(FileCopyTask.class); @@ -25,10 +27,10 @@ protected void printMessage(final Message aMessage, final Object... 
args) { @Override protected void upload() { try { - if (Files.exists(uploadTarget.getTargetPath())) { - printMessage( - ServerMessages.FILE_COPY_TARGET_EXISTS, - uploadTarget.getTargetPath().toAbsolutePath().toString()); + Path target = uploadTarget.getTargetPath(); + if (Files.exists(target)) { + Path backup = backupExistingFile(target); + LOG.debug("CopyTask found existing file - rename existing file to {} before overwrite", backup.getFileName()); } Files.copy(sourcePath, uploadTarget.getTargetPath(), StandardCopyOption.REPLACE_EXISTING); } catch (final IOException ioException) { @@ -36,4 +38,25 @@ protected void upload() { printMessage(ServerMessages.FILE_COPY_ERROR); } } + + private Path backupExistingFile(Path target) throws IOException { + String fileName = target.getFileName().toString(); + Path dir = target.getParent(); + + String date = LocalDate.now() + .format(DateTimeFormatter.ofPattern("yyyy-MM-dd")); + + Path backup = dir.resolve(fileName + "." + date); + + int counter = 1; + while (Files.exists(backup)) { + backup = dir.resolve(fileName + "." + date + "." 
+ counter); + counter++; + } + + Files.move(target, backup); + + return backup; + } + } diff --git a/src/main/java/de/mediathekview/mserver/base/utils/CheckUrlAvailability.java b/src/main/java/de/mediathekview/mserver/base/utils/CheckUrlAvailability.java index 7fd17d335..c6782d855 100644 --- a/src/main/java/de/mediathekview/mserver/base/utils/CheckUrlAvailability.java +++ b/src/main/java/de/mediathekview/mserver/base/utils/CheckUrlAvailability.java @@ -59,9 +59,6 @@ private boolean isAvailable(Film pFilm) { timeout.set(true); return true; } - if(pFilm.getDefaultUrl().isEmpty()) { - System.out.println("asdf"); - } String normalUrl = pFilm.getDefaultUrl().get().getUrl().toString(); ResponseInfo ri = fsd.getRequestInfo(normalUrl); diff --git a/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java index 9fd47c0a0..42f1f87a4 100644 --- a/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java +++ b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java @@ -110,7 +110,8 @@ public Optional readFilmlistFromDB() { } public Optional readFilmlistFromDB(String where) { - LOG.debug("fetch data from DB"); + long start = System.currentTimeMillis(); + LOG.debug("import filmlist from DB"); int readCounter = 0; Filmlist list = new Filmlist(); try (Connection con = dataSource.getConnection(); @@ -124,7 +125,7 @@ public Optional readFilmlistFromDB(String where) { readCounter++; } } - LOG.debug("Filmlist read {} records and imported {} records", readCounter, list.getFilms().size()); + LOG.debug("done reading in {} sec for {} elements resulting in {} elements", ((System.currentTimeMillis()-start)/1000), readCounter, list.getFilms().size()); return Optional.of(list); } catch (Exception e) { LOG.error(e); @@ -200,7 +201,8 @@ public List filterNewVideos(List videos, Function idExtract public HashSet getAllVideoUrls() { HashSet allVideoUrls = new HashSet(); String sql = """ - 
SELECT + SELECT + data ->> 'sender' sender, data -> 'urls' -> 'SMALL' ->> 'url' aSmall, data -> 'urls' -> 'NORMAL' ->> 'url' aNormal, data -> 'urls' -> 'HD' ->> 'url' aHD @@ -209,9 +211,9 @@ public HashSet getAllVideoUrls() { try (Connection con = dataSource.getConnection(); PreparedStatement ps = con.prepareStatement(sql)) { try (ResultSet rs = ps.executeQuery()) { while (rs.next()) { - allVideoUrls.add(rs.getString(1)); - allVideoUrls.add(rs.getString(2)); - allVideoUrls.add(rs.getString(3)); + allVideoUrls.add(rs.getString(1)+rs.getString(2)); + allVideoUrls.add(rs.getString(1)+rs.getString(3)); + allVideoUrls.add(rs.getString(1)+rs.getString(4)); } } } catch (SQLException e) { diff --git a/src/main/java/de/mediathekview/mserver/base/utils/PostgreSQLDataSourceProvider.java b/src/main/java/de/mediathekview/mserver/base/utils/PostgreSQLDataSourceProvider.java index e98d7c1c3..af245fb58 100644 --- a/src/main/java/de/mediathekview/mserver/base/utils/PostgreSQLDataSourceProvider.java +++ b/src/main/java/de/mediathekview/mserver/base/utils/PostgreSQLDataSourceProvider.java @@ -31,13 +31,13 @@ public static void shutdown() { private void init() { HikariConfig cfg = new HikariConfig(); - enabled = aMServerConfigManager.getConfig().getMServerDBConfig().getActive(); + enabled = aMServerConfigManager.getConfig().getDatabaseConfig().getActive(); if(!enabled) { return; } - cfg.setJdbcUrl(aMServerConfigManager.getConfig().getMServerDBConfig().getUrl()); - cfg.setUsername(aMServerConfigManager.getConfig().getMServerDBConfig().getUsername()); - cfg.setPassword(aMServerConfigManager.getConfig().getMServerDBConfig().getPassword()); + cfg.setJdbcUrl(aMServerConfigManager.getConfig().getDatabaseConfig().getUrl()); + cfg.setUsername(aMServerConfigManager.getConfig().getDatabaseConfig().getUsername()); + cfg.setPassword(aMServerConfigManager.getConfig().getDatabaseConfig().getPassword()); // === Pool Sizing (wichtig!) 
=== cfg.setMaximumPoolSize(16); // Sweet Spot für 10k+/min diff --git a/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java b/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java index 90f8c7dae..b68f83481 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java +++ b/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java @@ -198,7 +198,9 @@ public void importLivestreamFilmlist(final FilmlistFormats aFormat, final String public void importFilmlist(final ImportFilmlistConfiguration importFilmlistConfiguration) { try { Optional importedFilmlist; - if (importFilmlistConfiguration.getPath().startsWith(HTTP)) { + if (importFilmlistConfiguration.getPath().startsWith("jdbc")) { + importedFilmlist = importFilmlistFromDB(); + } else if (importFilmlistConfiguration.getPath().startsWith(HTTP)) { importedFilmlist = importFilmListFromURl(importFilmlistConfiguration.getFormat(), importFilmlistConfiguration.getPath()); } else { importedFilmlist = importFilmlistFromFile(importFilmlistConfiguration.getFormat(), importFilmlistConfiguration.getPath()); @@ -455,6 +457,12 @@ private Set getCrawlerToRun() { return crawlerToRun; } + private Optional importFilmlistFromDB() throws IOException { + FilmDBService filmDBService = new FilmDBService(getExecutorService(), 2000); + Optional dbFilmlist = filmDBService.readFilmlistFromDB(); + return dbFilmlist; + } + private Optional importFilmlistFromFile( final FilmlistFormats aFormat, final String aFilmlistLocation) throws IOException { final Path filmlistPath = Paths.get(aFilmlistLocation); diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java index 1a3d214fd..0b3f478c5 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java @@ -242,6 
+242,13 @@ private Optional> fallbackToM3U(Optional newUrls = new EnumMap<>(Resolution.class); resolutionUrlMapFromM3U.forEach((key, value) -> newUrls.put(key, value.toString())); + // + // TODO: FIXME + if (!resolutionUrlMapFromM3U.containsKey(Resolution.NORMAL)) { + Resolution anyResolution = resolutionUrlMapFromM3U.keySet().stream().findFirst().get(); + resolutionUrlMapFromM3U.put(Resolution.NORMAL, resolutionUrlMapFromM3U.get(anyResolution)); + resolutionUrlMapFromM3U.remove(anyResolution); + } return Optional.of(newUrls); } } catch (MalformedURLException | URISyntaxException e) { diff --git a/src/main/java/de/mediathekview/mserver/crawler/phoenix/PhoenixCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/phoenix/PhoenixCrawler.java index 8b165e3af..2516bb5e3 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/phoenix/PhoenixCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/phoenix/PhoenixCrawler.java @@ -49,7 +49,7 @@ protected RecursiveTask> createCrawlerTask() { Queue showsFiltered = this.filterExistingFilms(shows, v -> v.getUrl().substring(v.getUrl().lastIndexOf("/")+1)); // printMessage( - ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), shows.size()); + ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), showsFiltered.size()); getAndSetMaxCount(showsFiltered.size()); return new PhoenixFilmDetailTask( diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDeserializer.java index 40ebb92ec..028c06bc5 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDeserializer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDeserializer.java @@ -5,15 +5,20 @@ import com.google.gson.JsonDeserializer; import com.google.gson.JsonElement; import com.google.gson.JsonObject; + +import de.mediathekview.mserver.base.utils.JsonUtils; import 
de.mediathekview.mserver.base.utils.UrlUtils; -import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO; import de.mediathekview.mserver.crawler.basic.TopicUrlDTO; +import de.mediathekview.mserver.crawler.zdf.ZdfConstants; import java.lang.reflect.Type; import java.util.Optional; -public class ZdfDayPageDeserializer implements JsonDeserializer { +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +public class ZdfDayPageDeserializer implements JsonDeserializer { + private static final Logger LOG = LogManager.getLogger(ZdfDayPageDeserializer.class); private static final String JSON_ELEMENT_ENTRIES = "http://zdf.de/rels/search/results"; private static final String JSON_ELEMENT_MAIN_VIDEO_CONTENT = "mainVideoContent"; private static final String JSON_ELEMENT_TARGET = "http://zdf.de/rels/target"; @@ -84,19 +89,15 @@ private Optional parseSearchEntry(final JsonObject aResultObject) { if (mainVideoTarget == null) { return Optional.empty(); } + final Optional tvService = JsonUtils.getElementValueAsString(target, "tvService"); + if (tvService.isPresent() && !ZdfConstants.PARTNER_TO_SENDER.containsKey(tvService.orElse("ZDF"))) { + return Optional.empty(); + } if (target.has(JSON_ATTRIBUTE_CANONICAL)) { String canonical = target.get(JSON_ATTRIBUTE_CANONICAL).getAsString(); String id = aResultObject.get("id").getAsString().replace("SCMS_", ""); canonical = UrlUtils.addDomainIfMissing(canonical, apiUrlBase); - if(id.contains("video_artede") - || id.contains("video-ard") - || id.contains("video-kika") - || id.contains("video_phoenix") - ) { - return Optional.empty(); - } - final TopicUrlDTO dto = new TopicUrlDTO(id, canonical); return Optional.of(dto); } diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializer.java index 7d745f288..3497a75c4 100644 --- 
a/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializer.java @@ -110,7 +110,10 @@ public Optional deserialize( final Map downloadUrl = parseDownloadUrls(mainVideoTarget); if (title.isPresent()) { - final Optional id = JsonUtils.getElementValueAsString(aJsonObject, "id"); + Optional id = JsonUtils.getElementValueAsString(aJsonObject, "externalId"); + if (id.isPresent()) { + id = Optional.of(id.get().replace("SCMS_", "")); + } final Optional selfId = JsonUtils.getElementValueAsString(aJsonObject, "self"); if(id.isEmpty() && selfId.isEmpty()) { System.out.println("check"); diff --git a/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java b/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java index 20350f125..7722bb567 100644 --- a/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java +++ b/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java @@ -46,6 +46,8 @@ void start() { void startCrawlerFlow() { try { manager.start(); + manager.filterFilmlist(); + manager.storeFilmsToDB(); manager.importFilmlist(); manager.importLivestreamFilmlist(); } finally { @@ -55,9 +57,6 @@ void startCrawlerFlow() { manager.writeHashFile(); manager.writeIdFile(); manager.copyFilmlist(); - // - manager.storeFilmsToDB(); - // manager.stop(); } } @@ -87,11 +86,11 @@ void importFilmlistIntoDB() { LOG.debug("allVideoUrls loaded {} entries", allVideoUrls.size()); manager.getFilmlist().getFilms().entrySet().parallelStream() .forEach(entry -> { - if (allVideoUrls.contains(entry.getValue().getDefaultUrl().get().getUrl().toString())) { + if (allVideoUrls.contains(entry.getValue().getSender().name()+entry.getValue().getDefaultUrl().get().getUrl().toString())) { manager.getFilmlist().getFilms().remove(entry.getKey()); } }); - LOG.debug("removed to {} entries", 
manager.getFilmlist().getFilms().entrySet().size()); + LOG.debug("reduced to {} entries", manager.getFilmlist().getFilms().entrySet().size()); //manager.getFilmlist().getFilms().entrySet().removeIf(entry -> filmDBService.videoExistsByUrl(entry.getValue())); manager.getFilmlist().getFilms().entrySet().forEach(entry -> { var film = entry.getValue(); diff --git a/src/main/resources/MServer-Config.yaml b/src/main/resources/MServer-Config.yaml index aea212a75..88a72884a 100644 --- a/src/main/resources/MServer-Config.yaml +++ b/src/main/resources/MServer-Config.yaml @@ -188,11 +188,11 @@ copySettings: # JSON_COMPRESSED: /var/www/mediathekview/filmlisten/filmliste_diff.json.xz OLD_JSON_COMPRESSED_XZ: copyTarget/filmliste_old_diff.json.xz -mServerDBConfig: - active: false - url: "url" - username: "username" - password: "password" +databaseConfig: + active: false + url: "jdbc:postgresql://postgresMV:5432/crawler" + username: "crawler" + password: "secret" #### Logging #### logSettings: From 243e12cf1c5ea55081fd803f92c8eaa16c3cab44 Mon Sep 17 00:00:00 2001 From: CodingPF Date: Tue, 30 Dec 2025 10:12:03 +0100 Subject: [PATCH 06/23] update zdf junit --- MServer-Config.yaml | 2 +- .../mserver/filmlisten/writer/FilmlistOldFormatWriter.java | 2 +- .../mserver/crawler/arte/tasks/ArteVideoInfoTaskTest.java | 2 +- .../crawler/zdf/json/ZdfDayPageDeserializerTest.java | 7 ++----- .../mserver/crawler/zdf/tasks/ZdfDayPageTaskTest.java | 7 ++----- 5 files changed, 7 insertions(+), 13 deletions(-) diff --git a/MServer-Config.yaml b/MServer-Config.yaml index a9e6f2604..2772cd20e 100644 --- a/MServer-Config.yaml +++ b/MServer-Config.yaml @@ -125,7 +125,7 @@ importFilmlistConfigurations : format: OLD_JSON_COMPRESSED_XZ createDiff: false checkImportListUrl: false - - active: true + - active: false path: jdbc format: OLD_JSON createDiff: false diff --git a/src/main/java/de/mediathekview/mserver/filmlisten/writer/FilmlistOldFormatWriter.java 
b/src/main/java/de/mediathekview/mserver/filmlisten/writer/FilmlistOldFormatWriter.java index 1be3c7e7b..ccc8505ec 100644 --- a/src/main/java/de/mediathekview/mserver/filmlisten/writer/FilmlistOldFormatWriter.java +++ b/src/main/java/de/mediathekview/mserver/filmlisten/writer/FilmlistOldFormatWriter.java @@ -91,7 +91,7 @@ public boolean write(Filmlist filmlist, OutputStream outputStream) throws IOExce }); jsonWriter.endObject(); jsonWriter.flush(); - LOG.info("done writting in {} sec reading {} elements resulting in {} elements", ((System.currentTimeMillis()-start)/1000), cnt, filmlist.getFilms().size()); + LOG.info("done writting in {} sec reading {} elements resulting in {} elements", ((System.currentTimeMillis()-start)/1000), filmlist.getFilms().size(), cnt ); } catch (IOException e) { LOG.error(e); return false; diff --git a/src/test/java/de/mediathekview/mserver/crawler/arte/tasks/ArteVideoInfoTaskTest.java b/src/test/java/de/mediathekview/mserver/crawler/arte/tasks/ArteVideoInfoTaskTest.java index 6e34d12ae..3f28fc7c2 100644 --- a/src/test/java/de/mediathekview/mserver/crawler/arte/tasks/ArteVideoInfoTaskTest.java +++ b/src/test/java/de/mediathekview/mserver/crawler/arte/tasks/ArteVideoInfoTaskTest.java @@ -34,7 +34,7 @@ public void test() { } private void assertEntry(ArteVideoInfoDto act, Optional[] expected) { - assertEquals(act.getId(), expected[0]); + assertEquals(act.getId(), expected[0].get()); assertEquals(act.getKind(), expected[1]); assertEquals(act.getTitle(), expected[2]); assertEquals(act.getSubtitle(), expected[3]); diff --git a/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDeserializerTest.java b/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDeserializerTest.java index e03e88cf5..48f1e51d2 100644 --- a/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDeserializerTest.java +++ b/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDeserializerTest.java @@ -46,10 +46,6 @@ public 
static Collection data() { new CrawlerUrlDTO( "https://api.zdf.de/content/documents/olympia-im-technikwahn-100.json"), new CrawlerUrlDTO("https://api.zdf.de/content/documents/gestrandet-102.json"), - new CrawlerUrlDTO( - "https://api.zdf.de/content/documents/im-dialog-vom-23022018-100.json"), - new CrawlerUrlDTO( - "https://api.zdf.de/content/documents/augstein--blome-vom-23022018-100.json"), new CrawlerUrlDTO( "https://api.zdf.de/content/documents/menschen---das-magazin-vom-24-februar-2018-100.json"), new CrawlerUrlDTO("https://api.zdf.de/content/documents/die-orakel-krake-100.json"), @@ -112,6 +108,7 @@ public void deserializeTest() { assertThat(actual, notNullValue()); assertThat(actual.getNextPageUrl(), equalTo(expectedNextPageUrl)); assertThat(actual.getEntries().size(), equalTo(expectedEntries.length)); - assertThat(actual.getEntries(), Matchers.containsInAnyOrder(expectedEntries)); + actual.getEntries().stream().map(v->new CrawlerUrlDTO(v.getUrl())).toArray(CrawlerUrlDTO[]::new); + assertThat(actual.getEntries().stream().map(v->new CrawlerUrlDTO(v.getUrl())).toList(), Matchers.containsInAnyOrder(expectedEntries)); } } diff --git a/src/test/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfDayPageTaskTest.java b/src/test/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfDayPageTaskTest.java index ed9ab750b..2858d55ba 100644 --- a/src/test/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfDayPageTaskTest.java +++ b/src/test/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfDayPageTaskTest.java @@ -22,9 +22,6 @@ public void testWithSinglePage() { new CrawlerUrlDTO[] { new CrawlerUrlDTO("https://api.zdf.de/content/documents/olympia-im-technikwahn-100.json"), new CrawlerUrlDTO("https://api.zdf.de/content/documents/gestrandet-102.json"), - new CrawlerUrlDTO("https://api.zdf.de/content/documents/im-dialog-vom-23022018-100.json"), - new CrawlerUrlDTO( - "https://api.zdf.de/content/documents/augstein--blome-vom-23022018-100.json"), new CrawlerUrlDTO( 
"https://api.zdf.de/content/documents/menschen---das-magazin-vom-24-februar-2018-100.json"), new CrawlerUrlDTO("https://api.zdf.de/content/documents/die-orakel-krake-100.json"), @@ -39,7 +36,7 @@ public void testWithSinglePage() { final Collection actual = executeTask(requestUrl); assertThat(actual, notNullValue()); - assertThat(actual, Matchers.containsInAnyOrder(expectedEntries)); + assertThat(actual.stream().map(v -> new CrawlerUrlDTO(v.getUrl())).toList(), Matchers.containsInAnyOrder(expectedEntries)); } @Test @@ -58,7 +55,7 @@ public void testWithMultiplePages() { final Collection actual = executeTask(requestUrl); assertThat(actual, notNullValue()); - assertThat(actual.size(), equalTo(35)); + assertThat(actual.size(), equalTo(33)); } @Test From 474b45be396afda319c3d96e4cbf070b56960649 Mon Sep 17 00:00:00 2001 From: CodingPF Date: Fri, 2 Jan 2026 14:59:18 +0100 Subject: [PATCH 07/23] Update last seen logic --- .../java/de/mediathekview/mserver/base/utils/FilmDBService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java index 42f1f87a4..bbe047227 100644 --- a/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java +++ b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java @@ -155,7 +155,7 @@ public List filterNewVideos(List videos, Function idExtract futures.add(executorService.submit(() -> { List newVideos = new ArrayList<>(); - String sql = "UPDATE filme SET last_update = now() WHERE id = ?"; + String sql = "UPDATE filme SET last_seen = now() WHERE id = ?"; try (Connection con = dataSource.getConnection(); PreparedStatement ps = con.prepareStatement(sql)) { From 8bb41f7fba1afee43ccc501b48c6eb49090a39bc Mon Sep 17 00:00:00 2001 From: CodingPF Date: Fri, 2 Jan 2026 19:27:21 +0100 Subject: [PATCH 08/23] update filter logic and av. 
check --- .../mserver/base/utils/CheckUrlAvailability.java | 1 - .../mserver/base/utils/FilmDBService.java | 15 ++++++--------- .../mserver/ui/config/MServerExecutionFlow.java | 2 +- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/src/main/java/de/mediathekview/mserver/base/utils/CheckUrlAvailability.java b/src/main/java/de/mediathekview/mserver/base/utils/CheckUrlAvailability.java index c6782d855..bc56f0195 100644 --- a/src/main/java/de/mediathekview/mserver/base/utils/CheckUrlAvailability.java +++ b/src/main/java/de/mediathekview/mserver/base/utils/CheckUrlAvailability.java @@ -10,7 +10,6 @@ import de.mediathekview.mserver.daten.Film; import de.mediathekview.mserver.daten.Filmlist; -import de.mediathekview.mserver.daten.Resolution; import de.mediathekview.mserver.daten.Sender; import de.mediathekview.mserver.base.utils.FileSizeDeterminer.ResponseInfo; diff --git a/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java index bbe047227..58f3c7923 100644 --- a/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java +++ b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java @@ -142,20 +142,18 @@ public List filterNewVideos(List videos, Function idExtract } try { List>> futures = new ArrayList<>(); - + // sort to avoid deadlocks List allVideos = videos.stream() .sorted(Comparator.comparing(idExtractor)) .toList(); - for (int i = 0; i < allVideos.size(); i += batchSize) { int from = i; int to = Math.min(i + batchSize, allVideos.size()); List batch = allVideos.subList(from, to); - futures.add(executorService.submit(() -> { List newVideos = new ArrayList<>(); - - String sql = "UPDATE filme SET last_seen = now() WHERE id = ?"; + // update every 7 days + String sql = "UPDATE filme SET last_seen = now() WHERE id = ? 
AND last_seen - last_update <= interval '7' DAY"; try (Connection con = dataSource.getConnection(); PreparedStatement ps = con.prepareStatement(sql)) { @@ -266,12 +264,11 @@ private int saveBatch(List films) throws SQLException, IOException { int successCounter = 0; String sql = """ - INSERT INTO filme (id, data, created_at, last_update) - VALUES (?, ?::jsonb, now(), now()) + INSERT INTO filme (id, data) + VALUES (?, ?::jsonb) ON CONFLICT (id) DO UPDATE SET data = EXCLUDED.data, - last_update = now(), - created_at = filme.created_at + last_update = now() """; try (Connection con = dataSource.getConnection(); PreparedStatement ps = con.prepareStatement(sql)) { diff --git a/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java b/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java index 7722bb567..e20ea521c 100644 --- a/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java +++ b/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java @@ -105,7 +105,7 @@ void importFilmlistIntoDB() { } void checkAvailability() { FilmDBService filmDBService = new FilmDBService(manager.getExecutorService(), 2000); - String condition = "where last_url_check IS NULL OR last_url_check < NOW() - INTERVAL '1 DAY'"; + String condition = "WHERE last_url_check < NOW() - INTERVAL '3' DAY LIMIT 400000"; Optional dbFilmlist = filmDBService.readFilmlistFromDB(condition); dbFilmlist.ifPresent(filmlist -> manager.getFilmlist().addAllFilms(filmlist.getFilms().values())); CheckUrlAvailability checkUrlAvailability = new CheckUrlAvailability( From f84cc5ac1388d7010c8a7ff5470af486f8f48aee Mon Sep 17 00:00:00 2001 From: CodingPF Date: Sat, 3 Jan 2026 01:21:07 +0100 Subject: [PATCH 09/23] fix db retrieve and add more logging --- .../mserver/base/utils/FilmDBService.java | 11 ++++++----- .../mserver/crawler/basic/AbstractCrawler.java | 3 ++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git 
a/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java index 58f3c7923..077260759 100644 --- a/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java +++ b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java @@ -7,6 +7,7 @@ import de.mediathekview.mserver.daten.Filmlist; import de.mediathekview.mserver.daten.GsonDurationAdapter; import de.mediathekview.mserver.daten.GsonLocalDateTimeAdapter; +import de.mediathekview.mserver.daten.Sender; import javax.sql.DataSource; @@ -106,16 +107,16 @@ public void deleteFilms(Collection abandonedFilmlist) { ///////////////////////////////////////////////////////////////////////////////////////// public Optional readFilmlistFromDB() { - return readFilmlistFromDB(""); + return readFilmlistFromDB("", ""); } - public Optional readFilmlistFromDB(String where) { + public Optional readFilmlistFromDB(String where, String limit) { long start = System.currentTimeMillis(); LOG.debug("import filmlist from DB"); int readCounter = 0; Filmlist list = new Filmlist(); try (Connection con = dataSource.getConnection(); - PreparedStatement ps = con.prepareStatement("SELECT data FROM filme " + where + " ORDER BY data ->> 'sender', data ->> 'thema', data ->> 'titel'"); + PreparedStatement ps = con.prepareStatement("SELECT data FROM filme " + where + " ORDER BY data ->> 'sender', data ->> 'thema', data ->> 'titel' " + limit); ) { ps.setFetchSize(50000); try (ResultSet rs = ps.executeQuery()) { @@ -136,7 +137,7 @@ public Optional readFilmlistFromDB(String where) { ///////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////// - public List filterNewVideos(List videos, Function idExtractor) { + public List filterNewVideos(Sender sender, List videos, Function idExtractor) { if(!PostgreSQLDataSourceProvider.isEnabled()) { 
return videos; } @@ -183,7 +184,7 @@ public List filterNewVideos(List videos, Function idExtract for (Future> f : futures) { result.addAll(f.get()); } - LOG.debug("Filtered {} (in {} out {})",(videos.size()-result.size()), videos.size(), result.size()); + LOG.debug("Filtered {} in {} (in {} vs out {})",(videos.size()-result.size()), sender.getName(), videos.size(), result.size()); return result; } catch (Exception e) { LOG.error("{}", e); diff --git a/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractCrawler.java index ab089a43c..f5a6caacd 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractCrawler.java @@ -78,10 +78,11 @@ protected AbstractCrawler( public Queue filterExistingFilms(Collection input, Function idExtractor) { return new ArrayDeque<>( filmDBService.filterNewVideos( + getSender(), new ArrayList<>(input), idExtractor ) - ); + ); } @Override From 433605b0a35ade1c2fb9a4bdf200b88b274d1dcc Mon Sep 17 00:00:00 2001 From: CodingPF Date: Mon, 5 Jan 2026 18:48:53 +0100 Subject: [PATCH 10/23] move logic for daysToCrawl into util method --- .../mserver/base/utils/DateUtils.java | 26 +++++++++++++++++++ .../mserver/crawler/ard/ArdCrawler.java | 15 +++++------ .../crawler/dreisat/DreiSatCrawler.java | 18 ++++++------- .../mserver/crawler/orfon/OrfOnCrawler.java | 18 ++++++------- .../mserver/crawler/srf/SrfCrawler.java | 20 +++++--------- .../crawler/zdf/AbstractZdfCrawler.java | 23 +++++----------- .../mserver/crawler/zdf/ZdfCrawler.java | 22 ++++------------ 7 files changed, 67 insertions(+), 75 deletions(-) diff --git a/src/main/java/de/mediathekview/mserver/base/utils/DateUtils.java b/src/main/java/de/mediathekview/mserver/base/utils/DateUtils.java index 657385585..63a659cf7 100644 --- a/src/main/java/de/mediathekview/mserver/base/utils/DateUtils.java +++ 
b/src/main/java/de/mediathekview/mserver/base/utils/DateUtils.java @@ -1,5 +1,12 @@ package de.mediathekview.mserver.base.utils; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.List; + +import de.mediathekview.mserver.base.config.MServerBasicConfigDTO; + /** A set of util methods to work with dates. */ public class DateUtils { private static final String SPLITTED_NUMBERS_REGEX_PATTERN = "$1:$2"; @@ -14,4 +21,23 @@ private DateUtils() { public static String changeDateTimeForMissingISO8601Support(final String aDateTimeString) { return aDateTimeString.replaceAll(SPLIT_NUMBERS_REGEX_PATTERN, SPLITTED_NUMBERS_REGEX_PATTERN); } + + public static List generateDaysToCrawl(MServerBasicConfigDTO config) { + return generateDaysToCrawl( + config.getMaximumDaysForSendungVerpasstSection(), + config.getMaximumDaysForSendungVerpasstSectionFuture(), + DateTimeFormatter.ofPattern("yyyy-MM-dd")); + } + + public static List generateDaysToCrawl(int numberOfDaysInThePast, int numberOfDaysInTheFuture, DateTimeFormatter formatter) { + List days = new ArrayList<>(); + final LocalDateTime now = LocalDateTime.now(); + for (int i = 0; i <= numberOfDaysInThePast; i++) { + days.add(now.minusDays(i).format(formatter)); + } + for (int i = 1; i < numberOfDaysInTheFuture; i++) { + days.add(now.plusDays(i).format(formatter)); + } + return days; + } } diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java index 54c7cc922..1c50621cf 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java @@ -3,6 +3,7 @@ import de.mediathekview.mserver.daten.Film; import de.mediathekview.mserver.daten.Sender; import de.mediathekview.mserver.base.messages.listener.MessageListener; +import de.mediathekview.mserver.base.utils.DateUtils; import 
de.mediathekview.mserver.base.config.MServerConfigManager; import de.mediathekview.mserver.base.messages.ServerMessages; import de.mediathekview.mserver.crawler.ard.tasks.*; @@ -12,10 +13,10 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; import java.util.Collection; import java.util.HashSet; +import java.util.List; import java.util.Queue; import java.util.Set; import java.util.concurrent.*; @@ -41,17 +42,13 @@ public Sender getSender() { private Queue createDayUrlsToCrawl() { final Queue dayUrlsToCrawl = new ConcurrentLinkedQueue<>(); - - final LocalDateTime now = LocalDateTime.now(); - for (int i = 0; i <= crawlerConfig.getMaximumDaysForSendungVerpasstSection(); i++) { - final String day = now.minusDays(i).format(DAY_PAGE_DATE_FORMATTER); - + final List days = DateUtils.generateDaysToCrawl(crawlerConfig); + days.forEach( dateString -> { for (final String client : ArdConstants.CLIENTS) { - final String url = - String.format(ArdConstants.DAY_PAGE_URL, day, client); + final String url = String.format(ArdConstants.DAY_PAGE_URL, dateString, client); dayUrlsToCrawl.offer(new CrawlerUrlDTO(url)); } - } + }); return dayUrlsToCrawl; } diff --git a/src/main/java/de/mediathekview/mserver/crawler/dreisat/DreiSatCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/dreisat/DreiSatCrawler.java index 17d2ef124..2a8631fe7 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/dreisat/DreiSatCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/dreisat/DreiSatCrawler.java @@ -2,6 +2,7 @@ import de.mediathekview.mserver.daten.Sender; import de.mediathekview.mserver.base.messages.listener.MessageListener; +import de.mediathekview.mserver.base.utils.DateUtils; import de.mediathekview.mserver.base.config.MServerConfigManager; import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO; import 
de.mediathekview.mserver.crawler.zdf.AbstractZdfCrawler; @@ -9,10 +10,9 @@ import de.mediathekview.mserver.progress.listeners.SenderProgressListener; import org.jetbrains.annotations.NotNull; -import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; -import java.time.temporal.ChronoUnit; import java.util.Collection; +import java.util.List; import java.util.Queue; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ExecutionException; @@ -75,14 +75,14 @@ protected Collection getExtraDaysEntries() private Queue getExtraDayUrls() { final Queue urls = new ConcurrentLinkedQueue<>(); - for (int i = 0; i <= getMaximumDaysPast(); i++) { - - final LocalDateTime local = LocalDateTime.now().minus(i, ChronoUnit.DAYS); - final String date = local.format(DateTimeFormatter.ofPattern("yyyy-MM-dd")); - final String url = String.format(DreisatConstants.URL_HTML_DAY, date); + final List days = DateUtils.generateDaysToCrawl( + getMaximumDaysPast(), + crawlerConfig.getMaximumDaysForSendungVerpasstSectionFuture(), + DateTimeFormatter.ofPattern("yyyy-MM-dd")); + days.forEach( dateString -> { + final String url = String.format(DreisatConstants.URL_HTML_DAY, dateString); urls.add(new CrawlerUrlDTO(url)); - } - + }); return urls; } diff --git a/src/main/java/de/mediathekview/mserver/crawler/orfon/OrfOnCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/orfon/OrfOnCrawler.java index ec5e6968c..42c7dc452 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/orfon/OrfOnCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/orfon/OrfOnCrawler.java @@ -3,6 +3,7 @@ import de.mediathekview.mserver.daten.Film; import de.mediathekview.mserver.daten.Sender; import de.mediathekview.mserver.base.messages.listener.MessageListener; +import de.mediathekview.mserver.base.utils.DateUtils; import de.mediathekview.mserver.base.config.MServerConfigManager; import de.mediathekview.mserver.base.messages.ServerMessages; import 
de.mediathekview.mserver.crawler.basic.AbstractCrawler; @@ -17,10 +18,9 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import java.time.LocalDateTime; -import java.time.format.DateTimeFormatter; import java.util.Collection; import java.util.HashSet; +import java.util.List; import java.util.Queue; import java.util.Set; import java.util.concurrent.ConcurrentLinkedQueue; @@ -31,7 +31,6 @@ public class OrfOnCrawler extends AbstractCrawler { private static final Logger LOG = LogManager.getLogger(OrfOnCrawler.class); - private static final DateTimeFormatter DAY_PAGE_DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd"); public OrfOnCrawler( final ForkJoinPool aForkJoinPool, @@ -92,14 +91,15 @@ private Set processDayUrlsToCrawl() throws InterruptedExc return dayTaskFilms; } + + private Queue createDayUrlsToCrawl() { final Queue dayUrlsToCrawl = new ConcurrentLinkedQueue<>(); - final LocalDateTime now = LocalDateTime.now(); - for (int i = 0; i <= crawlerConfig.getMaximumDaysForSendungVerpasstSection(); i++) { - final String day = now.minusDays(i).format(DAY_PAGE_DATE_FORMATTER); - final String url = OrfOnConstants.SCHEDULE + "/" + day; - dayUrlsToCrawl.offer(new OrfOnBreadCrumsUrlDTO(day,url)); - } + final List days = DateUtils.generateDaysToCrawl(crawlerConfig); + days.forEach( dateString -> { + final String url = OrfOnConstants.SCHEDULE + "/" + dateString; + dayUrlsToCrawl.offer(new OrfOnBreadCrumsUrlDTO(dateString, url)); + }); return dayUrlsToCrawl; } diff --git a/src/main/java/de/mediathekview/mserver/crawler/srf/SrfCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/srf/SrfCrawler.java index d8b10e410..b65ed272f 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/srf/SrfCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/srf/SrfCrawler.java @@ -3,6 +3,7 @@ import de.mediathekview.mserver.daten.Film; import de.mediathekview.mserver.daten.Sender; import 
de.mediathekview.mserver.base.messages.listener.MessageListener; +import de.mediathekview.mserver.base.utils.DateUtils; import de.mediathekview.mserver.base.config.MServerConfigManager; import de.mediathekview.mserver.base.messages.ServerMessages; import de.mediathekview.mserver.crawler.basic.AbstractCrawler; @@ -15,11 +16,9 @@ import de.mediathekview.mserver.progress.listeners.SenderProgressListener; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; - -import java.time.LocalDateTime; -import java.time.format.DateTimeFormatter; import java.util.Collection; import java.util.HashSet; +import java.util.List; import java.util.Queue; import java.util.Set; import java.util.concurrent.ConcurrentLinkedQueue; @@ -31,9 +30,6 @@ public class SrfCrawler extends AbstractCrawler { private static final Logger LOG = LogManager.getLogger(SrfCrawler.class); - private static final DateTimeFormatter ISO_DATE_FORMAT = - DateTimeFormatter.ofPattern("yyyy-MM-dd"); - public SrfCrawler( final ForkJoinPool aForkJoinPool, final Collection aMessageListeners, @@ -93,15 +89,11 @@ protected RecursiveTask> createCrawlerTask() { private Queue createScheduleUrls() { final Queue scheduleUrls = new ConcurrentLinkedQueue<>(); - final LocalDateTime now = LocalDateTime.now(); - for (int i = 0; i <= crawlerConfig.getMaximumDaysForSendungVerpasstSection(); i++) { - final String day = now.minusDays(i).format(ISO_DATE_FORMAT); - final String url = String.format(SrfConstants.SCHEDULE_PER_DAY, day); + final List days = DateUtils.generateDaysToCrawl(crawlerConfig); + days.forEach( dateString -> { + final String url = String.format(SrfConstants.SCHEDULE_PER_DAY, dateString); scheduleUrls.offer(new CrawlerUrlDTO(url)); - } - LOG.debug("SRF crawler for schedule {} to {}", - now.minusDays(crawlerConfig.getMaximumDaysForSendungVerpasstSection()).format(ISO_DATE_FORMAT), - now.minusDays(0).format(ISO_DATE_FORMAT)); + }); return scheduleUrls; } diff --git 
a/src/main/java/de/mediathekview/mserver/crawler/zdf/AbstractZdfCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/AbstractZdfCrawler.java index 242b4fa03..4ec3b040e 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/AbstractZdfCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/AbstractZdfCrawler.java @@ -3,6 +3,7 @@ import de.mediathekview.mserver.daten.Film; import de.mediathekview.mserver.daten.Sender; import de.mediathekview.mserver.base.messages.listener.MessageListener; +import de.mediathekview.mserver.base.utils.DateUtils; import de.mediathekview.mserver.base.config.MServerConfigManager; import de.mediathekview.mserver.base.messages.ServerMessages; import de.mediathekview.mserver.crawler.basic.AbstractCrawler; @@ -15,12 +16,10 @@ import org.apache.logging.log4j.Logger; import org.jetbrains.annotations.NotNull; -import java.time.LocalDateTime; -import java.time.format.DateTimeFormatter; -import java.time.temporal.ChronoUnit; import java.util.ArrayDeque; import java.util.Collection; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Queue; import java.util.Set; @@ -113,21 +112,11 @@ protected Collection getExtraDaysEntries() private Queue getDayUrls() { final Queue urls = new ConcurrentLinkedQueue<>(); - for (int i = 0; - i - <= crawlerConfig.getMaximumDaysForSendungVerpasstSection() - + crawlerConfig.getMaximumDaysForSendungVerpasstSectionFuture(); - i++) { - - final LocalDateTime local = - LocalDateTime.now() - .plus(crawlerConfig.getMaximumDaysForSendungVerpasstSectionFuture(), ChronoUnit.DAYS) - .minus(i, ChronoUnit.DAYS); - final String date = local.format(DateTimeFormatter.ofPattern("yyyy-MM-dd")); - final String url = String.format(getUrlDay(), date, date); + final List days = DateUtils.generateDaysToCrawl(crawlerConfig); + days.forEach( dateString -> { + final String url = String.format(getUrlDay(), dateString, dateString); urls.add(new CrawlerUrlDTO(url)); - } - + }); 
return urls; } diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfCrawler.java index fe09a4643..ed5f13b06 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfCrawler.java @@ -3,6 +3,7 @@ import de.mediathekview.mserver.daten.Film; import de.mediathekview.mserver.daten.Sender; import de.mediathekview.mserver.base.messages.listener.MessageListener; +import de.mediathekview.mserver.base.utils.DateUtils; import de.mediathekview.mserver.base.config.MServerConfigManager; import de.mediathekview.mserver.base.messages.ServerMessages; import de.mediathekview.mserver.crawler.basic.AbstractCrawler; @@ -11,9 +12,6 @@ import de.mediathekview.mserver.crawler.zdf.tasks.*; import de.mediathekview.mserver.progress.listeners.SenderProgressListener; -import java.time.LocalDateTime; -import java.time.format.DateTimeFormatter; -import java.time.temporal.ChronoUnit; import java.util.*; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ExecutionException; @@ -151,21 +149,11 @@ private Set getDaysEntries(final ZdfConfiguration configuration) private Queue getDayUrls() { final Queue urls = new ConcurrentLinkedQueue<>(); - for (int i = 0; - i - <= crawlerConfig.getMaximumDaysForSendungVerpasstSection() - + crawlerConfig.getMaximumDaysForSendungVerpasstSectionFuture(); - i++) { - - final LocalDateTime local = - LocalDateTime.now() - .plus(crawlerConfig.getMaximumDaysForSendungVerpasstSectionFuture(), ChronoUnit.DAYS) - .minus(i, ChronoUnit.DAYS); - final String date = local.format(DateTimeFormatter.ofPattern("yyyy-MM-dd")); - final String url = String.format(getUrlDay(), date, date); + final List days = DateUtils.generateDaysToCrawl(crawlerConfig); + days.forEach( dateString -> { + final String url = String.format(getUrlDay(), dateString, dateString); urls.add(new CrawlerUrlDTO(url)); - } - + 
}); return urls; } From 8857f337569ff2b81ffcc38f3a76e2157e2c4219 Mon Sep 17 00:00:00 2001 From: CodingPF Date: Mon, 5 Jan 2026 18:49:14 +0100 Subject: [PATCH 11/23] remove livestream from diff list --- .../java/de/mediathekview/mserver/crawler/CrawlerManager.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java b/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java index b68f83481..2997436a1 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java +++ b/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java @@ -217,8 +217,9 @@ public void importFilmlist(final ImportFilmlistConfiguration importFilmlistConfi } // final Filmlist difflist = new Filmlist(UUID.randomUUID(), LocalDateTime.now()); - importedFilmlist.ifPresent(value -> Film.addAllToFilmlist(Film.mergeTwoFilmlists(filmlist,value),difflist)); + importedFilmlist.ifPresent(value -> Film.addAllToFilmlist(Film.mergeTwoFilmlists(filmlist,value), difflist)); if (importFilmlistConfiguration.isCreateDiff()) { + difflist.getFilms().entrySet().removeIf(entry -> entry.getValue().getThema().equals("Livestream") && entry.getValue().getTitel().endsWith("Livestream") && entry.getValue().getTime().getHour() == 0); Film.addAllToFilmlist(difflist, differenceList); } } catch (final IOException ioException) { From 0faabee3a278796bb8e981b4e97120a49b789411 Mon Sep 17 00:00:00 2001 From: CodingPF Date: Mon, 5 Jan 2026 18:49:45 +0100 Subject: [PATCH 12/23] fix availability check --- .../mserver/ui/config/MServerExecutionFlow.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java b/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java index e20ea521c..de97fb55c 100644 --- a/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java +++ 
b/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java @@ -105,8 +105,9 @@ void importFilmlistIntoDB() { } void checkAvailability() { FilmDBService filmDBService = new FilmDBService(manager.getExecutorService(), 2000); - String condition = "WHERE last_url_check < NOW() - INTERVAL '3' DAY LIMIT 400000"; - Optional dbFilmlist = filmDBService.readFilmlistFromDB(condition); + String condition = "WHERE last_url_check < NOW() - INTERVAL '3' DAY"; + String limit = " LIMIT 400000"; + Optional dbFilmlist = filmDBService.readFilmlistFromDB(condition, limit); dbFilmlist.ifPresent(filmlist -> manager.getFilmlist().addAllFilms(filmlist.getFilms().values())); CheckUrlAvailability checkUrlAvailability = new CheckUrlAvailability( manager.getConfigManager().getConfig().getCheckImportListUrlMinSize(), From b6da2c9ab86fdec27c234508b99a36200b260f95 Mon Sep 17 00:00:00 2001 From: CodingPF Date: Mon, 5 Jan 2026 18:50:54 +0100 Subject: [PATCH 13/23] add column --- src/main/docker/docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/docker/docker-compose.yml b/src/main/docker/docker-compose.yml index 15a9879f0..f87268681 100644 --- a/src/main/docker/docker-compose.yml +++ b/src/main/docker/docker-compose.yml @@ -6,6 +6,7 @@ configs: id TEXT PRIMARY KEY, -- eindeutige Film-ID data JSONB NOT NULL, -- JSON-Daten des Films created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + last_seen TIMESTAMPTZ NOT NULL DEFAULT now(), 
last_update TIMESTAMPTZ NOT NULL DEFAULT now(), last_url_check TIMESTAMPTZ NOT NULL DEFAULT now() ); From 45c8d353d014ed570bbc50f96d40f4b583acf223 Mon Sep 17 00:00:00 2001 From: CodingPF Date: Fri, 9 Jan 2026 17:11:56 +0100 Subject: [PATCH 14/23] DB config via yaml, move atomic filmlist, backwards compatibility --- .../mserver/base/config/MServerDBConfig.java | 38 ++++++++++++++++++- .../base/uploader/copy/FileCopyTask.java | 4 +- .../mserver/base/utils/FilmDBService.java | 16 +++++--- .../mserver/crawler/CrawlerManager.java | 4 +- .../mserver/crawler/ard/ArdCrawler.java | 3 -- .../crawler/basic/AbstractCrawler.java | 2 +- .../crawler/dw/tasks/DwFilmDetailTask.java | 4 +- .../crawler/zdf/tasks/ZdfFilmTask.java | 2 +- .../writer/AbstractFilmlistWriter.java | 17 +++++++-- .../writer/FilmlistOldFormatWriter.java | 10 ++++- .../ui/config/MServerExecutionFlow.java | 15 ++++++-- src/main/resources/MServer-Config.yaml | 3 ++ 12 files changed, 93 insertions(+), 25 deletions(-) diff --git a/src/main/java/de/mediathekview/mserver/base/config/MServerDBConfig.java b/src/main/java/de/mediathekview/mserver/base/config/MServerDBConfig.java index 7e6766ad2..1e78413d3 100644 --- a/src/main/java/de/mediathekview/mserver/base/config/MServerDBConfig.java +++ b/src/main/java/de/mediathekview/mserver/base/config/MServerDBConfig.java @@ -7,23 +7,57 @@ public class MServerDBConfig { private String url; private String username; private String password; + private Integer refreshIntervalInDays; + private Integer checkUrlIntervalInDays; + private Integer batchSize; public MServerDBConfig() { - active = true; + active = false; url = "jdbc:postgresql://postgresMV:55432/crawler"; username = "crawler"; password = "secret"; + refreshIntervalInDays = 7; + checkUrlIntervalInDays = 3; + batchSize = 2000; } - public MServerDBConfig(Boolean active, String url, String username, String password) { + public MServerDBConfig(Boolean active, String url, String username, String password, int 
refreshIntervalInDays, int checkUrlIntervalInDays, int batchSize ) { this.active = active; this.url = url; this.username = username; this.password = password; + this.refreshIntervalInDays = refreshIntervalInDays; + this.checkUrlIntervalInDays = checkUrlIntervalInDays; + this.batchSize = batchSize; } + public Integer getBatchSize() { + return batchSize; + } + + public void setBatchSize(Integer batchSize) { + this.batchSize = batchSize; + } + + public Integer getRefreshIntervalInDays() { + return refreshIntervalInDays; + } + + public void setRefreshIntervalInDays(Integer refreshIntervalInDays) { + this.refreshIntervalInDays = refreshIntervalInDays; + } + + public Integer getCheckUrlIntervalInDays() { + return checkUrlIntervalInDays; + } + + public void setCheckUrlIntervalInDays(Integer checkUrlIntervalInDays) { + this.checkUrlIntervalInDays = checkUrlIntervalInDays; + } + + public Boolean getActive() { return active; } diff --git a/src/main/java/de/mediathekview/mserver/base/uploader/copy/FileCopyTask.java b/src/main/java/de/mediathekview/mserver/base/uploader/copy/FileCopyTask.java index 2a69689dc..90f4278fd 100644 --- a/src/main/java/de/mediathekview/mserver/base/uploader/copy/FileCopyTask.java +++ b/src/main/java/de/mediathekview/mserver/base/uploader/copy/FileCopyTask.java @@ -32,7 +32,9 @@ protected void upload() { Path backup = backupExistingFile(target); LOG.debug("CopyTask found existing file - rename existing file to {} before overwrite", backup.getFileName()); } - Files.copy(sourcePath, uploadTarget.getTargetPath(), StandardCopyOption.REPLACE_EXISTING); + Path tmpTarget = Files.createTempFile( uploadTarget.getTargetPath().getParent(), uploadTarget.getTargetPath().getFileName().toString(), ".tmp"); + Files.copy(sourcePath, tmpTarget, StandardCopyOption.REPLACE_EXISTING); + Files.move(tmpTarget, target, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE); } catch (final IOException ioException) { LOG.error("Something went wrong on copying the 
film list.", ioException); printMessage(ServerMessages.FILE_COPY_ERROR); diff --git a/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java index 077260759..03e20272b 100644 --- a/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java +++ b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java @@ -45,11 +45,13 @@ public class FilmDBService { private final Gson gson; private final ExecutorService executorService; private final int batchSize; + private final Integer refreshIntervalInDays; - public FilmDBService(ExecutorService executorService, int batchSize) { + public FilmDBService(ExecutorService executorService, int batchSize, int refreshIntervalInDays) { this.dataSource = PostgreSQLDataSourceProvider.get(); this.executorService = executorService; this.batchSize = batchSize; + this.refreshIntervalInDays = refreshIntervalInDays; this.gson = new GsonBuilder().registerTypeAdapter(LocalDateTime.class, new GsonLocalDateTimeAdapter()) .registerTypeAdapter(Duration.class, new GsonDurationAdapter()).create(); @@ -153,10 +155,14 @@ public List filterNewVideos(Sender sender, List videos, Function batch = allVideos.subList(from, to); futures.add(executorService.submit(() -> { List newVideos = new ArrayList<>(); - // update every 7 days - String sql = "UPDATE filme SET last_seen = now() WHERE id = ? AND last_seen - last_update <= interval '7' DAY"; - - try (Connection con = dataSource.getConnection(); PreparedStatement ps = con.prepareStatement(sql)) { + StringBuffer sql = new StringBuffer(); + sql.append("UPDATE filme SET last_seen = now() ") + .append("WHERE id = ? 
AND (") + .append("( cast(created_at as date) = cast(last_update as date) and cast(created_at as date) <> cast(now() as date) )") + .append(" OR ") + .append("(last_seen - last_update <= interval '").append(refreshIntervalInDays).append("' DAY)") + .append(")"); + try (Connection con = dataSource.getConnection(); PreparedStatement ps = con.prepareStatement(sql.toString())) { for (T video : batch) { String id = idExtractor.apply(video); diff --git a/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java b/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java index 2997436a1..194d469ad 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java +++ b/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java @@ -95,7 +95,7 @@ public MServerConfigManager getConfigManager() { } public void storeFilmsToDB() { - FilmDBService filmDBService = new FilmDBService(executorService, 200); + FilmDBService filmDBService = new FilmDBService(executorService, getConfigManager().getConfig().getDatabaseConfig().getBatchSize(), getConfigManager().getConfig().getDatabaseConfig().getRefreshIntervalInDays()); try { filmDBService.saveAll(filmlist); } catch (Exception e) { @@ -459,7 +459,7 @@ private Set getCrawlerToRun() { } private Optional importFilmlistFromDB() throws IOException { - FilmDBService filmDBService = new FilmDBService(getExecutorService(), 2000); + FilmDBService filmDBService = new FilmDBService(getExecutorService(), getConfigManager().getConfig().getDatabaseConfig().getBatchSize(), getConfigManager().getConfig().getDatabaseConfig().getRefreshIntervalInDays()); Optional dbFilmlist = filmDBService.readFilmlistFromDB(); return dbFilmlist; } diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java index 1c50621cf..ac9304116 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java +++ 
b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java @@ -13,7 +13,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import java.time.format.DateTimeFormatter; import java.util.Collection; import java.util.HashSet; import java.util.List; @@ -24,8 +23,6 @@ public class ArdCrawler extends AbstractCrawler { private static final Logger LOG = LogManager.getLogger(ArdCrawler.class); - private static final DateTimeFormatter DAY_PAGE_DATE_FORMATTER = - DateTimeFormatter.ofPattern("yyyy-MM-dd"); public ArdCrawler( final ForkJoinPool aForkJoinPool, diff --git a/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractCrawler.java index f5a6caacd..6f8d7b991 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractCrawler.java @@ -71,7 +71,7 @@ protected AbstractCrawler( rootConfig.getSenderConfig(getSender()).getSocketTimeoutInSeconds(), runtimeConfig.getMaximumCpuThreads()); rateLimiter = RateLimiter.create(rootConfig.getSenderConfig(getSender()).getMaximumRequestsPerSecond()); - filmDBService = new FilmDBService(forkJoinPool, 200); + filmDBService = new FilmDBService(forkJoinPool, getRuntimeConfig().getDatabaseConfig().getBatchSize(), getRuntimeConfig().getDatabaseConfig().getRefreshIntervalInDays()); films = ConcurrentHashMap.newKeySet(); } diff --git a/src/main/java/de/mediathekview/mserver/crawler/dw/tasks/DwFilmDetailTask.java b/src/main/java/de/mediathekview/mserver/crawler/dw/tasks/DwFilmDetailTask.java index a8058796d..3c7520e9c 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/dw/tasks/DwFilmDetailTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/dw/tasks/DwFilmDetailTask.java @@ -4,7 +4,6 @@ import de.mediathekview.mserver.daten.Film; import de.mediathekview.mserver.crawler.basic.AbstractCrawler; import 
de.mediathekview.mserver.crawler.basic.AbstractRecursiveConverterTask; -import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO; import de.mediathekview.mserver.crawler.basic.TopicUrlDTO; import de.mediathekview.mserver.crawler.dw.DWTaskBase; import de.mediathekview.mserver.crawler.dw.parser.DwFilmDetailDeserializer; @@ -46,7 +45,8 @@ protected void processRestTarget(final TopicUrlDTO aDTO, final WebTarget aTarget } catch (Exception e) { LOG.error("error processing {} ", aDTO.getUrl(), e); } - if (filmDetailDtoOptional.isEmpty()) { + // Optional can be null if response code is 200 and response body is empty + if (filmDetailDtoOptional == null || filmDetailDtoOptional.isEmpty()) { crawler.incrementAndGetErrorCount(); crawler.updateProgress(); return; diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmTask.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmTask.java index b60472f6c..a103d8900 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmTask.java @@ -108,7 +108,7 @@ private static void updateTitle(final String aLanguage, final Film aFilm) { @Override protected void processRestTarget(ZdfFilmDto aDTO, WebTarget aTarget) { final Optional downloadDto = - deserialize(aTarget, OPTIONAL_DOWNLOAD_DTO_TYPE_TOKEN); + deserializeOptional(aTarget, OPTIONAL_DOWNLOAD_DTO_TYPE_TOKEN); if (downloadDto.isPresent()) { try { addFilm(downloadDto.get(), createFilm(aDTO, downloadDto.get()), aDTO.getVideoType()); diff --git a/src/main/java/de/mediathekview/mserver/filmlisten/writer/AbstractFilmlistWriter.java b/src/main/java/de/mediathekview/mserver/filmlisten/writer/AbstractFilmlistWriter.java index 3f926caf6..ec7f1b44b 100644 --- a/src/main/java/de/mediathekview/mserver/filmlisten/writer/AbstractFilmlistWriter.java +++ b/src/main/java/de/mediathekview/mserver/filmlisten/writer/AbstractFilmlistWriter.java @@ -8,7 +8,10 @@ import 
java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; +import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.StandardCopyOption; + import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -26,9 +29,17 @@ protected AbstractFilmlistWriter(final MessageListener... aListeners) { public abstract boolean write(Filmlist filmlist, OutputStream outputStream) throws IOException; public boolean write(Filmlist filmlist, Path savePath) { - try (final OutputStream os = new FileOutputStream(savePath.toFile()); - final BufferedOutputStream fos = new BufferedOutputStream(os, 512000)) { - return write(filmlist, fos); + try { + Path tmpTarget = Files.createTempFile( savePath.getParent(), savePath.getFileName().toString(), ".tmp"); + try (final OutputStream os = new FileOutputStream(tmpTarget.toFile()); + final BufferedOutputStream fos = new BufferedOutputStream(os, 512000)) { + boolean succuess = write(filmlist, fos); + fos.close(); + if (succuess) { + Files.move(tmpTarget, savePath, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE); + } + return succuess; + } } catch (final IOException ioException) { LOG.debug("Something went wrong on writing the film list.", ioException); publishMessage(FilmListMessages.FILMLIST_WRITE_ERROR, savePath.toAbsolutePath().toString()); diff --git a/src/main/java/de/mediathekview/mserver/filmlisten/writer/FilmlistOldFormatWriter.java b/src/main/java/de/mediathekview/mserver/filmlisten/writer/FilmlistOldFormatWriter.java index ccc8505ec..192d30f7d 100644 --- a/src/main/java/de/mediathekview/mserver/filmlisten/writer/FilmlistOldFormatWriter.java +++ b/src/main/java/de/mediathekview/mserver/filmlisten/writer/FilmlistOldFormatWriter.java @@ -16,6 +16,7 @@ import java.io.OutputStreamWriter; import java.net.URL; import java.nio.charset.StandardCharsets; +import java.time.LocalDateTime; import java.time.LocalTime; import java.time.ZoneId; import 
java.time.ZoneOffset; @@ -41,6 +42,7 @@ public class FilmlistOldFormatWriter extends AbstractFilmlistWriter { private static final char GEO_SPLITTERATOR = '-'; private static final DateTimeFormatter DATE_TIME_FORMAT = DateTimeFormatter.ofLocalizedDateTime(MEDIUM, SHORT).withLocale(Locale.GERMANY); + private static final LocalDateTime EMPTY_DATE_TIME= LocalDateTime.of(1970, 1, 1, 00, 00, 00); private static final String META_HEADER_VERSION = "4"; private static final String META_HEADER_VERSION_LONG = "MSearch [Vers.: 4.0.1]"; @@ -196,10 +198,16 @@ private String writeRecord03Titel(AbstractMediaResource in) { } private String writeRecord04Datum(AbstractMediaResource in) { + if(in.getTime().isEqual(EMPTY_DATE_TIME)) { + return ""; + } return in.getTime().format(DATE_FORMATTER); } private String writeRecord05Zeit(AbstractMediaResource in) { + if(in.getTime().isEqual(EMPTY_DATE_TIME)) { + return ""; + } return in.getTime().format(TIME_FORMATTER); } @@ -211,7 +219,7 @@ private String writeRecord06Dauer(AbstractMediaResource in) { } private String writeRecord07Groesse(AbstractMediaResource in) { - if ((in instanceof Podcast pIn) && pIn.getUrl(Resolution.NORMAL) != null) + if ((in instanceof Podcast pIn) && pIn.getUrl(Resolution.NORMAL) != null && pIn.getUrl(Resolution.NORMAL).getFileSize() != 0) return (pIn.getUrl(Resolution.NORMAL).getFileSize()/1024) + ""; return ""; } diff --git a/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java b/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java index de97fb55c..60b758603 100644 --- a/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java +++ b/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java @@ -11,6 +11,7 @@ import de.mediathekview.mserver.base.utils.FilmDBService; import de.mediathekview.mserver.crawler.CrawlerManager; import de.mediathekview.mserver.daten.Filmlist; +import de.mediathekview.mserver.daten.Resolution; import 
de.mediathekview.mserver.ui.config.MServerCommandLine.CMDARG; public class MServerExecutionFlow { @@ -63,7 +64,7 @@ void startCrawlerFlow() { void exportFilmListFromDB() { try { - FilmDBService filmDBService = new FilmDBService(manager.getExecutorService(), 2000); + FilmDBService filmDBService = new FilmDBService(manager.getExecutorService(), 2000, -1); Optional dbFilmlist = filmDBService.readFilmlistFromDB(); dbFilmlist.ifPresent(filmlist -> manager.getFilmlist().addAllFilms(filmlist.getFilms().values())); // @@ -81,12 +82,15 @@ void exportFilmListFromDB() { } void importFilmlistIntoDB() { manager.importFilmlist(); - FilmDBService filmDBService = new FilmDBService(manager.getExecutorService(), 2000); + FilmDBService filmDBService = new FilmDBService(manager.getExecutorService(), 2000, -1); HashSet allVideoUrls = filmDBService.getAllVideoUrls(); LOG.debug("allVideoUrls loaded {} entries", allVideoUrls.size()); manager.getFilmlist().getFilms().entrySet().parallelStream() .forEach(entry -> { - if (allVideoUrls.contains(entry.getValue().getSender().name()+entry.getValue().getDefaultUrl().get().getUrl().toString())) { + if (allVideoUrls.contains(entry.getValue().getSender().name()+entry.getValue().getDefaultUrl().get().getUrl().toString()) || + (entry.getValue().getUrl(Resolution.SMALL) != null && allVideoUrls.contains(entry.getValue().getSender().name()+entry.getValue().getUrl(Resolution.SMALL).getUrl().toString())) || + (entry.getValue().getUrl(Resolution.HD) != null && allVideoUrls.contains(entry.getValue().getSender().name()+entry.getValue().getUrl(Resolution.HD).getUrl().toString())) + ) { manager.getFilmlist().getFilms().remove(entry.getKey()); } }); @@ -104,7 +108,10 @@ void importFilmlistIntoDB() { manager.stop(); } void checkAvailability() { - FilmDBService filmDBService = new FilmDBService(manager.getExecutorService(), 2000); + FilmDBService filmDBService = new FilmDBService( + manager.getExecutorService(), + 
manager.getConfigManager().getConfig().getDatabaseConfig().getBatchSize(), + manager.getConfigManager().getConfig().getDatabaseConfig().getRefreshIntervalInDays()); String condition = "WHERE last_url_check < NOW() - INTERVAL '3' DAY"; String limit = " LIMIT 400000"; Optional dbFilmlist = filmDBService.readFilmlistFromDB(condition, limit); diff --git a/src/main/resources/MServer-Config.yaml b/src/main/resources/MServer-Config.yaml index 88a72884a..8f54843dc 100644 --- a/src/main/resources/MServer-Config.yaml +++ b/src/main/resources/MServer-Config.yaml @@ -193,6 +193,9 @@ databaseConfig: url: "jdbc:postgresql://postgresMV:5432/crawler" username: "crawler" password: "secret" + refreshIntervalInDays: 7 + checkUrlIntervalInDays: 3 + batchSize: 2000 #### Logging #### logSettings: From 582ca850f9532fd5a7493f6e5b6497bc16a261a0 Mon Sep 17 00:00:00 2001 From: CodingPF Date: Fri, 23 Jan 2026 07:22:38 +0100 Subject: [PATCH 15/23] zdf partner2sender, ard generate urls, checkUrlAv --- MServer-Config.yaml | 27 +- src/main/docker/runDocker | 3 +- .../mserver/base/utils/FilmDBService.java | 55 ++- .../mserver/crawler/ard/ArdConstants.java | 2 +- .../mserver/crawler/ard/UrlOptimizer.java | 239 ++++++++++ .../crawler/ard/json/ArdFilmDeserializer.java | 37 +- .../crawler/ard/json/ArdVideoInfoDto.java | 11 + .../crawler/zdf/AbstractZdfCrawler.java | 2 +- .../mserver/crawler/zdf/ZdfCrawler.java | 2 +- .../zdf/json/ZdfDayPageDeserializer.java | 9 +- .../crawler/zdf/tasks/ZdfDayPageTask.java | 13 +- .../ui/config/MServerExecutionFlow.java | 17 +- .../mserver/crawler/ard/UrlOptimizerTest.java | 420 ++++++++++++++++++ .../zdf/json/ZdfDayPageDeserializerTest.java | 2 +- .../crawler/zdf/tasks/ZdfDayPageTaskTest.java | 2 +- 15 files changed, 796 insertions(+), 45 deletions(-) create mode 100644 src/main/java/de/mediathekview/mserver/crawler/ard/UrlOptimizer.java create mode 100644 src/test/java/de/mediathekview/mserver/crawler/ard/UrlOptimizerTest.java diff --git a/MServer-Config.yaml 
b/MServer-Config.yaml index 2772cd20e..d4684761d 100644 --- a/MServer-Config.yaml +++ b/MServer-Config.yaml @@ -6,6 +6,9 @@ databaseConfig: url: "jdbc:postgresql://localhost:55432/crawler" username: "crawler" password: "secret" + refreshIntervalInDays: 7 + checkUrlIntervalInDays: 3 + batchSize: 2000 # The maximum amount of cpu threads to be used. maximumCpuThreads: 10 @@ -24,7 +27,7 @@ maximumRequestsPerSecond: 999.0 # If set only these Sender will be crawled all other will be ignored. senderIncluded: - #- ARD + - ARD #- ARTE_DE #- ARTE_FR #- ARTE_PL @@ -37,7 +40,7 @@ senderIncluded: #- ORF #- PHOENIX #- SRF - - SR + #- SR #- ZDF #SRF,SR,PHONIX,ORF,KIKA,DW,3SAT< @@ -105,8 +108,8 @@ filmlistIdFilePath: target/filmlists/filmlist.id.xx # import additional filmlist sources importFilmlistConfigurations : - - active: false - path: "someCrawlerlist.json" + - active: true + path: "Filmliste-akt" format: OLD_JSON createDiff: false checkImportListUrl: false @@ -157,7 +160,7 @@ maximumSubpages: 5 maximumDaysForSendungVerpasstSection: 7 # The maximum amount of days going to future will be crawled for the "Sendung Verpasst?" section. -maximumDaysForSendungVerpasstSectionFuture: 0 +maximumDaysForSendungVerpasstSectionFuture: 3 # The time in seconds before a socket connection should time out. 
socketTimeoutInSeconds: 60 @@ -167,11 +170,12 @@ socketTimeoutInSeconds: 60 senderConfigurations: ARD: # Actually the ARD has a maximum of 6 days in the past - maximumDaysForSendungVerpasstSection: 1 + maximumDaysForSendungVerpasstSection: 6 + maximumDaysForSendungVerpasstSectionFuture: 6 #2,4,8 ok maximumUrlsPerTask: 32 #10,20,40 ok - maximumSubpages: 0 + maximumSubpages: 40 ORF: maximumRequestsPerSecond: 10.0 ARTE_DE: @@ -195,12 +199,13 @@ senderConfigurations: FUNK: maximumUrlsPerTask: 99 DREISAT: - maximumSubpages: 5 - maximumDaysForSendungVerpasstSection: 60 + maximumSubpages: 15 + maximumDaysForSendungVerpasstSection: 30 + maximumDaysForSendungVerpasstSectionFuture: 20 PHOENIX: maximumSubpages: 500 SRF: - maximumSubpages: 1 + maximumSubpages: 25 #### COPY #### copySettings: @@ -258,4 +263,4 @@ logSettings: # The pattern of the file name of the archived log files. # See: https://logging.apache.org/log4j/2.0/manual/appenders.html#RollingFileAppender - logFileRollingPattern: logs/${date:yyyy-MM}/server-%d{MM-dd-yyyy}-%i.log \ No newline at end of file + logFileRollingPattern: logs/${date:yyyy-MM}/server-%d{MM-dd-yyyy-HH}-%i.log \ No newline at end of file diff --git a/src/main/docker/runDocker b/src/main/docker/runDocker index c1e452092..fdd073e48 100644 --- a/src/main/docker/runDocker +++ b/src/main/docker/runDocker @@ -15,4 +15,5 @@ docker compose run -d --rm -e MSERVER_OPTS="--config /config/MServer-Config-R2.y # on demand - do not run this unless you know what you are doing! 
docker compose run -d --rm -e MSERVER_OPTS="--config /config/MServer-Config-R2.yaml --flow importFilmlistIntoDB" mserver-r3 - +## docker save -o mserver.tar mediathekview/mserver:4.0.1-SNAPSHOT +## docker load -i mserver.tar diff --git a/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java index 03e20272b..24a6144dd 100644 --- a/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java +++ b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java @@ -66,12 +66,49 @@ public void update(String sql) { } } + ///////////////////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////////////// + + public void updateLastUrlCheck(List checked) { + try { + AtomicInteger updateCounter = new AtomicInteger(0); + List> futures = new ArrayList<>(); + List allVideos = checked.stream() + .sorted(Comparator.comparing(Film::getId)) + .toList(); + for (int i = 0; i < allVideos.size(); i += batchSize) { + int from = i; + int to = Math.min(i + batchSize, allVideos.size()); + List batch = allVideos.subList(from, to); + futures.add(executorService.submit(() -> { + String sql = "UPDATE filme SET last_url_check = NOW() WHERE id = ?"; + try (Connection con = dataSource.getConnection(); PreparedStatement ps = con.prepareStatement(sql)) { + for (Film video : batch) { + ps.setString(1, video.getId()); + ps.addBatch(); + } + int [] rs = ps.executeBatch(); + for (int rsCode : rs) { + updateCounter.addAndGet(rsCode); + } + } catch (SQLException e) { + LOG.error(e); + } + })); + } + futures.forEach( f -> {try { f.get(); } catch(Exception e) { LOG.error("{}",e); }}); + LOG.debug("updated lastUrlCheck {}", updateCounter.get()); + } catch (Exception e) { + LOG.error(e); + } + } + ///////////////////////////////////////////////////////////////////////////////////////// 
///////////////////////////////////////////////////////////////////////////////////////// public void deleteFilms(Collection abandonedFilmlist) { try { - List>> futures = new ArrayList<>(); + List> futures = new ArrayList<>(); List allVideos = abandonedFilmlist.stream() .sorted(Comparator.comparing(Film::getId)) .toList(); @@ -80,7 +117,6 @@ public void deleteFilms(Collection abandonedFilmlist) { int to = Math.min(i + batchSize, allVideos.size()); List batch = allVideos.subList(from, to); futures.add(executorService.submit(() -> { - List newVideos = new ArrayList<>(); String sql = "DELETE FROM filme WHERE id = ?"; try (Connection con = dataSource.getConnection(); PreparedStatement ps = con.prepareStatement(sql)) { for (Film video : batch) { @@ -91,13 +127,9 @@ public void deleteFilms(Collection abandonedFilmlist) { } catch (SQLException e) { LOG.error(e); } - return newVideos; })); } - List result = new ArrayList<>(); - for (Future> f : futures) { - result.addAll(f.get()); - } + futures.forEach( f -> {try { f.get(); } catch(Exception e) { LOG.error("{}",e); }}); LOG.debug("deleted {}", abandonedFilmlist.size()); } catch (Exception e) { @@ -157,13 +189,10 @@ public List filterNewVideos(Sender sender, List videos, Function newVideos = new ArrayList<>(); StringBuffer sql = new StringBuffer(); sql.append("UPDATE filme SET last_seen = now() ") - .append("WHERE id = ? AND (") - .append("( cast(created_at as date) = cast(last_update as date) and cast(created_at as date) <> cast(now() as date) )") - .append(" OR ") - .append("(last_seen - last_update <= interval '").append(refreshIntervalInDays).append("' DAY)") - .append(")"); + .append("WHERE id = ? 
") + .append("AND NOT( created_at::date = last_update::date and last_update::date <> CURRENT_DATE ) ") + .append("AND NOT( last_seen - last_update >= interval '").append(refreshIntervalInDays).append("' DAY)"); try (Connection con = dataSource.getConnection(); PreparedStatement ps = con.prepareStatement(sql.toString())) { - for (T video : batch) { String id = idExtractor.apply(video); if (id != null) { diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdConstants.java b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdConstants.java index 4859ebb0c..029659829 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdConstants.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdConstants.java @@ -19,7 +19,7 @@ public class ArdConstants { public static final String DAY_PAGE_URL = "https://programm-api.ard.de/program/api/program?day=%s&channelIds=%s&mode=channel"; public static final int TOPICS_COMPILATION_PAGE_SIZE = 200; - public static final int TOPIC_PAGE_SIZE = 50; + public static final int TOPIC_PAGE_SIZE = 200; public static final String DEFAULT_CLIENT = "ard"; diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/UrlOptimizer.java b/src/main/java/de/mediathekview/mserver/crawler/ard/UrlOptimizer.java new file mode 100644 index 000000000..d9651d852 --- /dev/null +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/UrlOptimizer.java @@ -0,0 +1,239 @@ +package de.mediathekview.mserver.crawler.ard; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.TreeMap; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import org.apache.logging.log4j.LogManager; +import 
de.mediathekview.mserver.crawler.basic.AbstractCrawler; +import de.mediathekview.mserver.daten.Resolution; + +public class UrlOptimizer { + private static final org.apache.logging.log4j.Logger LOG = LogManager.getLogger(UrlOptimizer.class); + protected AbstractCrawler crawler; + + public UrlOptimizer(AbstractCrawler aCrawler) { + crawler = aCrawler; + } + + static AtomicInteger good = new AtomicInteger(0); + static AtomicInteger bad = new AtomicInteger(0); + public void debug2(String adaptive, Map allUrls) { + Map proposal = buildFilmUrlFromAdaptive(adaptive, allUrls.entrySet().stream().findFirst().get().getValue()); + + if(proposal.size() != allUrls.size() && !adaptive.contains("arte.")) { + System.out.println("asdf"); + Map x = buildFromUrl(adaptive, allUrls.entrySet().stream().findFirst().get().getValue()); + StringBuffer sb = new StringBuffer(); + sb.append("#").append(adaptive).append("#").append(printMap(proposal)).append("#vs#").append(printMap(allUrls)); + LOG.debug(sb.toString()); + } + + boolean isEqual = proposal.equals(allUrls); + if (!isEqual && proposal.size() < 3 && allUrls.size() < 3) { + StringBuffer sb = new StringBuffer(); + sb.append(isEqual).append("#").append(good).append(":").append(bad).append("#").append(adaptive).append("#"); + proposal.forEach((r,url) -> { + sb.append(r).append("|").append(url); + }); + sb.append("#vs#"); + allUrls.forEach((r,url) -> { + sb.append(r).append("|").append(url); + }); + LOG.info(sb.toString()); + bad.incrementAndGet(); + } else { + good.incrementAndGet(); + } + } + + public static String printMap(Map urls) { + StringBuffer sb = new StringBuffer(); + urls.forEach((r,url) -> { + sb.append(r).append("|").append(url); + }); + return sb.toString(); + } + + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + record AdaptiveUrlStructure(String prefix, Map qualities, String suffix) { + } + + private static AdaptiveUrlStructure 
parseAdaptiveUrlStructure(String adaptive) { + if (adaptive == null || !adaptive.contains(",")) { + return null; + } + // kein split by "/" weil rb diese verwendet + Pattern p = Pattern.compile("/(?=[^/]*?,)"); + Matcher m = p.matcher(adaptive); + String metaSegment = ""; + if (m.find()) { + int start = m.start() + 1; + int end = adaptive.lastIndexOf('/'); + metaSegment = adaptive.substring(start, end); + } + // + List partsAndMeta = new ArrayList(Arrays.asList(metaSegment.split(","))); + String prefix = partsAndMeta.getFirst(); + // + partsAndMeta.removeFirst(); + String suffix = partsAndMeta.getLast(); + suffix = suffix.replace(".csmil", ""); + partsAndMeta.removeLast(); + // + Map qualities = new HashMap(); + for (int i = 0; i < partsAndMeta.size(); i++) { + qualities.put(i, partsAndMeta.get(i)); + } + Map sortedByLength = qualities.entrySet().stream() + .sorted(Comparator.comparingInt(e -> e.getValue().length())).collect(Collectors.toMap(Map.Entry::getKey, + Map.Entry::getValue, (v1, v2) -> v1, LinkedHashMap::new)); + // + if (qualities.values().stream().findAny().get().endsWith(suffix)) { + suffix = ""; + } + // + return new AdaptiveUrlStructure(prefix, sortedByLength, suffix); + } + + // --------------------------------------------------------------------------------------------------------------------- + + public List extractResolutionHVFromAdaptive(String adaptive) { + List resolutions = new ArrayList<>(); + String m3uContent; + try { + m3uContent = crawler.requestBodyAsString(adaptive); + } catch (IOException e) { + LOG.error("{}", e); + return resolutions; + } + String[] lines = m3uContent.split("\n"); + for (String line : lines) { + line = line.trim(); + if (line.startsWith("#EXT-X-STREAM-INF:")) { + // Extract the RESOLUTION part + String[] parts = line.substring("#EXT-X-STREAM-INF:".length()).split(","); + for (String part : parts) { + if (part.startsWith("RESOLUTION=")) { + String resolutionStr = part.substring("RESOLUTION=".length()); + String[] dims 
= resolutionStr.split("x"); + if (dims.length == 2) { + try { + int horizontal = Integer.parseInt(dims[0]); + int vertical = Integer.parseInt(dims[1]); + resolutions.add(new int[] { horizontal, vertical }); + } catch (NumberFormatException e) { + resolutions.add(new int[] { 0, 0 }); + } + } + break; + } + } + } + } + return resolutions; + } + + // ---------------------------------------------------------------------------------------------------------------------------------------------------- + + public Map buildFilmUrlFromAdaptive(String adaptive, String aUrl) { + Map result = new HashMap<>(); + Map rawStringUrlMap = buildFromUrl(adaptive, aUrl); + rawStringUrlMap.forEach( (resolutionVertical, url) -> { + try { + if (crawler.requestUrlExists(url)) { + Resolution r = ArdConstants.getResolutionFromWidth(resolutionVertical); + result.computeIfAbsent(r, k -> url); + } /*else { + LOG.debug("broken url {} from {}", url, adaptive); + }*/ + } catch(Exception e) { + LOG.error("adaptive: {} url: {} error: {}", adaptive, aUrl, e); + } + }); + return result; + } + + public Map buildFromUrl(String adaptive, String aUrl) { + if (adaptive.startsWith("https://dra-dd.akamaized.net")) { + return buildFromUrlForDRA(adaptive, aUrl); + } else { + Map positionToUrl = buildUrlsFromPlaylist(adaptive, aUrl); + return addResolutionToUrls(adaptive, positionToUrl); + } + } + + public Map buildUrlsFromPlaylist(String adaptive, String aUrl) { + if (adaptive.startsWith("https://dra-dd.akamaized.net")) { + return buildFromUrlForDRA(adaptive, aUrl); + } else { + return buildFromUrlForArdMediathek(adaptive, aUrl); + } + } + + private Map addResolutionToUrls(String adaptive, Map positionToUrl) { + Map result = new TreeMap<>(Comparator.reverseOrder()); + if(adaptive == null || adaptive.isBlank() || positionToUrl.size() == 0) { + return result; + } + List hv = extractResolutionHVFromAdaptive(adaptive); + for (int index = 0; index < hv.size(); index++) { + if (positionToUrl.containsKey(index)) { + 
result.put(hv.get(index)[0], positionToUrl.get(index)); + } + } + // + return result; + } + + private Map buildFromUrlForArdMediathek(String adaptive, String aUrl) { + Map positionToUrl= new HashMap<>(); + if (adaptive == null || aUrl == null || adaptive.isBlank() || aUrl.isBlank() || !adaptive.contains(",")) { + return positionToUrl; + } + AdaptiveUrlStructure x = parseAdaptiveUrlStructure(adaptive); + // find the quality of sample url to determine base url + String matchingPart = ""; + for (String qualityPart : x.qualities.values()) { + if (aUrl.contains(x.prefix + qualityPart + x.suffix)) { + matchingPart = x.prefix + qualityPart + x.suffix; + } + } + // url to position + String baseUrl = aUrl.substring(0, aUrl.length() - matchingPart.length()); + for (Entry qualityPart : x.qualities.entrySet()) { + String newUrl = baseUrl + x.prefix + qualityPart.getValue() + x.suffix; + positionToUrl.put(qualityPart.getKey(), newUrl); + } + // + return positionToUrl; + } + + private static Map buildFromUrlForDRA(String adaptive, String aUrl) { + String newUrl = adaptive.replace("/HLS/", "/mp4/"); + Map result = new TreeMap<>(Comparator.reverseOrder()); + result.put(360, newUrl.replace("_master.m3u8", "_vod.360.MP4")); + result.put(540, newUrl.replace("_master.m3u8", "_vod.540.MP4")); + result.put(720, newUrl.replace("_master.m3u8", "_vod.720.MP4")); + result.put(1080, newUrl.replace("_master.m3u8", "_vod.1080.MP4")); + return result; + } + + + + + + + +} diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java index 0b3f478c5..0440bf344 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java @@ -11,6 +11,7 @@ import de.mediathekview.mserver.crawler.ard.ArdConstants; import de.mediathekview.mserver.crawler.ard.ArdFilmDto; import 
de.mediathekview.mserver.crawler.ard.ArdFilmInfoDto; +import de.mediathekview.mserver.crawler.ard.UrlOptimizer; import de.mediathekview.mserver.crawler.basic.AbstractCrawler; import org.apache.logging.log4j.LogManager; @@ -27,6 +28,7 @@ import java.time.ZonedDateTime; import java.time.format.DateTimeParseException; import java.util.*; +import java.util.concurrent.atomic.AtomicInteger; public class ArdFilmDeserializer implements JsonDeserializer> { @@ -73,10 +75,12 @@ public class ArdFilmDeserializer implements JsonDeserializer> { private final ArdVideoInfoJsonDeserializer videoDeserializer; private final AbstractCrawler crawler; - + private final UrlOptimizer urlOptimizer; + public ArdFilmDeserializer(final AbstractCrawler crawler) { videoDeserializer = new ArdVideoInfoJsonDeserializer(crawler); this.crawler = crawler; + this.urlOptimizer = new UrlOptimizer(crawler); } private static Optional getMediaCollectionObject(final JsonObject itemObject) { @@ -416,7 +420,7 @@ private Optional parseVideos(final JsonObject playerPageObject, videoInfoAD = videoInfoStandard; videoInfoStandard = Optional.empty(); } - + videoInfoAdaptive.ifPresent(x -> allVideoUrls.setAdaptivUrl(x.entrySet().stream().findFirst().get().getValue())); videoInfoStandard.ifPresent(allVideoUrls::putAll); videoInfoAD.ifPresent(allVideoUrls::putAllAD); videoInfoDGS.ifPresent(allVideoUrls::putAllDGS); @@ -425,10 +429,37 @@ private Optional parseVideos(final JsonObject playerPageObject, if (allVideoUrls.getVideoUrls().isEmpty() && allVideoUrls.getVideoUrlsAD().isEmpty() && allVideoUrls.getVideoUrlsDGS().isEmpty() && allVideoUrls.getVideoUrlsOV().isEmpty() ) { return Optional.empty(); - } + } + if (videoInfoAdaptive.isPresent() && videoInfoStandard.isPresent() + && videoInfoStandard.get().size() == 1) { + String m3u8 = videoInfoAdaptive.get().entrySet().stream().findFirst().get().getValue(); + if (!m3u8.contains("funk") && !m3u8.contains("arte")) { + Map regenerated = urlOptimizer.buildFilmUrlFromAdaptive( 
+ videoInfoAdaptive.get().entrySet().stream().findFirst().get().getValue(), + videoInfoStandard.get().entrySet().stream().findFirst().get().getValue()); + if(regenerated.size() > videoInfoStandard.get().size()) { + videoInfoStandard = Optional.of(regenerated); + //good.incrementAndGet(); + } else { + bad.incrementAndGet(); + //LOG.debug("asdf {} / {}", good, bad); + + } + } + /* + Optional> tt = parseVideoUrlMap(playerPageObject, MARKER_VIDEO_CATEGORY_MAIN, MARKER_VIDEO_STANDARD, MARKER_VIDEO_MP4, MARKER_VIDEO_DE); + String a = videoInfoAdaptive.get().entrySet().stream().findFirst().get().getValue(); + if(tt.isPresent() && !a.startsWith("https://funk") && !a.contains("arte") ) + //UrlOptimizer.debug(a, tt.get()); + urlOptimizer.debug2(a, videoInfoStandard.get());*/ + } + return Optional.of(allVideoUrls); } + static AtomicInteger good = new AtomicInteger(0); + static AtomicInteger bad = new AtomicInteger(0); + private Optional> parseVideoUrls(final JsonObject playerPageObject, String streamType, String aduioType, String mimeType, String language) { Optional> urls = parseVideoUrlMap(playerPageObject, streamType, aduioType, mimeType, language); if (urls.isEmpty()) { diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdVideoInfoDto.java b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdVideoInfoDto.java index 79ba0656e..7da7b4d7b 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdVideoInfoDto.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdVideoInfoDto.java @@ -20,6 +20,7 @@ public class ArdVideoInfoDto { private final Map videoUrlsAD; private final Map videoUrlsDGS; private final Map videoUrlsOV; + private String adaptivUrl = null; private Set subtitleUrl; @@ -128,4 +129,14 @@ public void putAllOV(Map entries) { putOV(e.getKey(), e.getValue()); } } + + public String getAdaptivUrl() { + return adaptivUrl; + } + + public void setAdaptivUrl(String adaptivUrl) { + this.adaptivUrl = adaptivUrl; + } + 
+ } diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/AbstractZdfCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/AbstractZdfCrawler.java index 4ec3b040e..95bffb651 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/AbstractZdfCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/AbstractZdfCrawler.java @@ -91,7 +91,7 @@ private Set getDaysEntries(final ZdfConfiguration configuration) throws InterruptedException, ExecutionException { final ZdfDayPageTask dayTask = new ZdfDayPageTask( - this, getApiUrlBase(), getDayUrls(), configuration.getSearchAuthKey().orElse(null)); + this, getApiUrlBase(), getDayUrls(), configuration.getSearchAuthKey().orElse(null), partner2Sender); final Set shows = forkJoinPool.submit(dayTask).get(); final Collection extraDaysEntries = getExtraDaysEntries(); diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfCrawler.java index ed5f13b06..38f797873 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfCrawler.java @@ -138,7 +138,7 @@ private Set getDaysEntries(final ZdfConfiguration configuration) throws InterruptedException, ExecutionException { final ZdfDayPageTask dayTask = new ZdfDayPageTask( - this, getApiUrlBase(), getDayUrls(), configuration.getSearchAuthKey().orElse(null)); + this, getApiUrlBase(), getDayUrls(), configuration.getSearchAuthKey().orElse(null), ZdfConstants.PARTNER_TO_SENDER); final Set shows = forkJoinPool.submit(dayTask).get(); printMessage( diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDeserializer.java index 028c06bc5..3d1f081f8 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDeserializer.java +++ 
b/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDeserializer.java @@ -10,8 +10,10 @@ import de.mediathekview.mserver.base.utils.UrlUtils; import de.mediathekview.mserver.crawler.basic.TopicUrlDTO; import de.mediathekview.mserver.crawler.zdf.ZdfConstants; +import de.mediathekview.mserver.daten.Sender; import java.lang.reflect.Type; +import java.util.Map; import java.util.Optional; import org.apache.logging.log4j.LogManager; @@ -29,9 +31,10 @@ public class ZdfDayPageDeserializer implements JsonDeserializer { private static final String JSON_ATTRIBUTE_NEXT = "next"; private final String apiUrlBase; + private final Map partnerToSender; - public ZdfDayPageDeserializer(final String aApiUrlBase) { - + public ZdfDayPageDeserializer(final String aApiUrlBase, final Map partnerToSender) { + this.partnerToSender = partnerToSender; apiUrlBase = aApiUrlBase; } @@ -90,7 +93,7 @@ private Optional parseSearchEntry(final JsonObject aResultObject) { return Optional.empty(); } final Optional tvService = JsonUtils.getElementValueAsString(target, "tvService"); - if (tvService.isPresent() && !ZdfConstants.PARTNER_TO_SENDER.containsKey(tvService.orElse("ZDF"))) { + if (tvService.isPresent() && !partnerToSender.containsKey(tvService.orElse("ZDF"))) { return Optional.empty(); } diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfDayPageTask.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfDayPageTask.java index 3935da37f..3f004d8d2 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfDayPageTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfDayPageTask.java @@ -5,24 +5,29 @@ import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO; import de.mediathekview.mserver.crawler.zdf.json.ZdfDayPageDeserializer; import de.mediathekview.mserver.crawler.zdf.json.ZdfDayPageDto; - +import de.mediathekview.mserver.daten.Sender; import jakarta.annotation.Nullable; import 
jakarta.ws.rs.client.WebTarget; + +import java.util.Map; import java.util.Queue; import java.util.concurrent.ConcurrentLinkedQueue; public class ZdfDayPageTask extends ZdfTaskBase { private final String apiUrlBase; + private final Map partnerToSender; public ZdfDayPageTask( final AbstractCrawler crawler, final String apiUrlBase, final Queue urlToCrawlDTOs, - @Nullable final String authKey) { + @Nullable final String authKey, + Map partnerToSender) { super(crawler, urlToCrawlDTOs, authKey); this.apiUrlBase = apiUrlBase; - registerJsonDeserializer(ZdfDayPageDto.class, new ZdfDayPageDeserializer(this.apiUrlBase)); + this.partnerToSender = partnerToSender; + registerJsonDeserializer(ZdfDayPageDto.class, new ZdfDayPageDeserializer(this.apiUrlBase, partnerToSender)); } @Override @@ -37,7 +42,7 @@ protected void processRestTarget(final CrawlerUrlDTO aDto, final WebTarget aTarg @Override protected AbstractRecursiveConverterTask createNewOwnInstance( final Queue aElementsToProcess) { - return new ZdfDayPageTask(crawler, apiUrlBase, aElementsToProcess, getAuthKey().orElse(null)); + return new ZdfDayPageTask(crawler, apiUrlBase, aElementsToProcess, getAuthKey().orElse(null), partnerToSender); } private void processNextPage(final ZdfDayPageDto entries) { diff --git a/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java b/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java index 60b758603..2cb93c65f 100644 --- a/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java +++ b/src/main/java/de/mediathekview/mserver/ui/config/MServerExecutionFlow.java @@ -1,6 +1,7 @@ package de.mediathekview.mserver.ui.config; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Optional; @@ -112,17 +113,23 @@ void checkAvailability() { manager.getExecutorService(), manager.getConfigManager().getConfig().getDatabaseConfig().getBatchSize(), 
manager.getConfigManager().getConfig().getDatabaseConfig().getRefreshIntervalInDays()); - String condition = "WHERE last_url_check < NOW() - INTERVAL '3' DAY"; + int checkUrlIntervalInDays = manager.getConfigManager().getConfig().getDatabaseConfig().getCheckUrlIntervalInDays(); + String condition = "WHERE last_url_check < NOW() - INTERVAL '"+checkUrlIntervalInDays+"' DAY"; String limit = " LIMIT 400000"; Optional dbFilmlist = filmDBService.readFilmlistFromDB(condition, limit); - dbFilmlist.ifPresent(filmlist -> manager.getFilmlist().addAllFilms(filmlist.getFilms().values())); + if (dbFilmlist.isEmpty()) { + LOG.info("no Film found for checkAvailability"); + return; + } + //dbFilmlist.ifPresent(filmlist -> manager.getFilmlist().addAllFilms(filmlist.getFilms().values())); CheckUrlAvailability checkUrlAvailability = new CheckUrlAvailability( manager.getConfigManager().getConfig().getCheckImportListUrlMinSize(), manager.getConfigManager().getConfig().getCheckImportListUrlTimeoutInSec(), manager.getConfigManager().getConfig().getMaximumCpuThreads()); - Filmlist abonednedList = checkUrlAvailability.getAvailableFilmlist(dbFilmlist.get(), false); - filmDBService.deleteFilms(abonednedList.getFilms().values()); - filmDBService.update("UPDATE filme SET last_url_check = NOW() " + condition); + Filmlist abandonedList = checkUrlAvailability.getAvailableFilmlist(dbFilmlist.get(), false); + filmDBService.deleteFilms(abandonedList.getFilms().values()); + filmDBService.updateLastUrlCheck(List.copyOf(dbFilmlist.get().getFilms().values())); + //filmDBService.update("UPDATE filme SET last_url_check = NOW() " + condition); manager.stop(); } } diff --git a/src/test/java/de/mediathekview/mserver/crawler/ard/UrlOptimizerTest.java b/src/test/java/de/mediathekview/mserver/crawler/ard/UrlOptimizerTest.java new file mode 100644 index 000000000..9178ca37f --- /dev/null +++ b/src/test/java/de/mediathekview/mserver/crawler/ard/UrlOptimizerTest.java @@ -0,0 +1,420 @@ +package 
de.mediathekview.mserver.crawler.ard; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.when; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ForkJoinPool; +import java.util.stream.Stream; + +import org.junit.Before; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.MockitoAnnotations; + +import de.mediathekview.mserver.base.config.MServerConfigManager; +import de.mediathekview.mserver.base.messages.listener.MessageListener; +import de.mediathekview.mserver.daten.Resolution; +import de.mediathekview.mserver.progress.listeners.SenderProgressListener; + +public class UrlOptimizerTest { + + @Mock + private ArdCrawler crawler; + + @Before + public void setUp() { + MockitoAnnotations.openMocks(this); + + } + + @ParameterizedTest(name = "[{index}] adaptive={0}") + @MethodSource("dataM3UToUrls") + void testM3UToUrls(String adaptive, String sampleUrl, List expected) { + crawler = createCrawler(); + crawler = Mockito.mock(ArdCrawler.class); + when(crawler.requestUrlExists(anyString())).thenReturn(true); + UrlOptimizer urlOptimizer = new UrlOptimizer(crawler); + + Map actual = urlOptimizer.buildUrlsFromPlaylist(adaptive, sampleUrl); + assertThat(actual.values()).containsAll(expected); + } + + @Test + void singleTest() { + // https://manifest-arte.akamaized.net/api/manifest/v1/Generate/f620eafe-7d6d-4965-95cd-b11aea6e65d3/VOA-STA/XQ/129139-000-A.m3u8 + // https://manifest-arte.akamaized.net/api/manifest/v1/Generate/f620eafe-7d6d-4965-95cd-b11aea6e65d3/VOA-STA/XQ/129139-000-A.m3u8 + String adaptive = 
"https://manifest-arte.akamaized.net/api/manifest/v1/Generate/f620eafe-7d6d-4965-95cd-b11aea6e65d3/VOA-STA/XQ/129139-000-A.m3u8"; + String sampleUrl = "https://hrardmediathek-a.akamaihd.net/video/as/allgemein/2021_06/hrLogo_210619151025_0215626_512x288-25p-500kbit.mp4"; + crawler = createCrawler(); + //crawler = Mockito.mock(ArdCrawler.class); + //when(crawler.requestUrlExists(anyString())).thenReturn(true); + UrlOptimizer urlOptimizer = new UrlOptimizer(crawler); + Map actual = urlOptimizer.buildFilmUrlFromAdaptive(adaptive, sampleUrl); + System.out.println(urlOptimizer.printMap(actual)); + } + + protected MServerConfigManager rootConfig = new MServerConfigManager("MServer-JUnit-Config.yaml"); + protected ArdCrawler createCrawler() { + final ForkJoinPool forkJoinPool = new ForkJoinPool(); + final Collection nachrichten = new ArrayList<>(); + final Collection fortschritte = new ArrayList<>(); + + return new ArdCrawler(forkJoinPool, nachrichten, fortschritte, rootConfig); + } + + + static Stream dataM3UToUrls() { + return Stream.of( + // ─────────────────────────────────────────────────────────────── + // 1. 
Das Erste + // ─────────────────────────────────────────────────────────────── + Arguments.of( + "https://universal-vod.daserste.de/i/int/2025/04/30/DEGSM156010/,DEGSM156010_3287187_sendeton_640x360-50p-1200kbit,DEGSM156010_3287187_sendeton_480x270-50p-700kbit,DEGSM156010_3287187_sendeton_960x540-50p-1600kbit,DEGSM156010_3287187_sendeton_1280x720-50p-3200kbit,DEGSM156010_3287187_sendeton_1920x1080-50p-5000kbit,.mp4.csmil/master.m3u8", + "https://pd-videos.daserste.de/int/2025/04/30/DEGSM156010/DEGSM156010_3287187_sendeton_480x270-50p-700kbit.mp4", + List.of( + "https://pd-videos.daserste.de/int/2025/04/30/DEGSM156010/DEGSM156010_3287187_sendeton_480x270-50p-700kbit.mp4", + "https://pd-videos.daserste.de/int/2025/04/30/DEGSM156010/DEGSM156010_3287187_sendeton_640x360-50p-1200kbit.mp4", + "https://pd-videos.daserste.de/int/2025/04/30/DEGSM156010/DEGSM156010_3287187_sendeton_960x540-50p-1600kbit.mp4", + "https://pd-videos.daserste.de/int/2025/04/30/DEGSM156010/DEGSM156010_3287187_sendeton_1280x720-50p-3200kbit.mp4", + "https://pd-videos.daserste.de/int/2025/04/30/DEGSM156010/DEGSM156010_3287187_sendeton_1920x1080-50p-5000kbit.mp4" + ) + ), + Arguments.of( + "https://universal-vod.daserste.de/i/int/staging/int/2026/01/05/SWRSM206242/,SWRSM206242_3638331_sendeton_480x270-50p-700kbit.mp4,SWRSM206242_3638331_sendeton_640x360-50p-1200kbit.mp4,SWRSM206242_3638331_sendeton_960x540-50p-1600kbit.mp4,SWRSM206242_3638331_sendeton_1280x720-50p-3200kbit.mp4,SWRSM206242_3638331_sendeton_1920x1080-50p-5000kbit.mp4,.csmil/master.m3u8", + "https://ctv-videos.daserste.de/int/staging/int/2026/01/05/SWRSM206242/SWRSM206242_3638331_sendeton_1920x1080-50p-5000kbit.mp4", + List.of( + "https://ctv-videos.daserste.de/int/staging/int/2026/01/05/SWRSM206242/SWRSM206242_3638331_sendeton_480x270-50p-700kbit.mp4", + "https://ctv-videos.daserste.de/int/staging/int/2026/01/05/SWRSM206242/SWRSM206242_3638331_sendeton_640x360-50p-1200kbit.mp4", + 
"https://ctv-videos.daserste.de/int/staging/int/2026/01/05/SWRSM206242/SWRSM206242_3638331_sendeton_960x540-50p-1600kbit.mp4", + "https://ctv-videos.daserste.de/int/staging/int/2026/01/05/SWRSM206242/SWRSM206242_3638331_sendeton_1280x720-50p-3200kbit.mp4", + "https://ctv-videos.daserste.de/int/staging/int/2026/01/05/SWRSM206242/SWRSM206242_3638331_sendeton_1920x1080-50p-5000kbit.mp4" + ) + ), + // ─────────────────────────────────────────────────────────────── + // 2. BR – Buchstaben-Kürzel (A,X,C,HD,E) + // ─────────────────────────────────────────────────────────────── + Arguments.of( + "https://br-i.akamaihd.net/i/b7/2026-01/09/cfb3cd10-ed78-11f0-a101-02420a000526_,A,X,C,HD,E,.mp4.csmil/master.m3u8", + "https://cdn-storage.br.de/b7/2026-01/09/cfb3cd10-ed78-11f0-a101-02420a000526_A.mp4", + List.of( + "https://cdn-storage.br.de/b7/2026-01/09/cfb3cd10-ed78-11f0-a101-02420a000526_A.mp4", + "https://cdn-storage.br.de/b7/2026-01/09/cfb3cd10-ed78-11f0-a101-02420a000526_X.mp4", // 1280 + "https://cdn-storage.br.de/b7/2026-01/09/cfb3cd10-ed78-11f0-a101-02420a000526_C.mp4", // 960 + "https://cdn-storage.br.de/b7/2026-01/09/cfb3cd10-ed78-11f0-a101-02420a000526_E.mp4", // 640 + "https://cdn-storage.br.de/b7/2026-01/09/cfb3cd10-ed78-11f0-a101-02420a000526_HD.mp4" // 1920 + ) + ), + Arguments.of( + "https://br-i.akamaihd.net/i/geo/b7/2026-01/05/8f5bb760-ea89-11f0-a101-02420a000526_,A,X,C,HD,E,.mp4.csmil/master.m3u8", + "https://cdn-storage.br.de/geo/b7/2026-01/05/8f5bb760-ea89-11f0-a101-02420a000526_HD.mp4", + List.of( + "https://cdn-storage.br.de/geo/b7/2026-01/05/8f5bb760-ea89-11f0-a101-02420a000526_E.mp4", // 640 + "https://cdn-storage.br.de/geo/b7/2026-01/05/8f5bb760-ea89-11f0-a101-02420a000526_C.mp4", // 960 + "https://cdn-storage.br.de/geo/b7/2026-01/05/8f5bb760-ea89-11f0-a101-02420a000526_X.mp4", // 1280 + "https://cdn-storage.br.de/geo/b7/2026-01/05/8f5bb760-ea89-11f0-a101-02420a000526_HD.mp4" // 1920 + ) + ), + + // 
─────────────────────────────────────────────────────────────── + // 3. MDR + // ─────────────────────────────────────────────────────────────── + Arguments.of( + "https://mdronline-vh.akamaihd.net/i/mp4dyn2/b/FCMS-b306f4e4-3057-4f9a-9de3-587119512396-,c3f46785fa07,a33182bdaf26,3c67be5cd760,557cadc3dbd7,ef73d58f2f02,_b3.mp4.csmil/master.m3u8", + "https://odmdr-a.akamaihd.net/mp4dyn2/b/FCMS-b306f4e4-3057-4f9a-9de3-587119512396-c3f46785fa07_b3.mp4", + List.of( + "https://odmdr-a.akamaihd.net/mp4dyn2/b/FCMS-b306f4e4-3057-4f9a-9de3-587119512396-c3f46785fa07_b3.mp4", // 640 + "https://odmdr-a.akamaihd.net/mp4dyn2/b/FCMS-b306f4e4-3057-4f9a-9de3-587119512396-a33182bdaf26_b3.mp4", // 960 + "https://odmdr-a.akamaihd.net/mp4dyn2/b/FCMS-b306f4e4-3057-4f9a-9de3-587119512396-3c67be5cd760_b3.mp4", // 1280 + "https://odmdr-a.akamaihd.net/mp4dyn2/b/FCMS-b306f4e4-3057-4f9a-9de3-587119512396-557cadc3dbd7_b3.mp4", // 1920 + "https://odmdr-a.akamaihd.net/mp4dyn2/b/FCMS-b306f4e4-3057-4f9a-9de3-587119512396-ef73d58f2f02_b3.mp4" + ) + ), + Arguments.of( + "https://mdronline-vh.akamaihd.net/i/mp4dyn2/f/FCMS-fa64aa93-0416-459a-bdf3-fdbf803cccdc-,c3f46785fa07,a33182bdaf26,3c67be5cd760,557cadc3dbd7,ef73d58f2f02,_fa.mp4.csmil/master.m3u8", + "https://odmdr-a.akamaihd.net/mp4dyn2/f/FCMS-fa64aa93-0416-459a-bdf3-fdbf803cccdc-557cadc3dbd7_fa.mp4", + List.of( + "https://odmdr-a.akamaihd.net/mp4dyn2/f/FCMS-fa64aa93-0416-459a-bdf3-fdbf803cccdc-c3f46785fa07_fa.mp4", // 640 + "https://odmdr-a.akamaihd.net/mp4dyn2/f/FCMS-fa64aa93-0416-459a-bdf3-fdbf803cccdc-a33182bdaf26_fa.mp4", // 960 + "https://odmdr-a.akamaihd.net/mp4dyn2/f/FCMS-fa64aa93-0416-459a-bdf3-fdbf803cccdc-3c67be5cd760_fa.mp4", // 1280 + "https://odmdr-a.akamaihd.net/mp4dyn2/f/FCMS-fa64aa93-0416-459a-bdf3-fdbf803cccdc-557cadc3dbd7_fa.mp4" // 1920 + ) + ), + + // ─────────────────────────────────────────────────────────────── + // 4. 
HR + // ─────────────────────────────────────────────────────────────── + Arguments.of( + "https://hrardmediathek-vh.akamaihd.net/i/odinson/geoavailability_DACH/grzimeks-vermaechtnis-wie-weit-darf-naturschutz-gehen/SVID-638971AA-EB4E-4480-BD97-42470163FDE1/ad2c8976-9701-4dd1-b415-1dec7c959c11/0239333_sendeton_,480x270-50p-700,1920x1080-50p-5000,1280x720-50p-3200,960x540-50p-1600,640x360-50p-1200,kbit.mp4/master.m3u8", + "https://hrardmediathek-a.akamaihd.net/odinson/geoavailability_DACH/grzimeks-vermaechtnis-wie-weit-darf-naturschutz-gehen/SVID-638971AA-EB4E-4480-BD97-42470163FDE1/ad2c8976-9701-4dd1-b415-1dec7c959c11/0239333_sendeton_480x270-50p-700kbit.mp4", + List.of( + "https://hrardmediathek-a.akamaihd.net/odinson/geoavailability_DACH/grzimeks-vermaechtnis-wie-weit-darf-naturschutz-gehen/SVID-638971AA-EB4E-4480-BD97-42470163FDE1/ad2c8976-9701-4dd1-b415-1dec7c959c11/0239333_sendeton_480x270-50p-700kbit.mp4", + "https://hrardmediathek-a.akamaihd.net/odinson/geoavailability_DACH/grzimeks-vermaechtnis-wie-weit-darf-naturschutz-gehen/SVID-638971AA-EB4E-4480-BD97-42470163FDE1/ad2c8976-9701-4dd1-b415-1dec7c959c11/0239333_sendeton_960x540-50p-1600kbit.mp4", + "https://hrardmediathek-a.akamaihd.net/odinson/geoavailability_DACH/grzimeks-vermaechtnis-wie-weit-darf-naturschutz-gehen/SVID-638971AA-EB4E-4480-BD97-42470163FDE1/ad2c8976-9701-4dd1-b415-1dec7c959c11/0239333_sendeton_640x360-50p-1200kbit.mp4", + "https://hrardmediathek-a.akamaihd.net/odinson/geoavailability_DACH/grzimeks-vermaechtnis-wie-weit-darf-naturschutz-gehen/SVID-638971AA-EB4E-4480-BD97-42470163FDE1/ad2c8976-9701-4dd1-b415-1dec7c959c11/0239333_sendeton_1280x720-50p-3200kbit.mp4", + "https://hrardmediathek-a.akamaihd.net/odinson/geoavailability_DACH/grzimeks-vermaechtnis-wie-weit-darf-naturschutz-gehen/SVID-638971AA-EB4E-4480-BD97-42470163FDE1/ad2c8976-9701-4dd1-b415-1dec7c959c11/0239333_sendeton_1920x1080-50p-5000kbit.mp4" + ) + ), + Arguments.of( + 
"https://hrardmediathek-vh.akamaihd.net/i/odinson/show-and-unterhaltung/SVID-AD59C600-61C6-42C5-BC7F-E34AB911469E/f155cbc9-33b7-4fa1-90ac-631dfcf95709/0229913_sendeton_,640x360-50p-1200,1920x1080-50p-5000,1280x720-50p-3200,960x540-50p-1600,480x270-50p-700,kbit.mp4/master.m3u8", + "https://hrardmediathek-a.akamaihd.net/odinson/show-and-unterhaltung/SVID-AD59C600-61C6-42C5-BC7F-E34AB911469E/f155cbc9-33b7-4fa1-90ac-631dfcf95709/0229913_sendeton_1920x1080-50p-5000kbit.mp4", + List.of( + "https://hrardmediathek-a.akamaihd.net/odinson/show-and-unterhaltung/SVID-AD59C600-61C6-42C5-BC7F-E34AB911469E/f155cbc9-33b7-4fa1-90ac-631dfcf95709/0229913_sendeton_640x360-50p-1200kbit.mp4", // 640 + "https://hrardmediathek-a.akamaihd.net/odinson/show-and-unterhaltung/SVID-AD59C600-61C6-42C5-BC7F-E34AB911469E/f155cbc9-33b7-4fa1-90ac-631dfcf95709/0229913_sendeton_960x540-50p-1600kbit.mp4", // 960 + "https://hrardmediathek-a.akamaihd.net/odinson/show-and-unterhaltung/SVID-AD59C600-61C6-42C5-BC7F-E34AB911469E/f155cbc9-33b7-4fa1-90ac-631dfcf95709/0229913_sendeton_1280x720-50p-3200kbit.mp4", // 1280 + "https://hrardmediathek-a.akamaihd.net/odinson/show-and-unterhaltung/SVID-AD59C600-61C6-42C5-BC7F-E34AB911469E/f155cbc9-33b7-4fa1-90ac-631dfcf95709/0229913_sendeton_1920x1080-50p-5000kbit.mp4" // 1920 + ) + ), + + // ─────────────────────────────────────────────────────────────── + // 5. SWR – Mischung aus sm/ml/xl + .l / .sm etc. 
+ // ─────────────────────────────────────────────────────────────── + Arguments.of( + "https://hlsodswr-vh.akamaihd.net/i/swrfernsehen/die-scheune/1534517,.sm,.ml,.l,.xl,.xxl,.mp4.csmil/master.m3u8", + "https://pdodswr-a.akamaihd.net/swrfernsehen/die-scheune/1534517.l.mp4", + List.of( + "https://pdodswr-a.akamaihd.net/swrfernsehen/die-scheune/1534517.l.mp4", + "https://pdodswr-a.akamaihd.net/swrfernsehen/die-scheune/1534517.sm.mp4", + "https://pdodswr-a.akamaihd.net/swrfernsehen/die-scheune/1534517.ml.mp4", + "https://pdodswr-a.akamaihd.net/swrfernsehen/die-scheune/1534517.xl.mp4", + "https://pdodswr-a.akamaihd.net/swrfernsehen/die-scheune/1534517.xxl.mp4" + ) + ), + Arguments.of( + "https://av-adaptive.swr.de/i/planet-schule/nie-wieder-keine-ahnung-malerei-der-betrachter,.sm,.ml,.l,.xl,.xxl,.mp4.csmil/master.m3u8", + "https://avdlswr-a.akamaihd.net/planet-schule/nie-wieder-keine-ahnung-malerei-der-betrachter.l.mp4", + List.of( + "https://avdlswr-a.akamaihd.net/planet-schule/nie-wieder-keine-ahnung-malerei-der-betrachter.l.mp4", + "https://avdlswr-a.akamaihd.net/planet-schule/nie-wieder-keine-ahnung-malerei-der-betrachter.sm.mp4", + "https://avdlswr-a.akamaihd.net/planet-schule/nie-wieder-keine-ahnung-malerei-der-betrachter.ml.mp4", + "https://avdlswr-a.akamaihd.net/planet-schule/nie-wieder-keine-ahnung-malerei-der-betrachter.xl.mp4", + "https://avdlswr-a.akamaihd.net/planet-schule/nie-wieder-keine-ahnung-malerei-der-betrachter.xxl.mp4" + ) + ), + + // ─────────────────────────────────────────────────────────────── + // 6. 
Sportschau – AVC-Präfix + // ─────────────────────────────────────────────────────────────── + Arguments.of( + "https://sportschau-vod.ard-mcdn.de/i/de/nfsk/2026/01/09/7e2abbd3-4d64-438c-ab13-b6d1563bec37/7e2abbd3-4d64-438c-ab13-b6d1563bec37_,AVC-360,AVC-1080,AVC-720,AVC-540,AVC-270,.mp4.csmil/master.m3u8", + "https://sportschau-progressive.ard-mcdn.de/de/nfsk/2026/01/09/7e2abbd3-4d64-438c-ab13-b6d1563bec37/7e2abbd3-4d64-438c-ab13-b6d1563bec37_AVC-360.mp4", + List.of( + "https://sportschau-progressive.ard-mcdn.de/de/nfsk/2026/01/09/7e2abbd3-4d64-438c-ab13-b6d1563bec37/7e2abbd3-4d64-438c-ab13-b6d1563bec37_AVC-360.mp4", // 640 + "https://sportschau-progressive.ard-mcdn.de/de/nfsk/2026/01/09/7e2abbd3-4d64-438c-ab13-b6d1563bec37/7e2abbd3-4d64-438c-ab13-b6d1563bec37_AVC-720.mp4", // 1280 + "https://sportschau-progressive.ard-mcdn.de/de/nfsk/2026/01/09/7e2abbd3-4d64-438c-ab13-b6d1563bec37/7e2abbd3-4d64-438c-ab13-b6d1563bec37_AVC-540.mp4", // 960 + "https://sportschau-progressive.ard-mcdn.de/de/nfsk/2026/01/09/7e2abbd3-4d64-438c-ab13-b6d1563bec37/7e2abbd3-4d64-438c-ab13-b6d1563bec37_AVC-270.mp4", + "https://sportschau-progressive.ard-mcdn.de/de/nfsk/2026/01/09/7e2abbd3-4d64-438c-ab13-b6d1563bec37/7e2abbd3-4d64-438c-ab13-b6d1563bec37_AVC-1080.mp4" // 1920 + ) + ), + Arguments.of( + "https://sportschau-vod.ard-mcdn.de/i/de/nfsk/2026/01/06/49f0de7b-ab0f-425c-8723-734cc3559848/49f0de7b-ab0f-425c-8723-734cc3559848_,AVC-360,AVC-1080,AVC-720,AVC-540,AVC-270,.mp4.csmil/master.m3u8", + "https://sportschau-progressive.ard-mcdn.de/de/nfsk/2026/01/06/49f0de7b-ab0f-425c-8723-734cc3559848/49f0de7b-ab0f-425c-8723-734cc3559848_AVC-1080.mp4", + List.of( + "https://sportschau-progressive.ard-mcdn.de/de/nfsk/2026/01/06/49f0de7b-ab0f-425c-8723-734cc3559848/49f0de7b-ab0f-425c-8723-734cc3559848_AVC-360.mp4", // 640 + "https://sportschau-progressive.ard-mcdn.de/de/nfsk/2026/01/06/49f0de7b-ab0f-425c-8723-734cc3559848/49f0de7b-ab0f-425c-8723-734cc3559848_AVC-540.mp4", // 960 + 
"https://sportschau-progressive.ard-mcdn.de/de/nfsk/2026/01/06/49f0de7b-ab0f-425c-8723-734cc3559848/49f0de7b-ab0f-425c-8723-734cc3559848_AVC-720.mp4", // 1280 + "https://sportschau-progressive.ard-mcdn.de/de/nfsk/2026/01/06/49f0de7b-ab0f-425c-8723-734cc3559848/49f0de7b-ab0f-425c-8723-734cc3559848_AVC-1080.mp4" // 1920 + ) + ), + + // ─────────────────────────────────────────────────────────────── + // 7. RBB + // ─────────────────────────────────────────────────────────────── + Arguments.of( + "https://rbbvod.akamaized.net/i/content/14/fc/14fc82d8-07ff-4d74-9584-62043970aaec/d829dcd0-eb36-11f0-9132-02420a00032c_,hd1080-avc360,hd1080-avc270,hd1080-avc540,hd1080-avc720,hd1080-avc1080,.mp4.csmil/master.m3u8", + "https://rbbmediapmdp-a.akamaihd.net/content/14/fc/14fc82d8-07ff-4d74-9584-62043970aaec/d829dcd0-eb36-11f0-9132-02420a00032c_hd1080-avc360.mp4", + List.of( + "https://rbbmediapmdp-a.akamaihd.net/content/14/fc/14fc82d8-07ff-4d74-9584-62043970aaec/d829dcd0-eb36-11f0-9132-02420a00032c_hd1080-avc360.mp4", // 640 + "https://rbbmediapmdp-a.akamaihd.net/content/14/fc/14fc82d8-07ff-4d74-9584-62043970aaec/d829dcd0-eb36-11f0-9132-02420a00032c_hd1080-avc270.mp4", + "https://rbbmediapmdp-a.akamaihd.net/content/14/fc/14fc82d8-07ff-4d74-9584-62043970aaec/d829dcd0-eb36-11f0-9132-02420a00032c_hd1080-avc540.mp4", //960 + "https://rbbmediapmdp-a.akamaihd.net/content/14/fc/14fc82d8-07ff-4d74-9584-62043970aaec/d829dcd0-eb36-11f0-9132-02420a00032c_hd1080-avc720.mp4", // 1280 + "https://rbbmediapmdp-a.akamaihd.net/content/14/fc/14fc82d8-07ff-4d74-9584-62043970aaec/d829dcd0-eb36-11f0-9132-02420a00032c_hd1080-avc1080.mp4" // 1920 + ) + ), + Arguments.of( + "https://rbbvod.akamaized.net/i/content/42/83/428320c9-51a6-4aa0-9329-5b81be1ec5f9/8ac3982b-09be-4f0c-ad87-09948c9a0c3a_,hd1080-avc360,hd1080-avc270,hd1080-avc540,hd1080-avc720,hd1080-avc1080,.mp4.csmil/master.m3u8", + 
"https://rbbmediapmdp-a.akamaihd.net/content/42/83/428320c9-51a6-4aa0-9329-5b81be1ec5f9/8ac3982b-09be-4f0c-ad87-09948c9a0c3a_hd1080-avc1080.mp4", + List.of( + "https://rbbmediapmdp-a.akamaihd.net/content/42/83/428320c9-51a6-4aa0-9329-5b81be1ec5f9/8ac3982b-09be-4f0c-ad87-09948c9a0c3a_hd1080-avc360.mp4", // 640 + "https://rbbmediapmdp-a.akamaihd.net/content/42/83/428320c9-51a6-4aa0-9329-5b81be1ec5f9/8ac3982b-09be-4f0c-ad87-09948c9a0c3a_hd1080-avc540.mp4", // 960 + "https://rbbmediapmdp-a.akamaihd.net/content/42/83/428320c9-51a6-4aa0-9329-5b81be1ec5f9/8ac3982b-09be-4f0c-ad87-09948c9a0c3a_hd1080-avc720.mp4", // 1280 + "https://rbbmediapmdp-a.akamaihd.net/content/42/83/428320c9-51a6-4aa0-9329-5b81be1ec5f9/8ac3982b-09be-4f0c-ad87-09948c9a0c3a_hd1080-avc1080.mp4" // 1920 + ) + ), + + // ─────────────────────────────────────────────────────────────── + // 8. WDR – Nummern-basierte Varianten + // ─────────────────────────────────────────────────────────────── + Arguments.of( + "https://wdradaptiv-vh.akamaihd.net/i/medp/ondemand/weltweit/fsk0/341/3417125/,3417125_65497916,3417125_65497917,3417125_65497915,3417125_65497918,3417125_65497914,.mp4.csmil/master.m3u8", + "https://wdr-progressive.ard-mcdn.de/medp/ondemand/weltweit/fsk0/341/3417125/3417125_65497916.mp4", + List.of( + "https://wdr-progressive.ard-mcdn.de/medp/ondemand/weltweit/fsk0/341/3417125/3417125_65497916.mp4", // 640 + "https://wdr-progressive.ard-mcdn.de/medp/ondemand/weltweit/fsk0/341/3417125/3417125_65497917.mp4", //960 + "https://wdr-progressive.ard-mcdn.de/medp/ondemand/weltweit/fsk0/341/3417125/3417125_65497915.mp4", + "https://wdr-progressive.ard-mcdn.de/medp/ondemand/weltweit/fsk0/341/3417125/3417125_65497918.mp4", //1280 + "https://wdr-progressive.ard-mcdn.de/medp/ondemand/weltweit/fsk0/341/3417125/3417125_65497914.mp4" //1920 + ) + ), + Arguments.of( + 
"https://wdradaptiv-vh.akamaihd.net/i/medp/ondemand/de/fsk0/322/3220429/,3220429_60325869,3220429_60325870,3220429_60325868,3220429_60325871,3220429_60325867,.mp4.csmil/master.m3u8", + "https://wdr-progressive.ard-mcdn.de/medp/ondemand/de/fsk0/322/3220429/3220429_60325867.mp4", + List.of( + "https://wdr-progressive.ard-mcdn.de/medp/ondemand/de/fsk0/322/3220429/3220429_60325869.mp4", // 640 + "https://wdr-progressive.ard-mcdn.de/medp/ondemand/de/fsk0/322/3220429/3220429_60325870.mp4", // 960 + "https://wdr-progressive.ard-mcdn.de/medp/ondemand/de/fsk0/322/3220429/3220429_60325871.mp4", // 1280 + "https://wdr-progressive.ard-mcdn.de/medp/ondemand/de/fsk0/322/3220429/3220429_60325867.mp4" // 1920 + ) + ), + + // ─────────────────────────────────────────────────────────────── + // 9. SR – N/L/P/H Kürzel + // ─────────────────────────────────────────────────────────────── + Arguments.of( + "https://srod-vh.akamaihd.net/i/media/FS/SRINFO/srinfo_18_20260110_185001_,N,L,P,H,.mp4.csmil/master.m3u8", + "https://srstorage01-a.akamaihd.net/Video/FS/SRINFO/srinfo_18_20260110_185001_N.mp4", + List.of( + "https://srstorage01-a.akamaihd.net/Video/FS/SRINFO/srinfo_18_20260110_185001_N.mp4", //640 + "https://srstorage01-a.akamaihd.net/Video/FS/SRINFO/srinfo_18_20260110_185001_L.mp4", //960 + "https://srstorage01-a.akamaihd.net/Video/FS/SRINFO/srinfo_18_20260110_185001_P.mp4", //1280 + "https://srstorage01-a.akamaihd.net/Video/FS/SRINFO/srinfo_18_20260110_185001_H.mp4" //1920 + ) + ), + Arguments.of( + "https://srod-vh.akamaihd.net/i/media/FS/HUHMO/HUHMO-241217-115348_,N,L,P,H,.mp4.csmil/master.m3u8", + "https://srstorage01-a.akamaihd.net/Video/FS/HUHMO/HUHMO-241217-115348_H.mp4", + List.of( + "https://srstorage01-a.akamaihd.net/Video/FS/HUHMO/HUHMO-241217-115348_N.mp4", + "https://srstorage01-a.akamaihd.net/Video/FS/HUHMO/HUHMO-241217-115348_L.mp4", + "https://srstorage01-a.akamaihd.net/Video/FS/HUHMO/HUHMO-241217-115348_P.mp4", + 
"https://srstorage01-a.akamaihd.net/Video/FS/HUHMO/HUHMO-241217-115348_H.mp4" + ) + ), + Arguments.of( + "https://srod-vh.akamaihd.net/i/media/FS/MHAH/MHAH-251229-123635_,N,L,P,H,.mp4.csmil/master.m3u8", + "https://srstorage01-a.akamaihd.net/Video/FS/MHAH/MHAH-251229-123635_H.mp4", + List.of( + "https://srstorage01-a.akamaihd.net/Video/FS/MHAH/MHAH-251229-123635_N.mp4", + "https://srstorage01-a.akamaihd.net/Video/FS/MHAH/MHAH-251229-123635_L.mp4", + "https://srstorage01-a.akamaihd.net/Video/FS/MHAH/MHAH-251229-123635_P.mp4", + "https://srstorage01-a.akamaihd.net/Video/FS/MHAH/MHAH-251229-123635_H.mp4" + ) + ), + + // ─────────────────────────────────────────────────────────────── + // 10. NDR – ln/hd/hq/mn/1080 + // ─────────────────────────────────────────────────────────────── + Arguments.of( + "https://adaptive.ndr.de/i/ndr/2024/1001/TV-20241001-2009-2200.,ln,1080,hd,hq,mn,.mp4.csmil/master.m3u8", + "https://ndr-progressive.ard-mcdn.de/progressive/2024/1001/TV-20241001-2009-2200.ln.mp4", + List.of( + "https://ndr-progressive.ard-mcdn.de/progressive/2024/1001/TV-20241001-2009-2200.ln.mp4", //640 + "https://ndr-progressive.ard-mcdn.de/progressive/2024/1001/TV-20241001-2009-2200.hd.mp4", //1280 + "https://ndr-progressive.ard-mcdn.de/progressive/2024/1001/TV-20241001-2009-2200.hq.mp4", //960 + "https://ndr-progressive.ard-mcdn.de/progressive/2024/1001/TV-20241001-2009-2200.mn.mp4", // 480 + "https://ndr-progressive.ard-mcdn.de/progressive/2024/1001/TV-20241001-2009-2200.1080.mp4" //1920 + ) + ), + Arguments.of( + "https://adaptive.ndr.de/i/geo/2024/1128/TV-20241128-1322-3100.,ln,1080,hd,hq,mn,.mp4.csmil/master.m3u8", + "https://ndr-progressive.ard-mcdn.de/progressive_geo/2024/1128/TV-20241128-1322-3100.1080.mp4", + List.of( + "https://ndr-progressive.ard-mcdn.de/progressive_geo/2024/1128/TV-20241128-1322-3100.ln.mp4", // 640 + "https://ndr-progressive.ard-mcdn.de/progressive_geo/2024/1128/TV-20241128-1322-3100.hq.mp4", // 960 + 
"https://ndr-progressive.ard-mcdn.de/progressive_geo/2024/1128/TV-20241128-1322-3100.hd.mp4", // 1280 + "https://ndr-progressive.ard-mcdn.de/progressive_geo/2024/1128/TV-20241128-1322-3100.1080.mp4" // 1920 + ) + ), + + // ─────────────────────────────────────────────────────────────── + // 11. SWR aktuell – avc-Präfix mit 270/360/540/720/1080 + // ─────────────────────────────────────────────────────────────── + Arguments.of( + "https://av-adaptive.swr.de/i/swr/swraktuell/bw/tv/gesamtsendung/2290935,.avc-270,.avc-360,.avc-540,.avc-720,.avc-1080,.mp4.csmil/master.m3u8", + "https://pdodswr-a.akamaihd.net/swr/swraktuell/bw/tv/gesamtsendung/2290935.avc-270.mp4", + List.of( + "https://pdodswr-a.akamaihd.net/swr/swraktuell/bw/tv/gesamtsendung/2290935.avc-270.mp4", + "https://pdodswr-a.akamaihd.net/swr/swraktuell/bw/tv/gesamtsendung/2290935.avc-360.mp4", //640 + "https://pdodswr-a.akamaihd.net/swr/swraktuell/bw/tv/gesamtsendung/2290935.avc-540.mp4", //960 + "https://pdodswr-a.akamaihd.net/swr/swraktuell/bw/tv/gesamtsendung/2290935.avc-720.mp4", //1280 + "https://pdodswr-a.akamaihd.net/swr/swraktuell/bw/tv/gesamtsendung/2290935.avc-1080.mp4" //1920 + ) + ), + Arguments.of( + "https://av-adaptive.swr.de/i/swr/swraktuell/bw/tv/gesamtsendung/2292127,.avc-270,.avc-360,.avc-540,.avc-720,.avc-1080,.mp4.csmil/master.m3u8", + "https://pdodswr-a.akamaihd.net/swr/swraktuell/bw/tv/gesamtsendung/2292127.avc-1080.mp4", + List.of( + "https://pdodswr-a.akamaihd.net/swr/swraktuell/bw/tv/gesamtsendung/2292127.avc-360.mp4", // 640 + "https://pdodswr-a.akamaihd.net/swr/swraktuell/bw/tv/gesamtsendung/2292127.avc-540.mp4", // 960 + "https://pdodswr-a.akamaihd.net/swr/swraktuell/bw/tv/gesamtsendung/2292127.avc-720.mp4", // 1280 + "https://pdodswr-a.akamaihd.net/swr/swraktuell/bw/tv/gesamtsendung/2292127.avc-1080.mp4" // 1920 + ) + ), + + // ─────────────────────────────────────────────────────────────── + // 12. 
RB + // ─────────────────────────────────────────────────────────────── + Arguments.of( + "https://rbhlsod-vh.akamaihd.net/i/,clips/zt/welt/pz/PZ4x6OtreF/PZ4x6OtreF1920x1080-50p.mp4,clips/zt/welt/pz/PZ4x6OtreF/PZ4x6OtreF1280x720-50p.mp4,clips/zt/welt/pz/PZ4x6OtreF/PZ4x6OtreF960x540-50p.mp4,clips/zt/welt/pz/PZ4x6OtreF/PZ4x6OtreF640x360-50p.mp4,.csmil/master.m3u8", + "https://rbprogressivedl-a.akamaihd.net/clips/zt/welt/pz/PZ4x6OtreF/PZ4x6OtreF1920x1080-50p.mp4", + List.of( + "https://rbprogressivedl-a.akamaihd.net/clips/zt/welt/pz/PZ4x6OtreF/PZ4x6OtreF640x360-50p.mp4", //640 + "https://rbprogressivedl-a.akamaihd.net/clips/zt/welt/pz/PZ4x6OtreF/PZ4x6OtreF960x540-50p.mp4", //960 + "https://rbprogressivedl-a.akamaihd.net/clips/zt/welt/pz/PZ4x6OtreF/PZ4x6OtreF1280x720-50p.mp4", //1280 + "https://rbprogressivedl-a.akamaihd.net/clips/zt/welt/pz/PZ4x6OtreF/PZ4x6OtreF1920x1080-50p.mp4" //1920 + ) + ), + + Arguments.of( + "https://rbhlsod-vh.akamaihd.net/i/,clips/zt/welt/zs/zShs2LSY8I/zShs2LSY8I1920x1080-50p.mp4,clips/zt/welt/zs/zShs2LSY8I/zShs2LSY8I1280x720-50p.mp4,clips/zt/welt/zs/zShs2LSY8I/zShs2LSY8I960x540-50p.mp4,clips/zt/welt/zs/zShs2LSY8I/zShs2LSY8I640x360-50p.mp4,.csmil/master.m3u8", + "https://rbprogressivedl-a.akamaihd.net/clips/zt/welt/zs/zShs2LSY8I/zShs2LSY8I1920x1080-50p.mp4", + List.of( + "https://rbprogressivedl-a.akamaihd.net/clips/zt/welt/zs/zShs2LSY8I/zShs2LSY8I640x360-50p.mp4", // 640 + "https://rbprogressivedl-a.akamaihd.net/clips/zt/welt/zs/zShs2LSY8I/zShs2LSY8I960x540-50p.mp4", // 960 + "https://rbprogressivedl-a.akamaihd.net/clips/zt/welt/zs/zShs2LSY8I/zShs2LSY8I1280x720-50p.mp4", // 1280 + "https://rbprogressivedl-a.akamaihd.net/clips/zt/welt/zs/zShs2LSY8I/zShs2LSY8I1920x1080-50p.mp4" // 1920 + ) + ), + + // ─────────────────────────────────────────────────────────────── + // 13. 
DRA Deutsches Rundfunkarchive + // ─────────────────────────────────────────────────────────────── + Arguments.of( + "https://dra-dd.akamaized.net/video/152870/349904/HLS/17Tagungder4VolkskammerderDDR-17Tagungder4VolkskammerderDDR_152870_349904_master.m3u8", + "", + List.of( + "https://dra-dd.akamaized.net/video/152870/349904/mp4/17Tagungder4VolkskammerderDDR-17Tagungder4VolkskammerderDDR_152870_349904_vod.360.MP4", + "https://dra-dd.akamaized.net/video/152870/349904/mp4/17Tagungder4VolkskammerderDDR-17Tagungder4VolkskammerderDDR_152870_349904_vod.540.MP4", + "https://dra-dd.akamaized.net/video/152870/349904/mp4/17Tagungder4VolkskammerderDDR-17Tagungder4VolkskammerderDDR_152870_349904_vod.720.MP4", + "https://dra-dd.akamaized.net/video/152870/349904/mp4/17Tagungder4VolkskammerderDDR-17Tagungder4VolkskammerderDDR_152870_349904_vod.1080.MP4" + ) + ), + + // ─────────────────────────────────────────────────────────────── + // 14. ZDF + // ─────────────────────────────────────────────────────────────── + Arguments.of( + "https://zdfvod.akamaized.net/i/mp4/none/zdf/25/12/251218_trailer_kudamm77_hero_kud/1/251218_trailer_kudamm77_hero_kud,_508k_p9,_808k_p11,_1628k_p13,_3328k_p15,_6628k_p61,v17.mp4.csmil/master.m3u8", + "https://nrodlzdf-a.akamaihd.net/none/zdf/25/12/251218_trailer_kudamm77_hero_kud/1/251218_trailer_kudamm77_hero_kud_508k_p9v17.mp4", + List.of( + "https://nrodlzdf-a.akamaihd.net/none/zdf/25/12/251218_trailer_kudamm77_hero_kud/1/251218_trailer_kudamm77_hero_kud_508k_p9v17.mp4", // 270 low + "https://nrodlzdf-a.akamaihd.net/none/zdf/25/12/251218_trailer_kudamm77_hero_kud/1/251218_trailer_kudamm77_hero_kud_808k_p11v17.mp4", // 360 high + "https://nrodlzdf-a.akamaihd.net/none/zdf/25/12/251218_trailer_kudamm77_hero_kud/1/251218_trailer_kudamm77_hero_kud_1628k_p13v17.mp4", // 540 veryhigh + "https://nrodlzdf-a.akamaihd.net/none/zdf/25/12/251218_trailer_kudamm77_hero_kud/1/251218_trailer_kudamm77_hero_kud_3328k_p15v17.mp4", // 720 hd + 
"https://nrodlzdf-a.akamaihd.net/none/zdf/25/12/251218_trailer_kudamm77_hero_kud/1/251218_trailer_kudamm77_hero_kud_6628k_p61v17.mp4" // 1080 fhd + ) + ) + + + ); + } +} diff --git a/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDeserializerTest.java b/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDeserializerTest.java index 48f1e51d2..91222db34 100644 --- a/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDeserializerTest.java +++ b/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfDayPageDeserializerTest.java @@ -29,7 +29,7 @@ public ZdfDayPageDeserializerTest( final String aJsonFile, final CrawlerUrlDTO[] aExpectedEntries, final Optional aExpectedNextPageUrl) { - target = new ZdfDayPageDeserializer(ZdfConstants.URL_API_BASE); + target = new ZdfDayPageDeserializer(ZdfConstants.URL_API_BASE, ZdfConstants.PARTNER_TO_SENDER); jsonFile = aJsonFile; expectedEntries = aExpectedEntries; diff --git a/src/test/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfDayPageTaskTest.java b/src/test/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfDayPageTaskTest.java index 2858d55ba..abbb4456f 100644 --- a/src/test/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfDayPageTaskTest.java +++ b/src/test/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfDayPageTaskTest.java @@ -72,7 +72,7 @@ public void testOverviewPageNotFound() { private Set executeTask(final String aRequestUrl) { return new ZdfDayPageTask( - createCrawler(), ZdfConstants.URL_API_BASE, createCrawlerUrlDto(aRequestUrl), null) + createCrawler(), ZdfConstants.URL_API_BASE, createCrawlerUrlDto(aRequestUrl), null, ZdfConstants.PARTNER_TO_SENDER) .invoke(); } } From 75f36a5dfbf497b4d08aab3848f501d950c9d405 Mon Sep 17 00:00:00 2001 From: CodingPF Date: Sun, 25 Jan 2026 11:12:26 +0100 Subject: [PATCH 16/23] Merge branch 'feature/ArdGroupStructure' into develop3 --- src/main/docker/runDocker | 8 ++++---- .../mserver/base/utils/FilmDBService.java | 9 
++++++++- .../base/utils/PostgreSQLDataSourceProvider.java | 8 ++++---- .../mserver/crawler/ard/ArdCrawler.java | 2 +- .../crawler/ard/json/ArdTeasersDeserializer.java | 12 ------------ .../mserver/crawler/ard/tasks/ArdTopicPageTask.java | 4 ++-- .../crawler/ard/tasks/ArdTopicsLetterTask.java | 2 +- .../ard/json/ArdTopicsLetterDeserializerTest.java | 10 +++++----- 8 files changed, 25 insertions(+), 30 deletions(-) diff --git a/src/main/docker/runDocker b/src/main/docker/runDocker index fdd073e48..989b93375 100644 --- a/src/main/docker/runDocker +++ b/src/main/docker/runDocker @@ -4,16 +4,16 @@ docker compose up -d postgresMV # cron 01 01 long run -docker compose run -d --rm -e MSERVER_OPTS="--config /config/MServer-Config-R1.yaml" mserver-r1 +docker compose run -d --rm -e MSERVER_OPTS="--config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R1.yaml" mserver-r1 # cron 06-22 1,31 short run -docker compose run -d --rm -e MSERVER_OPTS="--config /config/MServer-Config-R2.yaml" mserver-r2 +docker compose run -d --rm -e MSERVER_OPTS="--config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R2.yaml" mserver-r2 # cron 22 55 url check -docker compose run -d --rm -e MSERVER_OPTS="--config /config/MServer-Config-R2.yaml --flow checkAvailability" mserver-checkUrls +docker compose run -d --rm -e MSERVER_OPTS="--config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R2.yaml --flow checkAvailability" mserver-checkUrls # on demand - do not run this unless you know what you are doing! 
-docker compose run -d --rm -e MSERVER_OPTS="--config /config/MServer-Config-R2.yaml --flow importFilmlistIntoDB" mserver-r3 +docker compose run -d --rm -e MSERVER_OPTS="--config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R2.yaml --flow importFilmlistIntoDB" mserver-r3 ## docker save -o mserver.tar mediathekview/mserver:4.0.1-SNAPSHOT ## docker load -i mserver.tar diff --git a/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java index 24a6144dd..f81610851 100644 --- a/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java +++ b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java @@ -88,6 +88,7 @@ public void updateLastUrlCheck(List checked) { ps.addBatch(); } int [] rs = ps.executeBatch(); + con.commit(); for (int rsCode : rs) { updateCounter.addAndGet(rsCode); } @@ -124,6 +125,7 @@ public void deleteFilms(Collection abandonedFilmlist) { ps.addBatch(); } ps.executeBatch(); + con.commit(); } catch (SQLException e) { LOG.error(e); } @@ -203,6 +205,7 @@ public List filterNewVideos(Sender sender, List videos, Function List filterNewVideos(Sender sender, List videos, Function(result.subList(0, 200000)); + * + */ return result; } catch (Exception e) { LOG.error("{}", e); @@ -319,8 +326,8 @@ ON CONFLICT (id) DO UPDATE LOG.error("saveBatch - Missing ID for film {}", film); } } - ps.executeBatch(); + con.commit(); } return successCounter; } diff --git a/src/main/java/de/mediathekview/mserver/base/utils/PostgreSQLDataSourceProvider.java b/src/main/java/de/mediathekview/mserver/base/utils/PostgreSQLDataSourceProvider.java index af245fb58..d1daf8494 100644 --- a/src/main/java/de/mediathekview/mserver/base/utils/PostgreSQLDataSourceProvider.java +++ b/src/main/java/de/mediathekview/mserver/base/utils/PostgreSQLDataSourceProvider.java @@ -39,19 +39,19 @@ private void init() { 
cfg.setUsername(aMServerConfigManager.getConfig().getDatabaseConfig().getUsername()); cfg.setPassword(aMServerConfigManager.getConfig().getDatabaseConfig().getPassword()); - // === Pool Sizing (wichtig!) === - cfg.setMaximumPoolSize(16); // Sweet Spot für 10k+/min + // === Pool Sizing === + cfg.setMaximumPoolSize(50); cfg.setMinimumIdle(4); // === Performance === - cfg.setAutoCommit(true); + cfg.setAutoCommit(false); cfg.setConnectionTimeout(3000); cfg.setIdleTimeout(600_000); cfg.setMaxLifetime(1_800_000); // === PostgreSQL Optimierungen === - cfg.addDataSourceProperty("reWriteBatchedInserts", "true"); cfg.addDataSourceProperty("stringtype", "unspecified"); + cfg.addDataSourceProperty("defaultRowFetchSize", "10000"); // === Debug (optional) === cfg.setPoolName("CrawlerPool"); diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java index 6c039172c..685bd94a7 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java @@ -123,7 +123,7 @@ private ForkJoinTask> getTopicEntriesBySender(final String se Set senderSingleLetterUrls = forkJoinPool.submit( new ArdTopicsTask(this, sender, CreateLetterUrlQuery(sender))).get(); - LOG.debug("topics task result {}", senderSingleLetterUrls.size()); + //LOG.debug("topics task result {}", senderSingleLetterUrls.size()); return forkJoinPool.submit(new ArdTopicsLetterTask(this, sender, new ConcurrentLinkedQueue<>(senderSingleLetterUrls))); } diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdTeasersDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdTeasersDeserializer.java index 1cde6222d..2a927cd25 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdTeasersDeserializer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdTeasersDeserializer.java @@ -60,19 +60,7 @@ 
private ArdFilmInfoDto createFilmInfo(final String id, final int numberOfClips) if(id.contains(":")) { refId = id.replace(":", "%3A"); } - final String url = String.format(ArdConstants.ITEM_URL, refId); - - - - if (id.contains("a04c5a47-0801-40e5-b530-b7f9a4312be9:6898178275329995836") - || id.contains("Y3JpZDovL25kci5kZS9wcm9wbGFuXzE5NjM4MTA5N19nYW56ZVNlbmR1bmc") - || id.contains("1TDLUvc8cVEtcSb9GGsOnt:6898178275329995836") - || id.contains("6b64fc2c-4bd7-47ae-af6c-680e65b53b89") - ) { - System.out.println("stop"); - } - return new ArdFilmInfoDto(id, url, numberOfClips); } diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicPageTask.java b/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicPageTask.java index 95020d2ca..29b594882 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicPageTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicPageTask.java @@ -39,7 +39,7 @@ protected void processRestTarget(final CrawlerUrlDTO aDTO, final WebTarget aTarg && topicInfo.getFilmInfos() != null && !topicInfo.getFilmInfos().isEmpty()) { taskResults.addAll(topicInfo.getFilmInfos()); - LOG.debug("Found {} shows for a topic of ARD.", topicInfo.getFilmInfos().size()); + //LOG.debug("Found {} shows for a topic of ARD.", topicInfo.getFilmInfos().size()); final Queue subpages = createSubPageUrls(aTarget, topicInfo); if (!subpages.isEmpty()) { @@ -65,7 +65,7 @@ private Queue createSubPageUrls( break; } } - LOG.debug("Found {} subpage", subpages.size()); + //LOG.debug("Found {} subpage", subpages.size()); return subpages; } diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicsLetterTask.java b/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicsLetterTask.java index dbdfede34..47d834799 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicsLetterTask.java +++ 
b/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicsLetterTask.java @@ -42,7 +42,7 @@ protected AbstractRecursiveConverterTask createNew @Override protected void processRestTarget(final CrawlerUrlDTO aDTO, final WebTarget aTarget) { final PaginationUrlDto results = deserialize(aTarget, PAGINATION_URL_DTO_TYPE_TOKEN, aDTO); - LOG.debug("Found {} shows for {}.", results.getUrls().size(), sender); + //LOG.debug("Found {} shows for {}.", results.getUrls().size(), sender); taskResults.addAll(results.getUrls()); if (results.getActualPage() == 0 && results.getMaxPages() > 1) { diff --git a/src/test/java/de/mediathekview/mserver/crawler/ard/json/ArdTopicsLetterDeserializerTest.java b/src/test/java/de/mediathekview/mserver/crawler/ard/json/ArdTopicsLetterDeserializerTest.java index 11b54895f..ad3b0c209 100644 --- a/src/test/java/de/mediathekview/mserver/crawler/ard/json/ArdTopicsLetterDeserializerTest.java +++ b/src/test/java/de/mediathekview/mserver/crawler/ard/json/ArdTopicsLetterDeserializerTest.java @@ -19,15 +19,15 @@ public void testDeserialize() { final CrawlerUrlDTO[] expected = new CrawlerUrlDTO[] { new CrawlerUrlDTO( - "https://api.ardmediathek.de/page-gateway/widgets/ard/asset/Y3JpZDovL3JhZGlvYnJlbWVuLmRlL3NlbmRlcmVpaGVuL2lkX2J1dGVudW5iaW5uZW4?pageSize=50"), + "https://api.ardmediathek.de/page-gateway/pages/ard/grouping/Y3JpZDovL3JhZGlvYnJlbWVuLmRlL2J1dGVudW5iaW5uZW5nZWJhZXJkZW5zcHJhY2hl?embedded=true"), new CrawlerUrlDTO( - "https://api.ardmediathek.de/page-gateway/widgets/ard/asset/Y3JpZDovL3JhZGlvYnJlbWVuLmRlL3NlbmRlcmVpaGVuL2lkX3Nwb3J0YmxpdHo?pageSize=50"), + "https://api.ardmediathek.de/page-gateway/pages/ard/grouping/Y3JpZDovL3JhZGlvYnJlbWVuLmRlL3NlbmRlcmVpaGVuL2lkX3Nwb3J0YmxpdHo?embedded=true"), new CrawlerUrlDTO( - "https://api.ardmediathek.de/page-gateway/widgets/ard/asset/Y3JpZDovL3JhZGlvYnJlbWVuLmRlL3NlbmRlcmVpaGVuL2lkX2J1dGVudW5iaW5uZW53ZXR0ZXI?pageSize=50"), + 
"https://api.ardmediathek.de/page-gateway/pages/ard/grouping/Y3JpZDovL3JhZGlvYnJlbWVuLmRlL3NlbmRlcmVpaGVuL2lkX2J1dGVudW5iaW5uZW53ZXR0ZXI?embedded=true"), new CrawlerUrlDTO( - "https://api.ardmediathek.de/page-gateway/widgets/ard/asset/Y3JpZDovL3JhZGlvYnJlbWVuLmRlL3NlbmRlcmVpaGVuL2lkX2J1dGVudW5iaW5uZW51bTY?pageSize=50"), + "https://api.ardmediathek.de/page-gateway/pages/ard/grouping/Y3JpZDovL3JhZGlvYnJlbWVuLmRlL3NlbmRlcmVpaGVuL2lkX2J1dGVudW5iaW5uZW4?embedded=true"), new CrawlerUrlDTO( - "https://api.ardmediathek.de/page-gateway/widgets/ard/asset/Y3JpZDovL3JhZGlvYnJlbWVuLmRlL2J1dGVudW5iaW5uZW5nZWJhZXJkZW5zcHJhY2hl?pageSize=50"), + "https://api.ardmediathek.de/page-gateway/pages/ard/grouping/Y3JpZDovL3JhZGlvYnJlbWVuLmRlL3NlbmRlcmVpaGVuL2lkX2J1dGVudW5iaW5uZW51bTY?embedded=true"), }; final ArdTopicsLetterDeserializer instance = new ArdTopicsLetterDeserializer(); From 7b2199e84ee6708ec609ea397d4173e237a61c70 Mon Sep 17 00:00:00 2001 From: CodingPF Date: Sun, 25 Jan 2026 12:29:36 +0100 Subject: [PATCH 17/23] update ard crawler --- .../mserver/crawler/ard/ArdCrawler.java | 13 +++++++------ .../crawler/ard/json/ArdFilmDeserializer.java | 6 +++--- .../mserver/crawler/ard/tasks/ArdTaskBase.java | 1 - 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java index 685bd94a7..3fdaeb34f 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java @@ -13,6 +13,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import java.util.Arrays; import java.util.Collection; import java.util.HashSet; import java.util.List; @@ -40,8 +41,13 @@ public Sender getSender() { private Queue createDayUrlsToCrawl() { final Queue dayUrlsToCrawl = new ConcurrentLinkedQueue<>(); final List days = 
DateUtils.generateDaysToCrawl(crawlerConfig); + // funk hat keine program übersicht + final String[] CLIENTS_WITHOUT_FUNK = + Arrays.stream(ArdConstants.CLIENTS) + .filter(c -> !"funk".equals(c)) + .toArray(String[]::new); days.forEach( dateString -> { - for (final String client : ArdConstants.CLIENTS) { + for (final String client : CLIENTS_WITHOUT_FUNK) { final String url = String.format(ArdConstants.DAY_PAGE_URL, dateString, client); dayUrlsToCrawl.offer(new CrawlerUrlDTO(url)); } @@ -57,7 +63,6 @@ protected RecursiveTask> createCrawlerTask() { forkJoinPool.submit(new ArdDayPageTask(this, createDayUrlsToCrawl())); final Set shows = dayTask.get(); - shows.clear(); printMessage( ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), shows.size()); @@ -74,12 +79,8 @@ protected RecursiveTask> createCrawlerTask() { assitUrls.addAll(forkJoinPool.submit(groupsToAsset).get()); LOG.debug("sender group assit tasks: {}", assitUrls.size()); - //test.add(new CrawlerUrlDTO("https://api.ardmediathek.de/page-gateway/widgets/swr/asset/Y3JpZDovL3N3ci5kZS8yNDEwMzY1MA?pageNumber=0&pageSize=48&embedded=true&seasoned=false&seasonNumber=&withAudiodescription=false&withOriginalWithSubtitle=false&withOriginalversion=false&single=false")); - test.add(new CrawlerUrlDTO("https://api.ardmediathek.de/page-gateway/widgets/wdr/asset/Y3JpZDovL3dkci5kZS93ZXN0cG9s?pageNumber=0&pageSize=48&embedded=true&seasoned=false&seasonNumber=&withAudiodescription=false&withOriginalWithSubtitle=false&withOriginalversion=false&single=false")); - final ArdTopicPageTask topicTask = new ArdTopicPageTask(this, new ConcurrentLinkedQueue<>(assitUrls)); - //new ArdTopicPageTask(this, new ConcurrentLinkedQueue<>(test)); final int showsCountBefore = shows.size(); shows.addAll(forkJoinPool.submit(topicTask).get()); diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java index 
0440bf344..44f6f0ead 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java @@ -210,9 +210,9 @@ public List deserialize( date.orElse(null), duration.orElse(null), videoInfo.get())); - if (widgets.size() > 1) { - parseRelatedFilms(filmDto, widgets.get(1).getAsJsonObject()); - } + //if (widgets.size() > 1) { + //parseRelatedFilms(filmDto, widgets.get(1).getAsJsonObject()); + //} films.add(filmDto); } // OV - long term this should go into Film as "OV" diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTaskBase.java b/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTaskBase.java index ccab6a077..c9a3d09ba 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTaskBase.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTaskBase.java @@ -101,7 +101,6 @@ private Response executeRequest(final WebTarget aTarget) { } return request - .header("Accept-Encoding", "br, gzip, deflate, zstd") .header(HEADER_ACCEPT, APPLICATION_JSON) .header(HEADER_CONTENT_TYPE, APPLICATION_JSON) .get(); From cad68dab9916e3a7a79ce7297c650f3de8c0657d Mon Sep 17 00:00:00 2001 From: CodingPF Date: Sat, 14 Feb 2026 18:09:48 +0100 Subject: [PATCH 18/23] add more url sources #1123 --- .../orfon/json/OrfOnEpisodeDeserializer.java | 49 ++++++++++++++++--- 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/src/main/java/de/mediathekview/mserver/crawler/orfon/json/OrfOnEpisodeDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/orfon/json/OrfOnEpisodeDeserializer.java index 2ede913f7..474e35132 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/orfon/json/OrfOnEpisodeDeserializer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/orfon/json/OrfOnEpisodeDeserializer.java @@ -22,6 +22,7 @@ import java.util.Collection; import java.util.EnumMap; import java.util.HashSet; 
+import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; @@ -43,7 +44,6 @@ public class OrfOnEpisodeDeserializer implements JsonDeserializer> optimizeUrls(Optional urlMap = urls.get(); final FilmUrl url = urlMap.get(Resolution.NORMAL); - final String urlToOptimize = url.getUrl().toString(); + String urlToOptimize = url.getUrl().toString(); + for (String s : List.of("QXA","QXB")) { + urlToOptimize = urlToOptimize.replace(s, "#Q#"); + } try { - urlMap.put(Resolution.SMALL, new FilmUrl(urlToOptimize.replace("QXA", "Q4A"), 0L)); - urlMap.put(Resolution.NORMAL, new FilmUrl(urlToOptimize.replace("QXA", "Q6A"), 0L)); - urlMap.put(Resolution.HD, new FilmUrl(urlToOptimize.replace("QXA", "Q8C"), 0L)); + urlMap.put(Resolution.SMALL, new FilmUrl(urlToOptimize.replace("#Q#", "Q4A"), 0L)); + urlMap.put(Resolution.NORMAL, new FilmUrl(urlToOptimize.replace("#Q#", "Q6A"), 0L)); + urlMap.put(Resolution.HD, new FilmUrl(urlToOptimize.replace("#Q#", "Q8C"), 0L)); } catch (MalformedURLException e) {} } return urls; @@ -215,7 +218,15 @@ private Optional> parseVideoFromSources(JsonElement roo } } } - return parseVideoFromThumbnail(root); + Optional> fallbackThumbnail = parseVideoFromThumbnail(root); + if (fallbackThumbnail.isPresent()) { + return fallbackThumbnail; + } + Optional> fallbackGapless = parseVideoFromGapless(root); + if (fallbackGapless.isPresent()) { + return fallbackGapless; + } + return Optional.empty(); } @@ -248,6 +259,32 @@ private Optional> parseVideoFromThumbnail(JsonElement r return Optional.of(urls); } + private Optional> parseVideoFromGapless(JsonElement root) { + Map urls = new EnumMap<>(Resolution.class); + try { + Optional gaplessSourceAT = JsonUtils.getElement(root, "gapless_sources_austria", "hls"); + if (gaplessSourceAT.isPresent()) { + gaplessSourceAT.get().getAsJsonArray().forEach( e -> { + Optional url = JsonUtils.getElementValueAsString(e, "src"); + Optional drm = JsonUtils.getElementValueAsString(e, 
"is_drm_protected"); + try { + if (url.isPresent() && drm.orElse("").equalsIgnoreCase("false")) { + urls.put(Resolution.NORMAL, new FilmUrl(url.get(), 0L)); + } + } catch (MalformedURLException err) { + LOG.error("Malformed video url {} {}", url, err); + } + }); + } + } catch (Exception e) { + LOG.error("generateFallbackVideo {}", e); + } + if (urls.size() == 0) { + return Optional.empty(); + } + return Optional.of(urls); + } + private Optional> readVideoForTargetCodec(JsonElement urlArray, String targetCodec) { Map urls = new EnumMap<>(Resolution.class); for (JsonElement videoElement : urlArray.getAsJsonArray()) { From 81e7884decb06862f484efe86dffb0dddfe5e5ed Mon Sep 17 00:00:00 2001 From: CodingPF Date: Mon, 16 Feb 2026 18:13:56 +0100 Subject: [PATCH 19/23] remove stacktrace from srf website url rebuild, limit number of films --- .../mserver/base/utils/FilmDBService.java | 7 ++++--- .../mserver/crawler/ard/ArdCrawler.java | 1 - .../crawler/srf/parser/SrfFilmJsonDeserializer.java | 12 ++++++++++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java index f81610851..e06dbbc50 100644 --- a/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java +++ b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java @@ -224,9 +224,10 @@ public List filterNewVideos(Sender sender, List videos, Function(result.subList(0, 200000)); - * - */ + if(result.size() > 100_000) { + LOG.debug("CARP - reduced number of films to 100000 in one job"); + } + result = new ArrayList<>(result.subList(0, 100_000)); return result; } catch (Exception e) { LOG.error("{}", e); diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java index 3fdaeb34f..7529719bf 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java 
+++ b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java @@ -57,7 +57,6 @@ private Queue createDayUrlsToCrawl() { @Override protected RecursiveTask> createCrawlerTask() { - ConcurrentLinkedQueue test = new ConcurrentLinkedQueue<>(); try { final ForkJoinTask> dayTask = forkJoinPool.submit(new ArdDayPageTask(this, createDayUrlsToCrawl())); diff --git a/src/main/java/de/mediathekview/mserver/crawler/srf/parser/SrfFilmJsonDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/srf/parser/SrfFilmJsonDeserializer.java index 90c8955e6..ead6d4cba 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/srf/parser/SrfFilmJsonDeserializer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/srf/parser/SrfFilmJsonDeserializer.java @@ -97,7 +97,9 @@ private static Optional buildWebsiteUrl( try { return Optional.of(URI.create(url).toURL()); } catch (final MalformedURLException ex) { - LOG.error(String.format("The website url \"%s\" isn't valid.", url), ex); + LOG.debug("The website url {} isn't valid", url); + //LOG.error(String.format("The website url \"%s\" isn't valid.", url), ex); + } return Optional.empty(); @@ -106,7 +108,12 @@ private static Optional buildWebsiteUrl( private static String replaceCharForUrl(final String aValue) { return aValue .toLowerCase() + .replaceAll("\\p{C}", "") // entfernt ALLE Control Char .replace(' ', '-') + .replace("ä", "ae") + .replace("ü", "ue") + .replace("ö", "oe") + .replace("ß", "ss") .replace('.', '-') .replace(',', '-') .replace(":", "") @@ -120,9 +127,10 @@ private static String replaceCharForUrl(final String aValue) { .replace("«", "") .replace("»", "") .replace(" ", "") + .replace("--", "-") .replace("--", "-"); } - + private static String parseShow(final JsonObject aJsonObject) { if (aJsonObject.has(ELEMENT_SHOW)) { final JsonElement showElement = aJsonObject.get(ELEMENT_SHOW); From bc19de5a1e958b96b35e232cd6d7542e2780d112 Mon Sep 17 00:00:00 2001 From: CodingPF Date: Mon, 16 Feb 2026 21:28:57 
+0100 Subject: [PATCH 20/23] typo --- .../java/de/mediathekview/mserver/base/utils/FilmDBService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java index e06dbbc50..3897b0fe9 100644 --- a/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java +++ b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java @@ -226,8 +226,8 @@ public List filterNewVideos(Sender sender, List videos, Function 100_000) { LOG.debug("CARP - reduced number of films to 100000 in one job"); + result = new ArrayList<>(result.subList(0, 100000)); } - result = new ArrayList<>(result.subList(0, 100_000)); return result; } catch (Exception e) { LOG.error("{}", e); From e659c60be341956414da660b93a57c0fcb62c100 Mon Sep 17 00:00:00 2001 From: CodingPF Date: Fri, 20 Feb 2026 18:35:54 +0100 Subject: [PATCH 21/23] http 429 in jsonresttask --- .../crawler/arte/tasks/ArteVideoInfoTask.java | 4 +- .../crawler/arte/tasks/ArteVideoLinkTask.java | 1 - .../crawler/basic/AbstractJsonRestTask.java | 78 +++++++++++++++---- 3 files changed, 65 insertions(+), 18 deletions(-) diff --git a/src/main/java/de/mediathekview/mserver/crawler/arte/tasks/ArteVideoInfoTask.java b/src/main/java/de/mediathekview/mserver/crawler/arte/tasks/ArteVideoInfoTask.java index 36b81c9a5..3bfa3a078 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/arte/tasks/ArteVideoInfoTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/arte/tasks/ArteVideoInfoTask.java @@ -64,7 +64,9 @@ protected void postProcessingNextPage(PagedElementListDTO aRes protected void postProcessingElements(Set elements, TopicUrlDTO aDTO) { for (ArteVideoInfoDto element : elements) { - taskResults.add(element); + if(!taskResults.add(element)) { + log.debug("Duplicate film"); + } } } diff --git 
a/src/main/java/de/mediathekview/mserver/crawler/arte/tasks/ArteVideoLinkTask.java b/src/main/java/de/mediathekview/mserver/crawler/arte/tasks/ArteVideoLinkTask.java index 724938bcf..ede10fc40 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/arte/tasks/ArteVideoLinkTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/arte/tasks/ArteVideoLinkTask.java @@ -18,7 +18,6 @@ import de.mediathekview.mserver.crawler.basic.AbstractCrawler; import de.mediathekview.mserver.crawler.basic.AbstractJsonRestTask; import de.mediathekview.mserver.crawler.basic.AbstractRecursiveConverterTask; -import de.mediathekview.mserver.crawler.basic.PagedElementListDTO; import jakarta.ws.rs.core.Response; //return T Class from this task, desirialisation of class R , D , Reasearch in this url diff --git a/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractJsonRestTask.java b/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractJsonRestTask.java index 8cbd2f197..c18ee4b77 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractJsonRestTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractJsonRestTask.java @@ -13,12 +13,16 @@ import java.util.Optional; import java.util.Queue; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + import static jakarta.ws.rs.core.HttpHeaders.ACCEPT_CHARSET; import static jakarta.ws.rs.core.HttpHeaders.ACCEPT_ENCODING; /** A abstract REST api task which requests the given url with the Funk Api settings. 
*/ public abstract class AbstractJsonRestTask extends AbstractRestTask { + protected final transient Logger log = LogManager.getLogger(this.getClass()); protected static final String ENCODING_GZIP = "gzip"; private static final long serialVersionUID = -1090560363478964885L; protected final transient GsonBuilder gsonBuilder; @@ -41,24 +45,66 @@ protected AbstractJsonRestTask( @Override protected void processRestTarget(final D aDTO, final WebTarget aTarget) { - gsonBuilder.registerTypeAdapter(getType(), getParser(aDTO)); - final Gson gson = gsonBuilder.create(); - Builder request = aTarget.request(); - final Optional authKey = getAuthKey(); - if (authKey.isPresent()) { - request = request.header(HEADER_AUTHORIZATION, authKey.get()); - } + gsonBuilder.registerTypeAdapter(getType(), getParser(aDTO)); + final Gson gson = gsonBuilder.create(); - final Response response = createResponse(request, aDTO); - - if (response.getStatus() == 200) { - final String jsonOutput = response.readEntity(String.class); - final R responseObj = gson.fromJson(jsonOutput, getType()); - postProcessing(responseObj, aDTO); - } else { - handleHttpError(aDTO, aTarget.getUri(), response); - } + Builder request = aTarget.request(); + final Optional authKey = getAuthKey(); + if (authKey.isPresent()) { + request = request.header(HEADER_AUTHORIZATION, authKey.get()); + } + final int maxRetries = 3; + int attempt = 0; + while (attempt < maxRetries) { + attempt++; + Response response = null; + try { + response = createResponse(request, aDTO); + int status = response.getStatus(); + if (status == 200) { + final String jsonOutput = response.readEntity(String.class); + final R responseObj = gson.fromJson(jsonOutput, getType()); + postProcessing(responseObj, aDTO); + return; + } + if (status == 429 && attempt < maxRetries) { + final long proposalWaitMillis = getRetryAfterMillis(response).orElse(1000L) * attempt; + long waitMillis = proposalWaitMillis; + if (waitMillis < 100) { + waitMillis = 100; + } else if 
(waitMillis > 180000 ) { + waitMillis = 180000; + } + //log.debug("Too Many Requests - propsoal: {} waiting: {} ", proposalWaitMillis, waitMillis); + Thread.sleep(waitMillis); + continue; + } + handleHttpError(aDTO, aTarget.getUri(), response); + return; + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException("Retry interrupted", e); + } finally { + if (response != null) { + response.close(); + } + } + } } + + private Optional getRetryAfterMillis(Response response) { + String retryAfter = response.getHeaderString("Retry-After"); + if (retryAfter == null) { + return Optional.empty(); + } + try { + long seconds = Long.parseLong(retryAfter); + return Optional.of(seconds * 1000); + } catch (NumberFormatException e) { + return Optional.empty(); + } +} + protected Response createResponse(final Builder request, final D aDTO) { request.header(ACCEPT_CHARSET, StandardCharsets.UTF_8); From 3b0398f4e85bd162c2bc988c6fb92d41d0217d09 Mon Sep 17 00:00:00 2001 From: CodingPF Date: Sat, 21 Feb 2026 01:03:19 +0100 Subject: [PATCH 22/23] kika geo, dw old resolutions, ard fix typo, testcase and remove related --- .../mserver/crawler/ard/json/ArdFilmDeserializer.java | 8 ++++---- .../crawler/dw/parser/DwFilmDetailDeserializer.java | 8 +++++++- .../mserver/crawler/kika/tasks/KikaApiFilmTask.java | 10 ++++++++-- .../crawler/ard/json/ArdFilmDeserializerTest.java | 2 +- 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java index 44f6f0ead..8e58e868d 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java @@ -248,10 +248,10 @@ private Optional> fallbackToM3U(Optional newUrls.put(key, value.toString())); // // TODO: FIXME - if 
(!resolutionUrlMapFromM3U.containsKey(Resolution.NORMAL)) { - Resolution anyResolution = resolutionUrlMapFromM3U.keySet().stream().findFirst().get(); - resolutionUrlMapFromM3U.put(Resolution.NORMAL, resolutionUrlMapFromM3U.get(anyResolution)); - resolutionUrlMapFromM3U.remove(anyResolution); + if (!newUrls.containsKey(Resolution.NORMAL)) { + Resolution anyResolution = newUrls.keySet().stream().findFirst().get(); + newUrls.put(Resolution.NORMAL, newUrls.get(anyResolution)); + newUrls.remove(anyResolution); } return Optional.of(newUrls); } diff --git a/src/main/java/de/mediathekview/mserver/crawler/dw/parser/DwFilmDetailDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/dw/parser/DwFilmDetailDeserializer.java index cda5be96c..fe0852e12 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/dw/parser/DwFilmDetailDeserializer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/dw/parser/DwFilmDetailDeserializer.java @@ -245,8 +245,14 @@ private Optional> getVideos( videoListe.remove(Resolution.HD); } } + // TODO: add more resolutions etc - but for backwards compatibility - set to the current once + final Map videoListeBackwardsCompat = new ConcurrentHashMap<>(); + videoListeBackwardsCompat.put(Resolution.HD, videoListe.get(Resolution.WQHD)); + videoListeBackwardsCompat.put(Resolution.NORMAL, videoListe.get(Resolution.NORMAL)); + videoListeBackwardsCompat.put(Resolution.SMALL, videoListe.get(Resolution.SMALL)); + if (videoListe.size() > 0) { - return Optional.of(videoListe); + return Optional.of(videoListeBackwardsCompat); } LOG.error("No video url for video: {}", videoid); return Optional.empty(); diff --git a/src/main/java/de/mediathekview/mserver/crawler/kika/tasks/KikaApiFilmTask.java b/src/main/java/de/mediathekview/mserver/crawler/kika/tasks/KikaApiFilmTask.java index 7460fd79d..d0e5459e9 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/kika/tasks/KikaApiFilmTask.java +++ 
b/src/main/java/de/mediathekview/mserver/crawler/kika/tasks/KikaApiFilmTask.java @@ -11,6 +11,7 @@ import java.util.Collection; import java.util.EnumMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Queue; @@ -118,8 +119,13 @@ protected void postProcessing(KikaApiVideoInfoDto aResponseObj, KikaApiFilmDto a aFilm.setUrls(getVideoUrls(aResponseObj, aDTO)); aFilm.addAllSubtitleUrls(getSubtitle(aResponseObj, aDTO)); // - - + if(aFilm.getDefaultUrl().isPresent() && ( + aFilm.getDefaultUrl().get().getUrl().toString().contains("/dach/") || + aFilm.getDefaultUrl().get().getUrl().toString().contains("/deChAt/")) + ) { + aFilm.setGeoLocations(List.of(GeoLocations.GEO_DE_AT_CH)); + } + // if (!taskResults.add(aFilm)) { LOG.debug("Rejected duplicate {}",aFilm); crawler.incrementAndGetErrorCount(); diff --git a/src/test/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializerTest.java b/src/test/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializerTest.java index 006b87b5a..51ddca289 100644 --- a/src/test/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializerTest.java +++ b/src/test/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializerTest.java @@ -422,7 +422,7 @@ public void test() { expectedADUrlNormal, expectedADUrlHd, expectedSubtitle); - assertThat(films[0].getRelatedFilms(), Matchers.containsInAnyOrder(relatedFilms)); + //assertThat(films[0].getRelatedFilms(), Matchers.containsInAnyOrder(relatedFilms)); } } From 129e281fdd9b71074ab02237d7ba0b0fe8c33811 Mon Sep 17 00:00:00 2001 From: CodingPF Date: Sat, 14 Mar 2026 19:57:58 +0100 Subject: [PATCH 23/23] add ratelimiter, cleanup comments, fix ard test, DW backwards compat fix --- .../crawler/ard/json/ArdFilmDeserializer.java | 26 ------------- .../crawler/ard/tasks/ArdTopicPageTask.java | 2 - .../crawler/basic/AbstractJsonRestTask.java | 1 + .../dw/parser/DwFilmDetailDeserializer.java | 8 +--- 
.../ard/json/ArdFilmDeserializerTest.java | 39 ------------------- 5 files changed, 2 insertions(+), 74 deletions(-) diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java index 8e58e868d..aabf7c52a 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java @@ -210,9 +210,6 @@ public List deserialize( date.orElse(null), duration.orElse(null), videoInfo.get())); - //if (widgets.size() > 1) { - //parseRelatedFilms(filmDto, widgets.get(1).getAsJsonObject()); - //} films.add(filmDto); } // OV - long term this should go into Film as "OV" @@ -297,23 +294,6 @@ private Optional parsePartner(final JsonObject playerPageObject) { return Optional.empty(); } - private void parseRelatedFilms(final ArdFilmDto filmDto, final JsonObject playerPageObject) { - if (playerPageObject.has(ELEMENT_TEASERS)) { - final JsonElement teasersElement = playerPageObject.get(ELEMENT_TEASERS); - if (teasersElement.isJsonArray()) { - for (final JsonElement teasersItemElement : teasersElement.getAsJsonArray()) { - final JsonObject teasersItemObject = teasersItemElement.getAsJsonObject(); - final Optional id = - JsonUtils.getAttributeAsString(teasersItemObject, ATTRIBUTE_ID); - if (id.isPresent()) { - final String url = String.format(ArdConstants.ITEM_URL, id.get()); - filmDto.addRelatedFilm(new ArdFilmInfoDto(id.get(), url, 0)); - } - } - } - } - } - private Film createFilm( final String id, final Sender sender, @@ -446,12 +426,6 @@ private Optional parseVideos(final JsonObject playerPageObject, } } - /* - Optional> tt = parseVideoUrlMap(playerPageObject, MARKER_VIDEO_CATEGORY_MAIN, MARKER_VIDEO_STANDARD, MARKER_VIDEO_MP4, MARKER_VIDEO_DE); - String a = videoInfoAdaptive.get().entrySet().stream().findFirst().get().getValue(); - if(tt.isPresent() && 
!a.startsWith("https://funk") && !a.contains("arte") ) - //UrlOptimizer.debug(a, tt.get()); - urlOptimizer.debug2(a, videoInfoStandard.get());*/ } return Optional.of(allVideoUrls); diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicPageTask.java b/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicPageTask.java index 29b594882..64ce1ad1b 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicPageTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicPageTask.java @@ -39,7 +39,6 @@ protected void processRestTarget(final CrawlerUrlDTO aDTO, final WebTarget aTarg && topicInfo.getFilmInfos() != null && !topicInfo.getFilmInfos().isEmpty()) { taskResults.addAll(topicInfo.getFilmInfos()); - //LOG.debug("Found {} shows for a topic of ARD.", topicInfo.getFilmInfos().size()); final Queue subpages = createSubPageUrls(aTarget, topicInfo); if (!subpages.isEmpty()) { @@ -65,7 +64,6 @@ private Queue createSubPageUrls( break; } } - //LOG.debug("Found {} subpage", subpages.size()); return subpages; } diff --git a/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractJsonRestTask.java b/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractJsonRestTask.java index c18ee4b77..2654b92bf 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractJsonRestTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/basic/AbstractJsonRestTask.java @@ -77,6 +77,7 @@ protected void processRestTarget(final D aDTO, final WebTarget aTarget) { } //log.debug("Too Many Requests - propsoal: {} waiting: {} ", proposalWaitMillis, waitMillis); Thread.sleep(waitMillis); + crawler.getRateLimiter().acquire(); continue; } handleHttpError(aDTO, aTarget.getUri(), response); diff --git a/src/main/java/de/mediathekview/mserver/crawler/dw/parser/DwFilmDetailDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/dw/parser/DwFilmDetailDeserializer.java index fe0852e12..cda5be96c 
100644 --- a/src/main/java/de/mediathekview/mserver/crawler/dw/parser/DwFilmDetailDeserializer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/dw/parser/DwFilmDetailDeserializer.java @@ -245,14 +245,8 @@ private Optional> getVideos( videoListe.remove(Resolution.HD); } } - // TODO: add more resolutions etc - but for backwards compatibility - set to the current once - final Map videoListeBackwardsCompat = new ConcurrentHashMap<>(); - videoListeBackwardsCompat.put(Resolution.HD, videoListe.get(Resolution.WQHD)); - videoListeBackwardsCompat.put(Resolution.NORMAL, videoListe.get(Resolution.NORMAL)); - videoListeBackwardsCompat.put(Resolution.SMALL, videoListe.get(Resolution.SMALL)); - if (videoListe.size() > 0) { - return Optional.of(videoListeBackwardsCompat); + return Optional.of(videoListe); } LOG.error("No video url for video: {}", videoid); return Optional.empty(); diff --git a/src/test/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializerTest.java b/src/test/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializerTest.java index 51ddca289..b1de5db1c 100644 --- a/src/test/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializerTest.java +++ b/src/test/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializerTest.java @@ -5,14 +5,11 @@ import de.mediathekview.mserver.daten.Sender; import de.mediathekview.mserver.base.messages.listener.MessageListener; import de.mediathekview.mserver.base.config.MServerConfigManager; -import de.mediathekview.mserver.crawler.ard.ArdConstants; import de.mediathekview.mserver.crawler.ard.ArdCrawler; import de.mediathekview.mserver.crawler.ard.ArdFilmDto; -import de.mediathekview.mserver.crawler.ard.ArdFilmInfoDto; import de.mediathekview.mserver.progress.listeners.SenderProgressListener; import de.mediathekview.mserver.testhelper.AssertFilm; import de.mediathekview.mserver.testhelper.JsonFileReader; -import org.hamcrest.Matchers; import org.junit.Test; import org.junit.runner.RunWith; 
import org.junit.runners.Parameterized; @@ -45,7 +42,6 @@ public class ArdFilmDeserializerTest { private final String expectedDGSUrlHd; private final String expectedSubtitle; private final GeoLocations expectedGeo; - private final ArdFilmInfoDto[] relatedFilms; private final Optional additionalSender; protected MServerConfigManager rootConfig = new MServerConfigManager("MServer-JUnit-Config.yaml"); @@ -68,7 +64,6 @@ public ArdFilmDeserializerTest( final String expectedDGSUrlHd, final String expectedSubtitle, final GeoLocations expectedGeo, - final ArdFilmInfoDto[] relatedFilms, final Optional additionalSender) { this.jsonFile = jsonFile; this.expectedTopic = expectedTopic; @@ -87,7 +82,6 @@ public ArdFilmDeserializerTest( this.expectedDGSUrlHd = expectedDGSUrlHd; this.expectedSubtitle = expectedSubtitle; this.expectedGeo = expectedGeo; - this.relatedFilms = relatedFilms; this.additionalSender = additionalSender; } @@ -113,7 +107,6 @@ public static Collection data() { /*DGShd*/ "https://mediandr-a.akamaihd.net/progressive_geo/2022/0104/TV-20220104-0902-5000.hd.mp4", /*sub*/ "https://api.ardmediathek.de/player-service/subtitle/ebutt/urn:ard:subtitle:eaa2ed13a677cd00", /*hd*/ GeoLocations.GEO_DE, - /*related*/ new ArdFilmInfoDto[0], /*sender*/ Optional.of(Sender.KIKA) }, { @@ -134,16 +127,6 @@ public static Collection data() { /*DGShd*/ "https://pd-videos.daserste.de/int/2024/01/24/03247ab1-4dcc-427e-b577-a6ca25c1dffe/JOB_432151_sendeton_1920x1080-50p-5000kbit.mp4", /*sub*/ "https://api.ardmediathek.de/player-service/subtitle/webvtt/urn:ard:subtitle:7b0043ec0b358eb8.vtt", /*hd*/ GeoLocations.GEO_NONE, - /*related*/ new ArdFilmInfoDto[] { - new ArdFilmInfoDto( - "Y3JpZDovL3dkci5kZS9CZWl0cmFnLThlNjczODVlLWZhZTktNDMwYi1iNzI1LTA0NjU1ZmRmMDljZQ", - String.format(ArdConstants.ITEM_URL, "Y3JpZDovL3dkci5kZS9CZWl0cmFnLThlNjczODVlLWZhZTktNDMwYi1iNzI1LTA0NjU1ZmRmMDljZQ"), - 0), - new ArdFilmInfoDto( - 
"Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtZWRmMTRhM2UtNmM3Ny00NGZhLTg1ZWYtYTJkYmZmNzM0NTg5", - String.format(ArdConstants.ITEM_URL, "Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtZWRmMTRhM2UtNmM3Ny00NGZhLTg1ZWYtYTJkYmZmNzM0NTg5"), - 0) - }, /*sender*/ Optional.of(Sender.ARD) }, { @@ -164,12 +147,6 @@ public static Collection data() { /*DGShd */ "", /*sub*/ "", /*hd*/ GeoLocations.GEO_NONE, - /*related*/ new ArdFilmInfoDto[] { - new ArdFilmInfoDto( - "Y3JpZDovL2JyLmRlL3ZpZGVvLzkwZTA1Y2Y5LTA4ZDEtNGU4Zi1iNTQyLWNiYjIyYzcyZDA0Mw", - String.format(ArdConstants.ITEM_URL, "Y3JpZDovL2JyLmRlL3ZpZGVvLzkwZTA1Y2Y5LTA4ZDEtNGU4Zi1iNTQyLWNiYjIyYzcyZDA0Mw"), - 0) - }, /*sender*/ Optional.of(Sender.BR) }, { @@ -190,12 +167,6 @@ public static Collection data() { /*DGShd */ "", /*sub*/ "https://api.ardmediathek.de/player-service/subtitle/webvtt/urn:ard:subtitle:c09c9cee3bf53db8.vtt", /*hd*/ GeoLocations.GEO_NONE, - /*related*/ new ArdFilmInfoDto[] { - new ArdFilmInfoDto( - "Y3JpZDovL3RhZ2Vzc2NoYXUuZGUvNTBjOTc0OGUtMTIwYi00MjllLWI2ODEtZTkyMTY5ODEyNGI0X2dhbnplU2VuZHVuZw", - String.format(ArdConstants.ITEM_URL, "Y3JpZDovL3RhZ2Vzc2NoYXUuZGUvNTBjOTc0OGUtMTIwYi00MjllLWI2ODEtZTkyMTY5ODEyNGI0X2dhbnplU2VuZHVuZw"), - 0) - }, /*sender*/ Optional.of(Sender.ARD), }, { @@ -216,7 +187,6 @@ public static Collection data() { /*DGShd */ "", /*sub*/ "", /*hd*/ GeoLocations.GEO_NONE, - /*related*/ new ArdFilmInfoDto[0], /*sender*/ Optional.of(Sender.HR), }, { @@ -237,7 +207,6 @@ public static Collection data() { /*DGShd */ "", /*sub*/ "https://api.ardmediathek.de/player-service/subtitle/webvtt/urn:ard:subtitle:ea9ad6b71df1b8ed.vtt", /*hd*/ GeoLocations.GEO_NONE, - /*related*/ new ArdFilmInfoDto[0], /*sender*/ Optional.of(Sender.NDR), }, { @@ -258,7 +227,6 @@ public static Collection data() { /*DGShd */ "", /*sub*/ "https://api.ardmediathek.de/player-service/subtitle/webvtt/urn:ard:subtitle:0567b031db73e4b9.vtt", /*hd*/ GeoLocations.GEO_DE, - /*related*/ new ArdFilmInfoDto[0], /*sender*/ Optional.of(Sender.ONE), 
}, { @@ -279,7 +247,6 @@ public static Collection data() { /*DGShd */ "", /*sub*/ "", /*hd*/ GeoLocations.GEO_NONE, - /*related*/ new ArdFilmInfoDto[0], /*sender*/ Optional.of(Sender.RBB), }, { @@ -300,7 +267,6 @@ public static Collection data() { /*DGShd */ "", /*sub*/ "https://api.ardmediathek.de/player-service/subtitle/webvtt/urn:ard:subtitle:a1d11ac623c7d120.vtt", /*hd*/ GeoLocations.GEO_NONE, - /*related*/ new ArdFilmInfoDto[0], /*sender*/ Optional.of(Sender.ARD), }, { @@ -321,7 +287,6 @@ public static Collection data() { /*DGShd */ "", /*sub*/ "https://api.ardmediathek.de/player-service/subtitle/ebutt/urn:ard:subtitle:7d1c01087f8cae77", /*hd*/ GeoLocations.GEO_DE, - /*related*/ new ArdFilmInfoDto[0], /*sender*/ Optional.of(Sender.MDR), }, { @@ -342,7 +307,6 @@ public static Collection data() { /*DGShd */ "", /*sub*/ "", /*hd*/ GeoLocations.GEO_DE, - /*related*/ new ArdFilmInfoDto[0], /*sender*/ Optional.of(Sender.SWR), }, { @@ -363,7 +327,6 @@ public static Collection data() { /*DGShd */ "", /*sub*/ "https://api.ardmediathek.de/player-service/subtitle/webvtt/urn:ard:subtitle:efab8bf55007171e.vtt", /*hd*/ GeoLocations.GEO_DE, - /*related*/ new ArdFilmInfoDto[0], /*sender*/ Optional.of(Sender.ARD), }, { @@ -384,7 +347,6 @@ public static Collection data() { /*DGShd */ "", /*sub*/ "https://api.ardmediathek.de/player-service/subtitle/webvtt/urn:ard:subtitle:d0e38dd26e6cc85e.vtt", /*hd*/ GeoLocations.GEO_DE, - /*related*/ new ArdFilmInfoDto[0], /*sender*/ Optional.of(Sender.ONE), } }); @@ -422,7 +384,6 @@ public void test() { expectedADUrlNormal, expectedADUrlHd, expectedSubtitle); - //assertThat(films[0].getRelatedFilms(), Matchers.containsInAnyOrder(relatedFilms)); } }