diff --git a/MServer-Config.yaml b/MServer-Config.yaml
index 93925bd50..d4684761d 100644
--- a/MServer-Config.yaml
+++ b/MServer-Config.yaml
@@ -1,5 +1,15 @@
#### Server configurations ####
+# Film DB
+databaseConfig:
+ active: false
+ url: "jdbc:postgresql://localhost:55432/crawler"
+ username: "crawler"
+ password: "secret"
+ refreshIntervalInDays: 7
+ checkUrlIntervalInDays: 3
+ batchSize: 2000
+
# The maximum amount of cpu threads to be used.
maximumCpuThreads: 10
@@ -17,17 +27,16 @@ maximumRequestsPerSecond: 999.0
# If set only these Sender will be crawled all other will be ignored.
senderIncluded:
- #- ARD
- - ARTE_DE
- - ARTE_FR
- - ARTE_PL
- - ARTE_IT
- - ARTE_ES
- - ARTE_EN
+ - ARD
+ #- ARTE_DE
+ #- ARTE_FR
+ #- ARTE_PL
+ #- ARTE_IT
+ #- ARTE_ES
+ #- ARTE_EN
#- DREISAT
- #- FUNK
#- KIKA
- # - DW
+ #- DW
#- ORF
#- PHOENIX
#- SRF
@@ -99,8 +108,8 @@ filmlistIdFilePath: target/filmlists/filmlist.id.xx
# import additional filmlist sources
importFilmlistConfigurations :
- - active: false
- path: "someCrawlerlist.json"
+ - active: true
+ path: "Filmliste-akt"
format: OLD_JSON
createDiff: false
checkImportListUrl: false
@@ -113,7 +122,17 @@ importFilmlistConfigurations :
path: "https://verteiler1.mediathekview.de/filme-org.xz"
format: OLD_JSON_COMPRESSED_XZ
createDiff: true
- checkImportListUrl: true
+ checkImportListUrl: true
+ - active: false
+ path: "https://verteiler1.mediathekview.de/Filmliste-akt.xz"
+ format: OLD_JSON_COMPRESSED_XZ
+ createDiff: false
+ checkImportListUrl: false
+ - active: false
+ path: jdbc
+ format: OLD_JSON
+ createDiff: false
+ checkImportListUrl: false
# film url is consider invalid if the size is below the minSize
checkImportListUrlMinSize: 5012
@@ -141,7 +160,7 @@ maximumSubpages: 5
maximumDaysForSendungVerpasstSection: 7
# The maximum amount of days going to future will be crawled for the "Sendung Verpasst?" section.
-maximumDaysForSendungVerpasstSectionFuture: 0
+maximumDaysForSendungVerpasstSectionFuture: 3
# The time in seconds before a socket connection should time out.
socketTimeoutInSeconds: 60
@@ -151,11 +170,12 @@ socketTimeoutInSeconds: 60
senderConfigurations:
ARD:
# Actually the ARD has a maximum of 6 days in the past
- maximumDaysForSendungVerpasstSection: 1
+ maximumDaysForSendungVerpasstSection: 6
+ maximumDaysForSendungVerpasstSectionFuture: 6
#2,4,8 ok
maximumUrlsPerTask: 32
#10,20,40 ok
- maximumSubpages: 0
+ maximumSubpages: 40
ORF:
maximumRequestsPerSecond: 10.0
ARTE_DE:
@@ -171,7 +191,7 @@ senderConfigurations:
ARTE_ES:
maximumSubpages: 6
KIKA:
- maximumSubpages: 2
+ maximumSubpages: 4
maximumRequestsPerSecond: 8.0
ZDF:
maximumDaysForSendungVerpasstSection: 21
@@ -179,12 +199,13 @@ senderConfigurations:
FUNK:
maximumUrlsPerTask: 99
DREISAT:
- maximumSubpages: 5
- maximumDaysForSendungVerpasstSection: 60
+ maximumSubpages: 15
+ maximumDaysForSendungVerpasstSection: 30
+ maximumDaysForSendungVerpasstSectionFuture: 20
PHOENIX:
maximumSubpages: 500
SRF:
- maximumSubpages: 1
+ maximumSubpages: 25
#### COPY ####
copySettings:
@@ -206,6 +227,8 @@ copySettings:
# JSON_COMPRESSED: /var/www/mediathekview/filmlisten/filmliste_diff.json.xz
OLD_JSON_COMPRESSED_XZ: copyTarget/filmliste_old_diff.json.xz
+
+
#### Logging ####
logSettings:
# The log level for the console.
@@ -240,4 +263,4 @@ logSettings:
# The pattern of the file name of the archived log files.
# See: https://logging.apache.org/log4j/2.0/manual/appenders.html#RollingFileAppender
- logFileRollingPattern: logs/${date:yyyy-MM}/server-%d{MM-dd-yyyy}-%i.log
\ No newline at end of file
+ logFileRollingPattern: logs/${date:yyyy-MM}/server-%d{MM-dd-yyyy-HH}-%i.log
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index e4e6d34dc..c56c407ac 100644
--- a/pom.xml
+++ b/pom.xml
@@ -93,7 +93,7 @@
2.35.2
1.10
0.9.2
- 0.40.2
+ 0.48.0
3.2.0
@@ -125,6 +125,20 @@
+
+
+ org.postgresql
+ postgresql
+ 42.7.4
+
+
+
+
+ com.zaxxer
+ HikariCP
+ 5.1.0
+
+
org.apache.commons
commons-compress
@@ -486,16 +500,14 @@
-
io.fabric8
docker-maven-plugin
${docker-maven-plugin.version}
-
+
mediathekview/mserver:${project.version}
-
eclipse-temurin:${maven.compiler.target}
Nicklas Wiegandt <nicklas@wiegandt.eu>
@@ -503,7 +515,7 @@
docker-assembly.xml
- -Xmx4G
+ -Xmx8G
config.yaml
diff --git a/src/main/docker/docker-compose.yml b/src/main/docker/docker-compose.yml
new file mode 100644
index 000000000..f87268681
--- /dev/null
+++ b/src/main/docker/docker-compose.yml
@@ -0,0 +1,62 @@
+
+configs:
+ init_sql:
+ content: |
+ CREATE TABLE IF NOT EXISTS filme (
+ id TEXT PRIMARY KEY, -- eindeutige Film-ID
+ data JSONB NOT NULL, -- JSON-Daten des Films
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+ last_seen TIMESTAMPTZ NOT NULL DEFAULT now(),
+ last_update TIMESTAMPTZ NOT NULL DEFAULT now(),
+ last_url_check TIMESTAMPTZ NOT NULL DEFAULT now()
+ );
+
+services:
+
+ postgresMV:
+ image: postgres:18
+ container_name: crawler-postgres
+ environment:
+ POSTGRES_DB: crawler
+ POSTGRES_USER: crawler
+ POSTGRES_PASSWORD: secret
+ TZ: Europe/Berlin
+ ports:
+ - "55432:5432"
+ volumes:
+ # Persistente Daten
+ - c:/tmp/pgdata:/var/lib/postgresql
+ configs:
+ - source: init_sql
+ target: /docker-entrypoint-initdb.d/init.sql
+ restart: unless-stopped
+
+ mserver-r1:
+ image: mediathekview/mserver:4.0.1-SNAPSHOT
+ environment:
+ MSERVER_OPTS: --config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R1.yaml
+ volumes:
+ - c:/tmp/filmlists:/filmlists
+ - c:/tmp/logs:/logs
+ - c:/tmp/hist:/hist
+ - c:/tmp/config:/config
+
+ mserver-r2:
+ image: mediathekview/mserver:4.0.1-SNAPSHOT
+ environment:
+ MSERVER_OPTS: --config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R2.yaml
+ volumes:
+ - c:/tmp/filmlists:/filmlists
+ - c:/tmp/logs:/logs
+ - c:/tmp/hist:/hist
+ - c:/tmp/config:/config
+
+ mserver-checkUrls:
+ image: mediathekview/mserver:4.0.1-SNAPSHOT
+ environment:
+ MSERVER_OPTS: --config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R2.yaml --flow checkAvailability
+ volumes:
+ - /volume1/docker/Projekt-mv2/filmlists:/filmlists
+ - /volume1/docker/Projekt-mv2/logs:/logs
+ - /volume1/docker/Projekt-mv2/hist:/hist
+ - /volume1/docker/Projekt-mv2/config:/config
\ No newline at end of file
diff --git a/src/main/docker/runDocker b/src/main/docker/runDocker
new file mode 100644
index 000000000..989b93375
--- /dev/null
+++ b/src/main/docker/runDocker
@@ -0,0 +1,19 @@
+# all docker command
+
+# run PG
+docker compose up -d postgresMV
+
+# cron 01 01 long run
+docker compose run -d --rm -e MSERVER_OPTS="--config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R1.yaml" mserver-r1
+
+# cron 06-22 1,31 short run
+docker compose run -d --rm -e MSERVER_OPTS="--config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R2.yaml" mserver-r2
+
+# cron 22 55 url check
+docker compose run -d --rm -e MSERVER_OPTS="--config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R2.yaml --flow checkAvailability" mserver-checkUrls
+
+# on demand - do not run this unless you know what you are doing!
+docker compose run -d --rm -e MSERVER_OPTS="--config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R2.yaml --flow importFilmlistIntoDB" mserver-r2
+
+## docker save -o mserver.tar mediathekview/mserver:4.0.1-SNAPSHOT
+## docker load -i mserver.tar
diff --git a/src/main/java/de/mediathekview/mserver/base/config/ConfigManager.java b/src/main/java/de/mediathekview/mserver/base/config/ConfigManager.java
index 6aa3a6de5..12ebbaef9 100644
--- a/src/main/java/de/mediathekview/mserver/base/config/ConfigManager.java
+++ b/src/main/java/de/mediathekview/mserver/base/config/ConfigManager.java
@@ -11,7 +11,7 @@
/** A manager to load configurations. */
public abstract class ConfigManager {
private T config;
- private static final Logger LOG = LogManager.getLogger(ConfigManager.class);
+ //private static final Logger LOG = LogManager.getLogger(ConfigManager.class);
protected abstract String getConfigFileName();
@@ -58,7 +58,8 @@ public String getResourcePath(String resourceName) {
}
}
} catch(Exception e) {
- LOG.debug(e);
+ //LOG.debug(e);
+ e.printStackTrace();
}
return null;
}
diff --git a/src/main/java/de/mediathekview/mserver/base/config/MServerConfigDTO.java b/src/main/java/de/mediathekview/mserver/base/config/MServerConfigDTO.java
index 921c0f61a..e004a81e3 100644
--- a/src/main/java/de/mediathekview/mserver/base/config/MServerConfigDTO.java
+++ b/src/main/java/de/mediathekview/mserver/base/config/MServerConfigDTO.java
@@ -8,11 +8,13 @@
/** A POJO with the configs for MServer. */
public class MServerConfigDTO extends MServerBasicConfigDTO implements ConfigDTO {
+ private MServerDBConfig databaseConfig;
private final MServerCopySettings copySettings;
private final Boolean writeFilmlistHashFileEnabled;
private final String filmlistHashFilePath;
private final Boolean writeFilmlistIdFileEnabled;
private final String filmlistIdFilePath;
+
/** ignore certain film by title **/
private String ignoreFilmlistPath;
/** add livestreams from external list **/
@@ -48,6 +50,7 @@ public MServerConfigDTO() {
filmlistSavePaths = new EnumMap<>(FilmlistFormats.class);
filmlistDiffSavePaths = new EnumMap<>(FilmlistFormats.class);
copySettings = new MServerCopySettings();
+ databaseConfig = new MServerDBConfig();
logSettings = new MServerLogSettingsDTO();
crawlerURLs = new EnumMap<>(CrawlerUrlType.class);
@@ -242,7 +245,15 @@ public ImportLivestreamConfiguration getImportLivestreamConfiguration() {
public List getImportFilmlistConfigurations() {
return importFilmlistConfigurations;
}
-
+
+ public MServerDBConfig getDatabaseConfig() {
+ return databaseConfig;
+ }
+
+ public void setDatabaseConfig(MServerDBConfig databaseConfig) {
+ this.databaseConfig = databaseConfig;
+ }
+
/**
* Loads the {@link Sender} specific configuration and if it not exist creates one.
*
@@ -286,7 +297,8 @@ public boolean equals(final Object o) {
&& Objects.equals(getFilmlistIdFilePath(), that.getFilmlistIdFilePath())
&& Objects.equals(getIgnoreFilmslistPath(), that.getIgnoreFilmslistPath())
&& Objects.equals(getImportLivestreamConfiguration(), that.getImportLivestreamConfiguration())
- && Objects.equals(getImportFilmlistConfigurations(), that.getImportFilmlistConfigurations());
+ && Objects.equals(getImportFilmlistConfigurations(), that.getImportFilmlistConfigurations())
+ && Objects.equals(getDatabaseConfig(), that.getDatabaseConfig());
}
@Override
@@ -313,7 +325,8 @@ public int hashCode() {
getFilmlistIdFilePath(),
getIgnoreFilmslistPath(),
getImportLivestreamConfiguration(),
- getImportFilmlistConfigurations());
+ getImportFilmlistConfigurations(),
+ getDatabaseConfig());
}
public void initializeSenderConfigurations() {
diff --git a/src/main/java/de/mediathekview/mserver/base/config/MServerDBConfig.java b/src/main/java/de/mediathekview/mserver/base/config/MServerDBConfig.java
new file mode 100644
index 000000000..1e78413d3
--- /dev/null
+++ b/src/main/java/de/mediathekview/mserver/base/config/MServerDBConfig.java
@@ -0,0 +1,108 @@
+package de.mediathekview.mserver.base.config;
+
+import java.util.Objects;
+
+public class MServerDBConfig {
+ private boolean active;
+ private String url;
+ private String username;
+ private String password;
+ private Integer refreshIntervalInDays;
+ private Integer checkUrlIntervalInDays;
+ private Integer batchSize;
+
+ public MServerDBConfig() {
+ active = false;
+ url = "jdbc:postgresql://postgresMV:5432/crawler";
+ username = "crawler";
+ password = "secret";
+ refreshIntervalInDays = 7;
+ checkUrlIntervalInDays = 3;
+ batchSize = 2000;
+ }
+
+ public MServerDBConfig(Boolean active, String url, String username, String password, int refreshIntervalInDays, int checkUrlIntervalInDays, int batchSize ) {
+ this.active = active;
+ this.url = url;
+ this.username = username;
+ this.password = password;
+ this.refreshIntervalInDays = refreshIntervalInDays;
+ this.checkUrlIntervalInDays = checkUrlIntervalInDays;
+ this.batchSize = batchSize;
+ }
+
+
+
+ public Integer getBatchSize() {
+ return batchSize;
+ }
+
+ public void setBatchSize(Integer batchSize) {
+ this.batchSize = batchSize;
+ }
+
+ public Integer getRefreshIntervalInDays() {
+ return refreshIntervalInDays;
+ }
+
+ public void setRefreshIntervalInDays(Integer refreshIntervalInDays) {
+ this.refreshIntervalInDays = refreshIntervalInDays;
+ }
+
+ public Integer getCheckUrlIntervalInDays() {
+ return checkUrlIntervalInDays;
+ }
+
+ public void setCheckUrlIntervalInDays(Integer checkUrlIntervalInDays) {
+ this.checkUrlIntervalInDays = checkUrlIntervalInDays;
+ }
+
+
+ public Boolean getActive() {
+ return active;
+ }
+ public String getUrl() {
+ return url;
+ }
+ public String getUsername() {
+ return username;
+ }
+ public String getPassword() {
+ return password;
+ }
+
+
+ public void setActive(Boolean active) {
+ this.active = active;
+ }
+
+ public void setUrl(String url) {
+ this.url = url;
+ }
+
+ public void setUsername(String username) {
+ this.username = username;
+ }
+
+ public void setPassword(String password) {
+ this.password = password;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ MServerDBConfig that = (MServerDBConfig) o;
+
+ return Objects.equals(active, that.active)
+ && Objects.equals(url, that.url)
+ && Objects.equals(username, that.username)
+ && Objects.equals(password, that.password);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(active, url, username, password);
+ }
+}
diff --git a/src/main/java/de/mediathekview/mserver/base/uploader/copy/FileCopyTask.java b/src/main/java/de/mediathekview/mserver/base/uploader/copy/FileCopyTask.java
index 7dc5927cd..90f4278fd 100644
--- a/src/main/java/de/mediathekview/mserver/base/uploader/copy/FileCopyTask.java
+++ b/src/main/java/de/mediathekview/mserver/base/uploader/copy/FileCopyTask.java
@@ -10,6 +10,8 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
+import java.time.LocalDate;
+import java.time.format.DateTimeFormatter;
public class FileCopyTask extends UploadTask {
private static final Logger LOG = LogManager.getLogger(FileCopyTask.class);
@@ -25,15 +27,38 @@ protected void printMessage(final Message aMessage, final Object... args) {
@Override
protected void upload() {
try {
- if (Files.exists(uploadTarget.getTargetPath())) {
- printMessage(
- ServerMessages.FILE_COPY_TARGET_EXISTS,
- uploadTarget.getTargetPath().toAbsolutePath().toString());
+ Path target = uploadTarget.getTargetPath();
+ if (Files.exists(target)) {
+ Path backup = backupExistingFile(target);
+ LOG.debug("CopyTask found existing file - rename existing file to {} before overwrite", backup.getFileName());
}
- Files.copy(sourcePath, uploadTarget.getTargetPath(), StandardCopyOption.REPLACE_EXISTING);
+ Path tmpTarget = Files.createTempFile( uploadTarget.getTargetPath().getParent(), uploadTarget.getTargetPath().getFileName().toString(), ".tmp");
+ Files.copy(sourcePath, tmpTarget, StandardCopyOption.REPLACE_EXISTING);
+ Files.move(tmpTarget, target, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE);
} catch (final IOException ioException) {
LOG.error("Something went wrong on copying the film list.", ioException);
printMessage(ServerMessages.FILE_COPY_ERROR);
}
}
+
+ private Path backupExistingFile(Path target) throws IOException {
+ String fileName = target.getFileName().toString();
+ Path dir = target.getParent();
+
+ String date = LocalDate.now()
+ .format(DateTimeFormatter.ofPattern("yyyy-MM-dd"));
+
+ Path backup = dir.resolve(fileName + "." + date);
+
+ int counter = 1;
+ while (Files.exists(backup)) {
+ backup = dir.resolve(fileName + "." + date + "." + counter);
+ counter++;
+ }
+
+ Files.move(target, backup);
+
+ return backup;
+ }
+
}
diff --git a/src/main/java/de/mediathekview/mserver/base/utils/CheckUrlAvailability.java b/src/main/java/de/mediathekview/mserver/base/utils/CheckUrlAvailability.java
index 42cedf74b..bc56f0195 100644
--- a/src/main/java/de/mediathekview/mserver/base/utils/CheckUrlAvailability.java
+++ b/src/main/java/de/mediathekview/mserver/base/utils/CheckUrlAvailability.java
@@ -10,7 +10,6 @@
import de.mediathekview.mserver.daten.Film;
import de.mediathekview.mserver.daten.Filmlist;
-import de.mediathekview.mserver.daten.Resolution;
import de.mediathekview.mserver.daten.Sender;
import de.mediathekview.mserver.base.utils.FileSizeDeterminer.ResponseInfo;
@@ -32,8 +31,11 @@ public CheckUrlAvailability(final long minFileSize, final long timeoutInSec, fin
fsd = new FileSizeDeterminer(30L, 30L, numberOfThreads);
}
- public Filmlist getAvaiableFilmlist(final Filmlist importList) {
- LOG.debug("start getAvaiableFilmlist(minSize {} byte, timeout {} sec)", this.minFileSize, (this.timeoutInMS/1000));
+ public Filmlist getAvailableFilmlist(final Filmlist importList) {
+ return getAvailableFilmlist(importList, true);
+ }
+ public Filmlist getAvailableFilmlist(final Filmlist importList, final boolean available) {
+ LOG.debug("start getAvailableFilmlist(minSize {} byte, timeout {} sec)", this.minFileSize, (this.timeoutInMS/1000));
start = System.currentTimeMillis();
Filmlist filteredFilmlist = new Filmlist();
filteredFilmlist.setCreationDate(importList.getCreationDate());
@@ -41,10 +43,11 @@ public Filmlist getAvaiableFilmlist(final Filmlist importList) {
//
ForkJoinPool customThreadPool = new ForkJoinPool(numberOfThreads);
customThreadPool.submit(() -> importList.getFilms().values().parallelStream()
- .filter(this::isAvailable)
+ .filter(film -> isAvailable(film) == available)
.forEach(filteredFilmlist::add))
.join();
customThreadPool.shutdown();
+ customThreadPool.close();
//
LOG.debug("checked {} urls and removed {} in {} sec and timeout was reached: {}", importList.getFilms().size(), removedCounter.get(), ((System.currentTimeMillis()-start)/1000), timeout.get());
return filteredFilmlist;
@@ -55,8 +58,7 @@ private boolean isAvailable(Film pFilm) {
timeout.set(true);
return true;
}
-
- String normalUrl = pFilm.getUrl(Resolution.NORMAL).getUrl().toString();
+ String normalUrl = pFilm.getDefaultUrl().get().getUrl().toString();
ResponseInfo ri = fsd.getRequestInfo(normalUrl);
if (pFilm.getThema().equalsIgnoreCase("Livestream")) {
diff --git a/src/main/java/de/mediathekview/mserver/base/utils/DateUtils.java b/src/main/java/de/mediathekview/mserver/base/utils/DateUtils.java
index 657385585..63a659cf7 100644
--- a/src/main/java/de/mediathekview/mserver/base/utils/DateUtils.java
+++ b/src/main/java/de/mediathekview/mserver/base/utils/DateUtils.java
@@ -1,5 +1,12 @@
package de.mediathekview.mserver.base.utils;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.List;
+
+import de.mediathekview.mserver.base.config.MServerBasicConfigDTO;
+
/** A set of util methods to work with dates. */
public class DateUtils {
private static final String SPLITTED_NUMBERS_REGEX_PATTERN = "$1:$2";
@@ -14,4 +21,23 @@ private DateUtils() {
public static String changeDateTimeForMissingISO8601Support(final String aDateTimeString) {
return aDateTimeString.replaceAll(SPLIT_NUMBERS_REGEX_PATTERN, SPLITTED_NUMBERS_REGEX_PATTERN);
}
+
+ public static List generateDaysToCrawl(MServerBasicConfigDTO config) {
+ return generateDaysToCrawl(
+ config.getMaximumDaysForSendungVerpasstSection(),
+ config.getMaximumDaysForSendungVerpasstSectionFuture(),
+ DateTimeFormatter.ofPattern("yyyy-MM-dd"));
+ }
+
+ public static List generateDaysToCrawl(int numberOfDaysInThePast, int numberOfDaysInTheFuture, DateTimeFormatter formatter) {
+ List days = new ArrayList<>();
+ final LocalDateTime now = LocalDateTime.now();
+ for (int i = 0; i <= numberOfDaysInThePast; i++) {
+ days.add(now.minusDays(i).format(formatter));
+ }
+ for (int i = 1; i <= numberOfDaysInTheFuture; i++) {
+ days.add(now.plusDays(i).format(formatter));
+ }
+ return days;
+ }
}
diff --git a/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java
new file mode 100644
index 000000000..3897b0fe9
--- /dev/null
+++ b/src/main/java/de/mediathekview/mserver/base/utils/FilmDBService.java
@@ -0,0 +1,353 @@
+package de.mediathekview.mserver.base.utils;
+
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+
+import de.mediathekview.mserver.daten.Film;
+import de.mediathekview.mserver.daten.Filmlist;
+import de.mediathekview.mserver.daten.GsonDurationAdapter;
+import de.mediathekview.mserver.daten.GsonLocalDateTimeAdapter;
+import de.mediathekview.mserver.daten.Sender;
+
+import javax.sql.DataSource;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.time.Duration;
+import java.time.LocalDateTime;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Future;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+/**
+ * Service zum Speichern einzelner Filme aus einer Filmlist in die DB. Nutzt
+ * Batch-UPSERT und einen vorhandenen ExecutorService für Parallelität.
+ */
+public class FilmDBService {
+ private static final Logger LOG = LogManager.getLogger(FilmDBService.class);
+ private final DataSource dataSource;
+ private final Gson gson;
+ private final ExecutorService executorService;
+ private final int batchSize;
+ private final Integer refreshIntervalInDays;
+
+ public FilmDBService(ExecutorService executorService, int batchSize, int refreshIntervalInDays) {
+ this.dataSource = PostgreSQLDataSourceProvider.get();
+ this.executorService = executorService;
+ this.batchSize = batchSize;
+ this.refreshIntervalInDays = refreshIntervalInDays;
+
+ this.gson = new GsonBuilder().registerTypeAdapter(LocalDateTime.class, new GsonLocalDateTimeAdapter())
+ .registerTypeAdapter(Duration.class, new GsonDurationAdapter()).create();
+ }
+
+
+ public void update(String sql) {
+ try (Connection con = dataSource.getConnection(); PreparedStatement ps = con.prepareStatement(sql)) {
+ LOG.debug("updated {} rows", ps.executeUpdate());
+ } catch (Exception e) {
+ LOG.error(e);
+ }
+ }
+
+ /////////////////////////////////////////////////////////////////////////////////////////
+ /////////////////////////////////////////////////////////////////////////////////////////
+
+ public void updateLastUrlCheck(List checked) {
+ try {
+ AtomicInteger updateCounter = new AtomicInteger(0);
+ List> futures = new ArrayList<>();
+ List allVideos = checked.stream()
+ .sorted(Comparator.comparing(Film::getId))
+ .toList();
+ for (int i = 0; i < allVideos.size(); i += batchSize) {
+ int from = i;
+ int to = Math.min(i + batchSize, allVideos.size());
+ List batch = allVideos.subList(from, to);
+ futures.add(executorService.submit(() -> {
+ String sql = "UPDATE filme SET last_url_check = NOW() WHERE id = ?";
+ try (Connection con = dataSource.getConnection(); PreparedStatement ps = con.prepareStatement(sql)) {
+ for (Film video : batch) {
+ ps.setString(1, video.getId());
+ ps.addBatch();
+ }
+ int [] rs = ps.executeBatch();
+ con.commit();
+ for (int rsCode : rs) {
+ updateCounter.addAndGet(rsCode);
+ }
+ } catch (SQLException e) {
+ LOG.error(e);
+ }
+ }));
+ }
+ futures.forEach( f -> {try { f.get(); } catch(Exception e) { LOG.error("{}",e); }});
+ LOG.debug("updated lastUrlCheck {}", updateCounter.get());
+ } catch (Exception e) {
+ LOG.error(e);
+ }
+ }
+
+ /////////////////////////////////////////////////////////////////////////////////////////
+ /////////////////////////////////////////////////////////////////////////////////////////
+
+ public void deleteFilms(Collection abandonedFilmlist) {
+ try {
+ List> futures = new ArrayList<>();
+ List allVideos = abandonedFilmlist.stream()
+ .sorted(Comparator.comparing(Film::getId))
+ .toList();
+ for (int i = 0; i < allVideos.size(); i += batchSize) {
+ int from = i;
+ int to = Math.min(i + batchSize, allVideos.size());
+ List batch = allVideos.subList(from, to);
+ futures.add(executorService.submit(() -> {
+ String sql = "DELETE FROM filme WHERE id = ?";
+ try (Connection con = dataSource.getConnection(); PreparedStatement ps = con.prepareStatement(sql)) {
+ for (Film video : batch) {
+ ps.setString(1, video.getId());
+ ps.addBatch();
+ }
+ ps.executeBatch();
+ con.commit();
+ } catch (SQLException e) {
+ LOG.error(e);
+ }
+ }));
+ }
+ futures.forEach( f -> {try { f.get(); } catch(Exception e) { LOG.error("{}",e); }});
+ LOG.debug("deleted {}", abandonedFilmlist.size());
+
+ } catch (Exception e) {
+ LOG.error(e);
+ }
+ }
+
+ /////////////////////////////////////////////////////////////////////////////////////////
+ /////////////////////////////////////////////////////////////////////////////////////////
+
+ public Optional readFilmlistFromDB() {
+ return readFilmlistFromDB("", "");
+ }
+
+ public Optional readFilmlistFromDB(String where, String limit) {
+ long start = System.currentTimeMillis();
+ LOG.debug("import filmlist from DB");
+ int readCounter = 0;
+ Filmlist list = new Filmlist();
+ try (Connection con = dataSource.getConnection();
+ PreparedStatement ps = con.prepareStatement("SELECT data FROM filme " + where + " ORDER BY data ->> 'sender', data ->> 'thema', data ->> 'titel' " + limit);
+ ) {
+ ps.setFetchSize(50000);
+ try (ResultSet rs = ps.executeQuery()) {
+ while (rs.next()) {
+ String json = rs.getString("data");
+ list.add(gson.fromJson(json, Film.class));
+ readCounter++;
+ }
+ }
+ LOG.debug("done reading in {} sec for {} elements resulting in {} elements", ((System.currentTimeMillis()-start)/1000), readCounter, list.getFilms().size());
+ return Optional.of(list);
+ } catch (Exception e) {
+ LOG.error(e);
+ }
+ return Optional.empty();
+ }
+
+ /////////////////////////////////////////////////////////////////////////////////////////
+ /////////////////////////////////////////////////////////////////////////////////////////
+
+ public List filterNewVideos(Sender sender, List videos, Function idExtractor) {
+ if(!PostgreSQLDataSourceProvider.isEnabled()) {
+ return videos;
+ }
+ try {
+ List>> futures = new ArrayList<>();
+ // sort to avoid deadlocks
+ List allVideos = videos.stream()
+ .sorted(Comparator.comparing(idExtractor))
+ .toList();
+ for (int i = 0; i < allVideos.size(); i += batchSize) {
+ int from = i;
+ int to = Math.min(i + batchSize, allVideos.size());
+ List batch = allVideos.subList(from, to);
+ futures.add(executorService.submit(() -> {
+ List newVideos = new ArrayList<>();
+ StringBuffer sql = new StringBuffer();
+ sql.append("UPDATE filme SET last_seen = now() ")
+ .append("WHERE id = ? ")
+ .append("AND NOT( created_at::date = last_update::date and last_update::date <> CURRENT_DATE ) ")
+ .append("AND NOT( last_seen - last_update >= interval '").append(refreshIntervalInDays).append("' DAY)");
+ try (Connection con = dataSource.getConnection(); PreparedStatement ps = con.prepareStatement(sql.toString())) {
+ for (T video : batch) {
+ String id = idExtractor.apply(video);
+ if (id != null) {
+ ps.setString(1, id);
+ ps.addBatch();
+ } else {
+ LOG.error("filterNewVideos - Missing ID for Film {}", video);
+ }
+ }
+ int[] rs = ps.executeBatch();
+ con.commit();
+ for (int rsIndex = 0; rsIndex < rs.length; rsIndex++) {
+ if (rs[rsIndex] == 0) {
+ newVideos.add(batch.get(rsIndex));
+ }
+ }
+
+ } catch (SQLException e) {
+ LOG.error(e);
+ }
+ return newVideos;
+ }));
+ }
+ List result = new ArrayList<>();
+ for (Future> f : futures) {
+ result.addAll(f.get());
+ }
+ LOG.debug("Filtered {} in {} (in {} vs out {})",(videos.size()-result.size()), sender.getName(), videos.size(), result.size());
+ // CARP films pro Nacht. Die filme werden dann am nächsten tag gefunden. TODO: für die nächste runde.
+ if(result.size() > 100_000) {
+ LOG.debug("CARP - reduced number of films to 100000 in one job");
+ result = new ArrayList<>(result.subList(0, 100000));
+ }
+ return result;
+ } catch (Exception e) {
+ LOG.error("{}", e);
+ return videos;
+ }
+ }
+
+
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ public HashSet getAllVideoUrls() {
+ HashSet allVideoUrls = new HashSet();
+ String sql = """
+ SELECT
+ data ->> 'sender' sender,
+ data -> 'urls' -> 'SMALL' ->> 'url' aSmall,
+ data -> 'urls' -> 'NORMAL' ->> 'url' aNormal,
+ data -> 'urls' -> 'HD' ->> 'url' aHD
+ FROM filme
+ """;
+ try (Connection con = dataSource.getConnection(); PreparedStatement ps = con.prepareStatement(sql)) {
+ try (ResultSet rs = ps.executeQuery()) {
+ while (rs.next()) {
+ allVideoUrls.add(rs.getString(1)+rs.getString(2));
+ allVideoUrls.add(rs.getString(1)+rs.getString(3));
+ allVideoUrls.add(rs.getString(1)+rs.getString(4));
+ }
+ }
+ } catch (SQLException e) {
+ LOG.error("getAllVideoUrls failed", e);
+ }
+ return allVideoUrls;
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ /**
+ * Speichert alle Filme einer Filmlist parallel in der DB.
+ */
+ public void saveAll(Filmlist filmlist) throws Exception {
+ if(!PostgreSQLDataSourceProvider.isEnabled()) {
+ return;
+ }
+ // Map in List konvertieren
+ List films = new ArrayList<>(filmlist.getFilms().values());
+ films = makeUniqueIds(films);
+ AtomicInteger successCounter = new AtomicInteger(0);
+ List> futures = new ArrayList<>();
+
+ for (int i = 0; i < films.size(); i += batchSize) {
+ int from = i;
+ int to = Math.min(i + batchSize, films.size());
+ List batch = films.subList(from, to);
+
+ futures.add(executorService.submit(() -> {
+ try {
+ successCounter.addAndGet(saveBatch(batch));
+ } catch (SQLException | IOException e) {
+ LOG.error(e);
+ }
+ }));
+ }
+
+ for (Future> f : futures) {
+ f.get();
+ }
+
+ LOG.info("Stored {} films in DB", successCounter.get());
+ }
+
+ /**
+ * Speichert einen Batch von Filmen als Upsert in der DB.
+ */
+ private int saveBatch(List films) throws SQLException, IOException {
+ int successCounter = 0;
+
+ String sql = """
+ INSERT INTO filme (id, data)
+ VALUES (?, ?::jsonb)
+ ON CONFLICT (id) DO UPDATE
+ SET data = EXCLUDED.data,
+ last_update = now()
+ """;
+
+ try (Connection con = dataSource.getConnection(); PreparedStatement ps = con.prepareStatement(sql)) {
+
+ for (Film film : films) {
+ if(film.getId() != null) {
+ ps.setString(1, film.getId());
+ ps.setString(2, gson.toJson(film));
+ ps.addBatch();
+ successCounter++;
+ } else {
+ LOG.error("saveBatch - Missing ID for film {}", film);
+ }
+ }
+ ps.executeBatch();
+ con.commit();
+ }
+ return successCounter;
+ }
+
+ private static List makeUniqueIds(List films) {
+ Map idCount = new HashMap<>();
+
+ return films.stream().map(film -> {
+ String originalId = film.getId();
+ AtomicInteger count = idCount.computeIfAbsent(originalId, k -> new AtomicInteger(0));
+
+ int c = count.getAndIncrement();
+ if (c == 0) {
+ return film; // erste ID bleibt unverändert
+ } else {
+ // Duplikat → neue ID mit Suffix #1, #2 ...
+ film.setId(originalId + "#" + c);
+ return film;
+ }
+ }).collect(Collectors.toList());
+ }
+}
diff --git a/src/main/java/de/mediathekview/mserver/base/utils/PostgreSQLDataSourceProvider.java b/src/main/java/de/mediathekview/mserver/base/utils/PostgreSQLDataSourceProvider.java
new file mode 100644
index 000000000..d1daf8494
--- /dev/null
+++ b/src/main/java/de/mediathekview/mserver/base/utils/PostgreSQLDataSourceProvider.java
@@ -0,0 +1,61 @@
+package de.mediathekview.mserver.base.utils;
+
+import com.zaxxer.hikari.HikariConfig;
+import com.zaxxer.hikari.HikariDataSource;
+
+import de.mediathekview.mserver.base.config.MServerConfigManager;
+
+import javax.sql.DataSource;
+
+public final class PostgreSQLDataSourceProvider {
+ private static HikariDataSource DATA_SOURCE;
+ private static Boolean enabled = false;
+ private MServerConfigManager aMServerConfigManager;
+
+ public PostgreSQLDataSourceProvider(MServerConfigManager aMServerConfigManager) {
+ this.aMServerConfigManager = aMServerConfigManager;
+ init();
+ }
+
+ public static boolean isEnabled() {
+ return enabled;
+ }
+
+ public static DataSource get() {
+ return DATA_SOURCE;
+ }
+
+ public static void shutdown() {
+ if (DATA_SOURCE != null) DATA_SOURCE.close();
+ }
+
+ private void init() {
+ HikariConfig cfg = new HikariConfig();
+ enabled = aMServerConfigManager.getConfig().getDatabaseConfig().getActive();
+ if(!enabled) {
+ return;
+ }
+ cfg.setJdbcUrl(aMServerConfigManager.getConfig().getDatabaseConfig().getUrl());
+ cfg.setUsername(aMServerConfigManager.getConfig().getDatabaseConfig().getUsername());
+ cfg.setPassword(aMServerConfigManager.getConfig().getDatabaseConfig().getPassword());
+
+ // === Pool Sizing ===
+ cfg.setMaximumPoolSize(50);
+ cfg.setMinimumIdle(4);
+
+ // === Performance ===
+ cfg.setAutoCommit(false);
+ cfg.setConnectionTimeout(3000);
+ cfg.setIdleTimeout(600_000);
+ cfg.setMaxLifetime(1_800_000);
+
+ // === PostgreSQL Optimierungen ===
+ cfg.addDataSourceProperty("stringtype", "unspecified");
+ cfg.addDataSourceProperty("defaultRowFetchSize", "10000");
+
+ // === Debug (optional) ===
+ cfg.setPoolName("CrawlerPool");
+
+ DATA_SOURCE = new HikariDataSource(cfg);
+ }
+}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java b/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java
index 3f3d5f87f..194d469ad 100644
--- a/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java
+++ b/src/main/java/de/mediathekview/mserver/crawler/CrawlerManager.java
@@ -15,6 +15,7 @@
import de.mediathekview.mserver.base.uploader.copy.FileCopyTarget;
import de.mediathekview.mserver.base.uploader.copy.FileCopyTask;
import de.mediathekview.mserver.base.utils.CheckUrlAvailability;
+import de.mediathekview.mserver.base.utils.FilmDBService;
import de.mediathekview.mserver.crawler.ard.ArdCrawler;
import de.mediathekview.mserver.crawler.arte.ArteCrawler;
import de.mediathekview.mserver.crawler.arte.ArteCrawler_EN;
@@ -92,6 +93,15 @@ public CrawlerManager(MServerConfigManager aMServerConfigManager) {
public MServerConfigManager getConfigManager() {
return rootConfig;
}
+
+ public void storeFilmsToDB() {
+ FilmDBService filmDBService = new FilmDBService(executorService, getConfigManager().getConfig().getDatabaseConfig().getBatchSize(), getConfigManager().getConfig().getDatabaseConfig().getRefreshIntervalInDays());
+ try {
+ filmDBService.saveAll(filmlist);
+ } catch (Exception e) {
+ LOG.error(e);
+ }
+ }
public void copyFilmlist() {
final MServerCopySettings copySettings = config.getCopySettings();
@@ -188,7 +198,9 @@ public void importLivestreamFilmlist(final FilmlistFormats aFormat, final String
public void importFilmlist(final ImportFilmlistConfiguration importFilmlistConfiguration) {
try {
Optional importedFilmlist;
- if (importFilmlistConfiguration.getPath().startsWith(HTTP)) {
+ if (importFilmlistConfiguration.getPath().startsWith("jdbc")) {
+ importedFilmlist = importFilmlistFromDB();
+ } else if (importFilmlistConfiguration.getPath().startsWith(HTTP)) {
importedFilmlist = importFilmListFromURl(importFilmlistConfiguration.getFormat(), importFilmlistConfiguration.getPath());
} else {
importedFilmlist = importFilmlistFromFile(importFilmlistConfiguration.getFormat(), importFilmlistConfiguration.getPath());
@@ -200,13 +212,14 @@ public void importFilmlist(final ImportFilmlistConfiguration importFilmlistConfi
config.getCheckImportListUrlMinSize(),
config.getCheckImportListUrlTimeoutInSec(),
config.getMaximumCpuThreads())
- .getAvaiableFilmlist(importedFilmlist.get())
+ .getAvailableFilmlist(importedFilmlist.get())
);
}
//
final Filmlist difflist = new Filmlist(UUID.randomUUID(), LocalDateTime.now());
- importedFilmlist.ifPresent(value -> Film.addAllToFilmlist(Film.mergeTwoFilmlists(filmlist,value),difflist));
+ importedFilmlist.ifPresent(value -> Film.addAllToFilmlist(Film.mergeTwoFilmlists(filmlist,value), difflist));
if (importFilmlistConfiguration.isCreateDiff()) {
+ difflist.getFilms().entrySet().removeIf(entry -> entry.getValue().getThema().equals("Livestream") && entry.getValue().getTitel().endsWith("Livestream") && entry.getValue().getTime().getHour() == 0);
Film.addAllToFilmlist(difflist, differenceList);
}
} catch (final IOException ioException) {
@@ -445,6 +458,12 @@ private Set getCrawlerToRun() {
return crawlerToRun;
}
+ private Optional importFilmlistFromDB() throws IOException {
+ FilmDBService filmDBService = new FilmDBService(getExecutorService(), getConfigManager().getConfig().getDatabaseConfig().getBatchSize(), getConfigManager().getConfig().getDatabaseConfig().getRefreshIntervalInDays());
+ Optional dbFilmlist = filmDBService.readFilmlistFromDB();
+ return dbFilmlist;
+ }
+
private Optional importFilmlistFromFile(
final FilmlistFormats aFormat, final String aFilmlistLocation) throws IOException {
final Path filmlistPath = Paths.get(aFilmlistLocation);
diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdConstants.java b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdConstants.java
index 1a85ac1d0..5d1d78b3f 100644
--- a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdConstants.java
+++ b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdConstants.java
@@ -20,7 +20,7 @@ public class ArdConstants {
public static final String DAY_PAGE_URL = "https://programm-api.ard.de/program/api/program?day=%s&channelIds=%s&mode=channel";
public static final int TOPICS_COMPILATION_PAGE_SIZE = 200;
- public static final int TOPIC_PAGE_SIZE = 50;
+ public static final int TOPIC_PAGE_SIZE = 200;
public static final String DEFAULT_CLIENT = "ard";
diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java
index fa4a2545f..fd43fd2c9 100644
--- a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java
+++ b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdCrawler.java
@@ -3,6 +3,7 @@
import de.mediathekview.mserver.daten.Film;
import de.mediathekview.mserver.daten.Sender;
import de.mediathekview.mserver.base.messages.listener.MessageListener;
+import de.mediathekview.mserver.base.utils.DateUtils;
import de.mediathekview.mserver.base.config.MServerConfigManager;
import de.mediathekview.mserver.base.messages.ServerMessages;
import de.mediathekview.mserver.crawler.ard.tasks.*;
@@ -12,10 +13,10 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
-import java.time.LocalDateTime;
-import java.time.format.DateTimeFormatter;
+import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
+import java.util.List;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.*;
@@ -23,8 +24,6 @@
public class ArdCrawler extends AbstractCrawler {
private static final Logger LOG = LogManager.getLogger(ArdCrawler.class);
- private static final DateTimeFormatter DAY_PAGE_DATE_FORMATTER =
- DateTimeFormatter.ofPattern("yyyy-MM-dd");
public ArdCrawler(
final ForkJoinPool aForkJoinPool,
@@ -41,23 +40,18 @@ public Sender getSender() {
private Queue createDayUrlsToCrawl() {
final Queue dayUrlsToCrawl = new ConcurrentLinkedQueue<>();
-
- final LocalDateTime now = LocalDateTime.now();
- for (int i = 0; i <= crawlerConfig.getMaximumDaysForSendungVerpasstSection(); i++) {
- final String day = now.minusDays(i).format(DAY_PAGE_DATE_FORMATTER);
-
+ final List days = DateUtils.generateDaysToCrawl(crawlerConfig);
+ days.forEach( dateString -> {
for (final String client : ArdConstants.CLIENTS_DAY) {
- final String url =
- String.format(ArdConstants.DAY_PAGE_URL, day, client);
+ final String url = String.format(ArdConstants.DAY_PAGE_URL, dateString, client);
dayUrlsToCrawl.offer(new CrawlerUrlDTO(url));
}
- }
+ });
return dayUrlsToCrawl;
}
@Override
protected RecursiveTask> createCrawlerTask() {
-
try {
final ForkJoinTask> dayTask =
forkJoinPool.submit(new ArdDayPageTask(this, createDayUrlsToCrawl()));
@@ -74,18 +68,26 @@ protected RecursiveTask> createCrawlerTask() {
senderTopicUrls.addAll(senderTopicTask.get());
}
LOG.debug("sender topic tasks: {}", senderTopicUrls.size());
+ final ArdTopicGroupsTask groupsToAsset = new ArdTopicGroupsTask(this, new ConcurrentLinkedQueue<>(senderTopicUrls));
+ final Set assistUrls = new HashSet<>();
+ assistUrls.addAll(forkJoinPool.submit(groupsToAsset).get());
+ LOG.debug("sender group assist tasks: {}", assistUrls.size());
+
final ArdTopicPageTask topicTask =
- new ArdTopicPageTask(this, new ConcurrentLinkedQueue<>(senderTopicUrls));
+ new ArdTopicPageTask(this, new ConcurrentLinkedQueue<>(assistUrls));
+
final int showsCountBefore = shows.size();
shows.addAll(forkJoinPool.submit(topicTask).get());
LOG.debug(
"ARD crawler found {} topics for all sub-sender.", shows.size() - showsCountBefore);
}
-
+ //
+ final Queue showsFiltered = this.filterExistingFilms(shows, ArdFilmInfoDto::getId);
+ //
printMessage(
- ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), shows.size());
- getAndSetMaxCount(shows.size());
- return new ArdFilmDetailTask(this, new ConcurrentLinkedQueue<>(shows));
+ ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), showsFiltered.size());
+ getAndSetMaxCount(showsFiltered.size());
+ return new ArdFilmDetailTask(this, new ConcurrentLinkedQueue<>(showsFiltered));
} catch (final InterruptedException ex) {
LOG.fatal("Exception in ARD crawler.", ex);
Thread.currentThread().interrupt();
@@ -113,14 +115,14 @@ private Set>> createSenderTopicTasks() {
}
private ForkJoinTask> getTopicEntriesBySender(final String sender) throws ExecutionException, InterruptedException {
- Set senderTopics = forkJoinPool.submit(
- new ArdTopicsTask(this, sender, createTopicsOverviewUrl(sender))).get();
+ Set senderSingleLetterUrls = forkJoinPool.submit(
+ new ArdTopicsTask(this, sender, createLetterUrlQuery(sender))).get();
- LOG.debug("topics task result {}", senderTopics.size());
- return forkJoinPool.submit(new ArdTopicsLetterTask(this, sender, new ConcurrentLinkedQueue<>(senderTopics)));
+ //LOG.debug("topics task result {}", senderSingleLetterUrls.size());
+ return forkJoinPool.submit(new ArdTopicsLetterTask(this, sender, new ConcurrentLinkedQueue<>(senderSingleLetterUrls)));
}
- private Queue createTopicsOverviewUrl(final String client) {
+ private Queue createLetterUrlQuery(final String client) {
final Queue urls = new ConcurrentLinkedQueue<>();
final String url = String.format(ArdConstants.TOPICS_URL, client);
@@ -128,4 +130,4 @@ private Queue createTopicsOverviewUrl(final String client) {
return urls;
}
-}
+}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/UrlOptimizer.java b/src/main/java/de/mediathekview/mserver/crawler/ard/UrlOptimizer.java
new file mode 100644
index 000000000..d9651d852
--- /dev/null
+++ b/src/main/java/de/mediathekview/mserver/crawler/ard/UrlOptimizer.java
@@ -0,0 +1,239 @@
+package de.mediathekview.mserver.crawler.ard;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.TreeMap;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+import org.apache.logging.log4j.LogManager;
+import de.mediathekview.mserver.crawler.basic.AbstractCrawler;
+import de.mediathekview.mserver.daten.Resolution;
+
+public class UrlOptimizer {
+ private static final org.apache.logging.log4j.Logger LOG = LogManager.getLogger(UrlOptimizer.class);
+ protected AbstractCrawler crawler;
+
+ public UrlOptimizer(AbstractCrawler aCrawler) {
+ crawler = aCrawler;
+ }
+
+ static AtomicInteger good = new AtomicInteger(0);
+ static AtomicInteger bad = new AtomicInteger(0);
+ public void debug2(String adaptive, Map allUrls) {
+ Map proposal = buildFilmUrlFromAdaptive(adaptive, allUrls.entrySet().stream().findFirst().get().getValue());
+
+ if(proposal.size() != allUrls.size() && !adaptive.contains("arte.")) {
+ LOG.debug("Rebuilt URL count {} differs from provided {} for {}", proposal.size(), allUrls.size(), adaptive);
+ // NOTE(review): removed unused buildFromUrl(...) result previously assigned here
+ StringBuffer sb = new StringBuffer();
+ sb.append("#").append(adaptive).append("#").append(printMap(proposal)).append("#vs#").append(printMap(allUrls));
+ LOG.debug(sb.toString());
+ }
+
+ boolean isEqual = proposal.equals(allUrls);
+ if (!isEqual && proposal.size() < 3 && allUrls.size() < 3) {
+ StringBuffer sb = new StringBuffer();
+ sb.append(isEqual).append("#").append(good).append(":").append(bad).append("#").append(adaptive).append("#");
+ proposal.forEach((r,url) -> {
+ sb.append(r).append("|").append(url);
+ });
+ sb.append("#vs#");
+ allUrls.forEach((r,url) -> {
+ sb.append(r).append("|").append(url);
+ });
+ LOG.info(sb.toString());
+ bad.incrementAndGet();
+ } else {
+ good.incrementAndGet();
+ }
+ }
+
+ public static String printMap(Map urls) {
+ StringBuffer sb = new StringBuffer();
+ urls.forEach((r,url) -> {
+ sb.append(r).append("|").append(url);
+ });
+ return sb.toString();
+ }
+
+ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ record AdaptiveUrlStructure(String prefix, Map qualities, String suffix) {
+ }
+
+ private static AdaptiveUrlStructure parseAdaptiveUrlStructure(String adaptive) {
+ if (adaptive == null || !adaptive.contains(",")) {
+ return null;
+ }
+ // kein split by "/" weil rb diese verwendet
+ Pattern p = Pattern.compile("/(?=[^/]*?,)");
+ Matcher m = p.matcher(adaptive);
+ String metaSegment = "";
+ if (m.find()) {
+ int start = m.start() + 1;
+ int end = adaptive.lastIndexOf('/');
+ metaSegment = adaptive.substring(start, end);
+ }
+ //
+ List partsAndMeta = new ArrayList(Arrays.asList(metaSegment.split(",")));
+ String prefix = partsAndMeta.getFirst();
+ //
+ partsAndMeta.removeFirst();
+ String suffix = partsAndMeta.getLast();
+ suffix = suffix.replace(".csmil", "");
+ partsAndMeta.removeLast();
+ //
+ Map qualities = new HashMap();
+ for (int i = 0; i < partsAndMeta.size(); i++) {
+ qualities.put(i, partsAndMeta.get(i));
+ }
+ Map sortedByLength = qualities.entrySet().stream()
+ .sorted(Comparator.comparingInt(e -> e.getValue().length())).collect(Collectors.toMap(Map.Entry::getKey,
+ Map.Entry::getValue, (v1, v2) -> v1, LinkedHashMap::new));
+ //
+ if (qualities.values().stream().findAny().get().endsWith(suffix)) {
+ suffix = "";
+ }
+ //
+ return new AdaptiveUrlStructure(prefix, sortedByLength, suffix);
+ }
+
+ // ---------------------------------------------------------------------------------------------------------------------
+
+ public List extractResolutionHVFromAdaptive(String adaptive) {
+ List resolutions = new ArrayList<>();
+ String m3uContent;
+ try {
+ m3uContent = crawler.requestBodyAsString(adaptive);
+ } catch (IOException e) {
+ LOG.error("Failed to fetch adaptive playlist {}", adaptive, e);
+ return resolutions;
+ }
+ String[] lines = m3uContent.split("\n");
+ for (String line : lines) {
+ line = line.trim();
+ if (line.startsWith("#EXT-X-STREAM-INF:")) {
+ // Extract the RESOLUTION part
+ String[] parts = line.substring("#EXT-X-STREAM-INF:".length()).split(",");
+ for (String part : parts) {
+ if (part.startsWith("RESOLUTION=")) {
+ String resolutionStr = part.substring("RESOLUTION=".length());
+ String[] dims = resolutionStr.split("x");
+ if (dims.length == 2) {
+ try {
+ int horizontal = Integer.parseInt(dims[0]);
+ int vertical = Integer.parseInt(dims[1]);
+ resolutions.add(new int[] { horizontal, vertical });
+ } catch (NumberFormatException e) {
+ resolutions.add(new int[] { 0, 0 });
+ }
+ }
+ break;
+ }
+ }
+ }
+ }
+ return resolutions;
+ }
+
+ // ----------------------------------------------------------------------------------------------------------------------------------------------------
+
+ public Map buildFilmUrlFromAdaptive(String adaptive, String aUrl) {
+ Map result = new HashMap<>();
+ Map rawStringUrlMap = buildFromUrl(adaptive, aUrl);
+ rawStringUrlMap.forEach( (resolutionVertical, url) -> {
+ try {
+ if (crawler.requestUrlExists(url)) {
+ Resolution r = ArdConstants.getResolutionFromWidth(resolutionVertical);
+ result.computeIfAbsent(r, k -> url);
+ } /*else {
+ LOG.debug("broken url {} from {}", url, adaptive);
+ }*/
+ } catch(Exception e) {
+ LOG.error("adaptive: {} url: {} error: {}", adaptive, aUrl, e);
+ }
+ });
+ return result;
+ }
+
+ public Map buildFromUrl(String adaptive, String aUrl) {
+ if (adaptive.startsWith("https://dra-dd.akamaized.net")) {
+ return buildFromUrlForDRA(adaptive, aUrl);
+ } else {
+ Map positionToUrl = buildUrlsFromPlaylist(adaptive, aUrl);
+ return addResolutionToUrls(adaptive, positionToUrl);
+ }
+ }
+
+ public Map buildUrlsFromPlaylist(String adaptive, String aUrl) {
+ if (adaptive.startsWith("https://dra-dd.akamaized.net")) {
+ return buildFromUrlForDRA(adaptive, aUrl);
+ } else {
+ return buildFromUrlForArdMediathek(adaptive, aUrl);
+ }
+ }
+
+ private Map addResolutionToUrls(String adaptive, Map positionToUrl) {
+ Map result = new TreeMap<>(Comparator.reverseOrder());
+ if(adaptive == null || adaptive.isBlank() || positionToUrl.size() == 0) {
+ return result;
+ }
+ List hv = extractResolutionHVFromAdaptive(adaptive);
+ for (int index = 0; index < hv.size(); index++) {
+ if (positionToUrl.containsKey(index)) {
+ result.put(hv.get(index)[0], positionToUrl.get(index));
+ }
+ }
+ //
+ return result;
+ }
+
+ private Map buildFromUrlForArdMediathek(String adaptive, String aUrl) {
+ Map positionToUrl= new HashMap<>();
+ if (adaptive == null || aUrl == null || adaptive.isBlank() || aUrl.isBlank() || !adaptive.contains(",")) {
+ return positionToUrl;
+ }
+ AdaptiveUrlStructure x = parseAdaptiveUrlStructure(adaptive);
+ // find the quality of sample url to determine base url
+ String matchingPart = "";
+ for (String qualityPart : x.qualities.values()) {
+ if (aUrl.contains(x.prefix + qualityPart + x.suffix)) {
+ matchingPart = x.prefix + qualityPart + x.suffix;
+ }
+ }
+ // url to position
+ String baseUrl = aUrl.substring(0, aUrl.length() - matchingPart.length());
+ for (Entry qualityPart : x.qualities.entrySet()) {
+ String newUrl = baseUrl + x.prefix + qualityPart.getValue() + x.suffix;
+ positionToUrl.put(qualityPart.getKey(), newUrl);
+ }
+ //
+ return positionToUrl;
+ }
+
+ private static Map buildFromUrlForDRA(String adaptive, String aUrl) {
+ String newUrl = adaptive.replace("/HLS/", "/mp4/");
+ Map result = new TreeMap<>(Comparator.reverseOrder());
+ result.put(360, newUrl.replace("_master.m3u8", "_vod.360.MP4"));
+ result.put(540, newUrl.replace("_master.m3u8", "_vod.540.MP4"));
+ result.put(720, newUrl.replace("_master.m3u8", "_vod.720.MP4"));
+ result.put(1080, newUrl.replace("_master.m3u8", "_vod.1080.MP4"));
+ return result;
+ }
+
+
+
+
+
+
+
+}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java
index eb182a208..aabf7c52a 100644
--- a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java
+++ b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdFilmDeserializer.java
@@ -11,6 +11,7 @@
import de.mediathekview.mserver.crawler.ard.ArdConstants;
import de.mediathekview.mserver.crawler.ard.ArdFilmDto;
import de.mediathekview.mserver.crawler.ard.ArdFilmInfoDto;
+import de.mediathekview.mserver.crawler.ard.UrlOptimizer;
import de.mediathekview.mserver.crawler.basic.AbstractCrawler;
import org.apache.logging.log4j.LogManager;
@@ -27,6 +28,7 @@
import java.time.ZonedDateTime;
import java.time.format.DateTimeParseException;
import java.util.*;
+import java.util.concurrent.atomic.AtomicInteger;
public class ArdFilmDeserializer implements JsonDeserializer> {
@@ -73,10 +75,12 @@ public class ArdFilmDeserializer implements JsonDeserializer> {
private final ArdVideoInfoJsonDeserializer videoDeserializer;
private final AbstractCrawler crawler;
-
+ private final UrlOptimizer urlOptimizer;
+
public ArdFilmDeserializer(final AbstractCrawler crawler) {
videoDeserializer = new ArdVideoInfoJsonDeserializer(crawler);
this.crawler = crawler;
+ this.urlOptimizer = new UrlOptimizer(crawler);
}
private static Optional getMediaCollectionObject(final JsonObject itemObject) {
@@ -168,6 +172,7 @@ public List deserialize(
final JsonObject itemObject = widgets.get(0).getAsJsonObject();
final Optional topic = parseTopic(itemObject);
+ Optional id = JsonUtils.getAttributeAsString(itemObject, "id");
Optional titleOriginal = JsonUtils.getAttributeAsString(itemObject, ATTRIBUTE_TITLE);
final Optional title = parseTitle(itemObject);
final Optional description = JsonUtils.getAttributeAsString(itemObject, ATTRIBUTE_SYNOPSIS);
@@ -197,6 +202,7 @@ public List deserialize(
final ArdFilmDto filmDto =
new ArdFilmDto(
createFilm(
+ id.get(),
sender,
topic.get(),
title.get(),
@@ -204,9 +210,6 @@ public List deserialize(
date.orElse(null),
duration.orElse(null),
videoInfo.get()));
- if (widgets.size() > 1) {
- parseRelatedFilms(filmDto, widgets.get(1).getAsJsonObject());
- }
films.add(filmDto);
}
// OV - long term this should go into Film as "OV"
@@ -217,6 +220,7 @@ public List deserialize(
final ArdFilmDto filmDtoOV =
new ArdFilmDto(
createFilm(
+ id.get(),
sender,
topic.get(),
title.get() + " (Originalversion)",
@@ -239,6 +243,13 @@ private Optional