-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathCode
More file actions
55 lines (46 loc) · 1.52 KB
/
Code
File metadata and controls
55 lines (46 loc) · 1.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from datetime import datetime, timedelta
# Tables to schedule; one job entry is generated per table, in this order.
lists = [
    'RPDA_RFPD.FACILITY',
    'CDL_AMBIT.TMP_GEN_FAC_PROD_LIMITS',
    'CDL_AMBIT.TMP_GEN_FACILITIES_LIMITS',
    'CDL_AMBIT.TMP_GEN_FAC_TEST',
]  # <-- put your 40 tables here

# Text template for a single job entry. Doubled braces are literal braces
# for str.format(); single-brace fields are substituted per table.
# NOTE: values are interpolated verbatim (no JSON escaping), so a table name
# containing a double quote or backslash would yield invalid JSON.
base_template = """{{
"name": "sch_Package_solution_test_ml_job_{i}",
"timeout": "2880",
"type": "SCHEDULED",
"schedule": "cron({minute} {hour} {day} {month} ? {year})",
"parameters": "--job_config_file_path cdl_job_config_auto_suggestion_ML.csv --data_config_file_path {table_name}"
}}"""

# Starting cron time
# NOTE(review): naive datetime -- the time zone the scheduler applies to the
# cron expression is not visible here; confirm before relying on it.
start_time = datetime(2025, 9, 12, 10, 12, 0)  # 12th Sept 2025 10:12

# Build one entry per table, staggering each job 6 minutes after the
# previous one (job 1 runs at start_time, job 2 at +6 min, and so on).
records = []
for i, table in enumerate(lists, start=1):
    cron_time = start_time + timedelta(minutes=6 * (i - 1))
    record = base_template.format(
        i=i,
        minute=cron_time.minute,
        hour=cron_time.hour,
        day=cron_time.day,
        month=cron_time.month,
        year=cron_time.year,
        table_name=table,
    )
    records.append(record)

# Join all with commas (computed once, reused for both outputs below).
joined_records = ",\n".join(records)

# The bare comma-separated entries are what gets printed ...
print(joined_records)

# ... while final_output holds the same entries wrapped in a top-level
# {"jobs": [...]} object.
final_output = "{\n \"jobs\": [\n" + joined_records + "\n ]\n}"
# Select the rows for one schedule type and one file, restricted to that
# subset's 10 most recent distinct run dates.
# NOTE(review): `df`, `job_schedule_type` and `table_name` are not defined in
# the visible part of this file -- they must be supplied before this runs.

# First filter: schedule_name contains job_schedule_type AND file_name matches.
# str.contains treats job_schedule_type as a regular expression by default.
# na=False makes rows with a missing/non-string schedule_name count as
# "no match"; without it the mask can contain NaN and boolean indexing raises.
filtered = df[
    df["schedule_name"].str.contains(job_schedule_type, na=False)
    & (df["file_name"] == table_name)
]

# Get top 10 latest run_dates (distinct values, newest first).
latest_run_dates = (
    filtered["run_date"]
    .drop_duplicates()
    .sort_values(ascending=False)
    .head(10)
)

# Apply again: keep every filtered row whose run_date is one of those 10,
# ordered newest first.
result_df = (
    filtered[filtered["run_date"].isin(latest_run_dates)]
    .sort_values("run_date", ascending=False)
)