codility-python/17_min_abs_sum.py at main · jsueling/codility-python · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
"""https://app.codility.com/programmers/lessons/17-dynamic_programming/min_abs_sum/"""

def solution_a(a: list[int]) -> int:
    """My attempt that passes 100% of correctness tests and 40% of performance tests"""

    if not a:
        return 0

    a = [abs(x) for x in a]

    # Build possible sums iteratively, storing only absolute sums at each step
    # and returning the minimum at the end.

    n = len(a)
    prev = set([a[0]])
    for i in range(1, n):
        cur = set()
        for val in prev:
            cur.add(abs(a[i] + val))
            cur.add(abs(a[i] - val))
        prev = cur
    return min(prev)

# Time complexity:
# - N is the length of array A
# - M is the maximum absolute value in A
# - The size of prev is upper bounded by O(N * M) since it theoretically could
#   hold every sum from 0 to N * M
# - Each iteration over N elements involves iterating over up to O(N * M) sums
# - Thus overall time complexity is O(N * (N * M)) = O(N^2 * M)

def solution_b(a: list[int]) -> int:
    """The optimal solution generated by gemini-2.5-pro (my explanations added)"""

    n = len(a)

    if n == 0:
        return 0

    # Goal: We are trying to split the array into two subsets with minimum
    # absolute sum difference.
    # Let P be the absolute sum of the positive subset,
    # N be the absolute sum of the negative subset,
    # then absolute total_sum = P + N which is also simply the sum of
    # absolute values of all elements.

    # We want to choose P and N where |P - N| is minimal, and return it.
    # Since N = total_sum - P, we want to minimise |P - (total_sum - P)| = |2*P - total_sum|.

    # To solve the problem, since total_sum is known,
    # we can equivalently find all possible subset sums P,
    # returning the minimal value of |2*P - total_sum|.

    counts = [0] * 101
    total_sum = 0
    max_val = float("-inf")

    for i in range(n):
        val = abs(a[i])
        counts[val] += 1
        total_sum += val
        max_val = max(max_val, val)

    # dp[j] = Is sum j reachable with the numbers (k values) seen so far?
    # -1 means unreachable, the default sum 0 is reachable with 0 items
    dp = [-1] * (total_sum + 1)
    dp[0] = 0

    # Iterate over counts of each possible absolute value in A, k
    for k in range(1, max_val + 1):

        count = counts[k]

        if count == 0:
            continue

        for j in range(total_sum + 1):

            # j was reachable with numbers < k, so it is reachable with 0 k's
            if dp[j] >= 0:
                dp[j] = 0

            # If j is not reachable, check if we can compose j by adding a single k,
            # checking that we do not exceed the count of k's available.
            elif j >= k and dp[j - k] >= 0 and dp[j - k] < count:
                # We can add a k to reach j
                dp[j] = dp[j - k] + 1

    # dp >= 0 now contains all reachable subset sums (P),
    # using any combination of absolute values

    min_diff = float("inf")
    # Iterate over all reachable subset sums j (P),
    # only checking up to half, as diff is symmetrical around total_sum / 2
    for j in range(total_sum // 2 + 1):
        if dp[j] >= 0:
            # j is a reachable subset sum
            diff = abs(2 * j - total_sum)
            # store where |2*P - total_sum| = |P - N| is minimal
            min_diff = min(min_diff, diff)
    return min_diff

# Time complexity:
# - N is the length of array A
# - M is the maximum absolute value in A
# - For each of the M possible absolute values, we iterate over all reachable sums,
#   up to total_sum (at most N * M)
# - Thus overall time complexity is O(M * (N * M)) = O(N * M^2), which is better
#   than O(N^2 * M) of solution_a, where N has range [0..20,000] and M has range [0..100]