# NOTE: GitHub page boilerplate ("You signed in with another tab or window...") removed; not part of the source.
"""Compute the Huffman encoding of a provided text as input.

Outputs the encoding for each symbol."""
import sys
import heapq
from collections import namedtuple
from collections import Counter

# f: frequency, s: the symbol string, ascii: its code point (-1 for internal
# nodes), children: pair of child Nodes or None for leaves.
Node = namedtuple("Node", "f s ascii children")


def huffman(freq):
    """Build a Huffman tree from a symbol -> frequency mapping.

    Returns the root Node; repeatedly merges the two lowest-frequency
    nodes, so the heap invariant gives the optimal prefix code.
    """
    pq = [Node(f, s, ord(s), None) for (s, f) in freq.items()]
    heapq.heapify(pq)
    while len(pq) > 1:
        v1 = heapq.heappop(pq)
        v2 = heapq.heappop(pq)
        # Internal node: combined frequency, no symbol of its own.
        v = Node(v1.f + v2.f, "", -1, children=(v1, v2))
        heapq.heappush(pq, v)
    return heapq.heappop(pq)
def dfs(root, prefix=""):
    """Yield (leaf, code) pairs by walking the Huffman tree.

    Left edges append "0" to the prefix, right edges append "1".
    Fix: the original fused `return` and `yield from` into one token;
    a leaf must return *after* yielding, then recursion handles the rest.
    """
    if not root.children:
        yield (root, prefix)
        return
    yield from dfs(root.children[0], prefix + "0")
    yield from dfs(root.children[1], prefix + "1")
def get_codes(text):
    """Return (node, code) pairs for `text`, shortest codes first."""
    freq = Counter(text)
    tree = huffman(freq)
    return sorted(dfs(tree), key=lambda e: len(e[1]))
if __name__ == "__main__":
    text = sys.stdin.read().strip()
    codes = get_codes(text)
    for node, code in codes:
        symbol = node.s
        # Make whitespace / control characters printable in the table.
        if symbol == "\n":
            symbol = "\\n"
        if symbol == "\t":
            symbol = "\\t"
        if node.ascii == 13:
            symbol = "␍"
        if node.ascii == 65279:
            symbol = "‗"
        lhs = f"{symbol.ljust(2)} ({str(node.ascii).rjust(4)})"
        print(lhs.ljust(10), "\t", len(code), "\t", code)
# Interval-partitioning
"""Greedy interval partitioning (coloring an interval graph).

Given a set of intervals with start time s and finish time f, assign each
interval to a "color" (or resource) so that no two overlapping intervals
share the same color. Equivalently, partition the intervals into the minimum
number of non-overlapping subsets.

The algorithm processes all start/end events in increasing time order. When
an interval ends, its color becomes available again; when an interval starts,
it is assigned any available color. This is optimal and uses exactly the
maximum number of simultaneously active intervals.

Runs in O(n log n) time due to sorting of event times.
Returns a mapping: color -> list of intervals."""
from random import randint as R
from collections import namedtuple as T
from collections import defaultdict as D

Interval = T("Interval", "s f")


def rand_interval():
    """Return a random Interval: start in [1, 50], length in [3, 30]."""
    a = R(1, 50)
    b = R(3, 30)
    return Interval(a, a + b)
def length(iv):
    """Return the duration of interval iv."""
    return iv.f - iv.s


def schedule_all(intervals):
    """Greedily color intervals so overlapping ones never share a color.

    Sweeps event times in order; ending intervals release their color,
    starting intervals grab any free one. Returns defaultdict:
    color -> list of intervals.
    """
    start = D(list)
    end = D(list)
    timesteps = []
    for iv in intervals:
        start[iv.s].append(iv)
        end[iv.f].append(iv)
        timesteps += [iv.s, iv.f]
    timesteps = sorted(set(timesteps))  # makes unique
    # A stack of available colors; reversed so color 0 is popped first.
    colors = list(reversed(range(len(intervals))))
    coloring = D(list)
    interval_color = {}
    for t in timesteps:
        for iv in end[t]:
            # Interval finished: its color is free again.
            color = interval_color[iv]
            colors.append(color)
        for iv in start[t]:
            color = colors.pop()
            coloring[color].append(iv)
            interval_color[iv] = color
    return coloring


I = [rand_interval() for _ in range(20)]
print(I)
coloring = schedule_all(I)
for x in sorted(coloring):
    ivs = sorted(coloring[x])
    # Render each color's intervals as ASCII-art spans like (----).
    s = [" " * ivs[0].s]
    for i in range(len(ivs) - 1):
        s += "(" + "-" * (length(ivs[i]) - 2) + ")"
        s += " " * (ivs[i + 1].s - ivs[i].f)
    s += "(" + "-" * (length(ivs[-1]) - 2) + ")"
    print(x, " ".join(s))
# Stable Matching
"""The Gale–Shapley algorithm for Stable Matching. The algorithm runs
in linear time in the total size of preferences, or, $O(n^2)$ where $n$
is the number of "hospitals"."""


def stable_matching(hospital_preferences, student_preferences):
    """Return a stable matching as a dict hospital -> student.

    Hospitals propose in preference order; students tentatively accept
    and trade up when a better-ranked hospital proposes.
    """
    students = [s for s in student_preferences]
    hospitals = [h for h in hospital_preferences]
    proposals = {h: 0 for h in hospitals}
    unmatched_hospitals = [h for h in hospitals]
    student = {h: None for h in hospitals}
    hospital = {s: None for s in students}
    inrank = {s: {} for s in students}  # maps s to each hospital's s-ranking
    for s in students:
        for idx, h in enumerate(student_preferences[s]):
            inrank[s][h] = idx
    while unmatched_hospitals:
        h = unmatched_hospitals.pop()
        nxt = proposals[h]  # we could pop here instead
        s = hospital_preferences[h][nxt]
        proposals[h] += 1
        # h proposes to its best student not yet proposed to
        if not hospital[s]:
            # s is available
            hospital[s] = h
            student[h] = s
        else:
            sh = hospital[s]
            rank_sh = inrank[s][sh]
            rank_h = inrank[s][h]
            if rank_h < rank_sh:
                # s dumps sh for h
                hospital[s] = h
                student[sh] = None
                student[h] = s
                unmatched_hospitals.append(sh)
            else:
                # s rejects
                unmatched_hospitals.append(h)
    return student


def _generate_instance(n):
    """Build a random n-by-n instance with shuffled preference lists."""
    from random import sample as shuffle

    hospitals = [f"h{i}" for i in range(n)]
    students = [f"s{i}" for i in range(n)]
    hospital_preferences = {h: students[:n] for h in hospitals[:n]}
    student_preferences = {s: hospitals[:n] for s in students[:n]}
    for h in hospitals[:n]:
        hospital_preferences[h] = shuffle(hospital_preferences[h], n)
    for s in students[:n]:
        student_preferences[s] = shuffle(student_preferences[s], n)
    return hospital_preferences, student_preferences


if __name__ == "__main__":
    hospital_preferences, student_preferences = _generate_instance(20)
    M = stable_matching(hospital_preferences, student_preferences)
    for h in M:
        print(f"Hospital {h} + Student {M[h]}")
# 02-graphs
# Bfs
"""Breadth-first search (BFS) for shortest paths in an unweighted graph.

BFS explores vertices in layers of increasing distance from a source vertex.
It computes shortest-path distances measured in number of edges, and the
parent pointers define a BFS tree that can be used to reconstruct paths.

Runs in O(n + m) time."""
from collections import namedtuple as T

Graph = T("Graph", "V E")


def create_graph(V, E):
    """Freeze vertex and edge iterables into an immutable Graph."""
    return Graph(V=tuple(V), E=tuple(E))
def create_path(parent, t):
    """Follow parent pointers from t back to the root and return the
    path as a tuple ordered from source to t."""
    path = [t]
    while t in parent:
        t = parent[t]
        path.append(t)
    return tuple(reversed(path))
def bfs(graph, s, t=None):
    """BFS from s over an edge-list graph.

    With a target t: return (path, dist) on success, (None, inf) if t is
    unreachable. With t=None: return (dist, parent) for all reached vertices.
    NOTE: scans the whole edge list per vertex, so this is O(n * m).
    """
    dist = {s: 0}
    parent = {}
    q = [s]
    while q:
        v = q.pop(0)
        if v == t:
            return create_path(parent, t), dist[t]
        for x, u in graph.E:
            # Only follow edges leaving v toward unvisited vertices.
            if x != v or u in dist:
                continue
            dist[u] = dist[v] + 1
            parent[u] = v
            q.append(u)
    if t is None:
        return dist, parent
    return None, float("inf")
# Bipartite Matching
"""Compute a maximum matching in an unweighted bipartite graph using DFS to
find augmenting paths. Implements the classic alternating-path approach for
bipartite matching."""


def max_bipartite_matching(G, A, B):
    """Return (matchA, matchB) dicts for bipartition (A, B).

    G maps each u in A to its adjacent vertices in B. Unmatched vertices
    map to None. Runs one augmenting DFS per vertex of A: O(V * E).
    """
    matchA = {u: None for u in A}
    matchB = {v: None for v in B}

    def dfs(u, seen):
        # Try to match u, recursively evicting current partners.
        for v in G[u]:
            if v in seen:
                continue
            seen.add(v)
            if matchB[v] is None or dfs(matchB[v], seen):
                matchA[u] = v
                matchB[v] = u
                return True
        return False

    for u in A:
        dfs(u, set())
    return matchA, matchB
# Dfs Tree
"""Run DFS and get a DFS tree"""
from collections import defaultdict as D
from collections import namedtuple as T

DiGraph = T("DiGraph", "V E")


def outneighbors(G, v):
    # Yes we use edge list, the worst one
    for x, y in G.E:
        if v == x:
            yield y


# Module-level DFS bookkeeping, shared by dfs() below:
# pre/post: discovery/finish clocks, tree_edges: edges taken by the DFS.
pre = D(int)
post = {}
time = 0
tree_edges = set()


def dfs(G, node):
    """Timestamped DFS from node, filling pre, post and tree_edges."""
    global time
    time += 1
    pre[node] = time
    for v in outneighbors(G, node):
        if not pre[v]:  # pre defaults to 0 == undiscovered
            tree_edges.add((node, v))
            dfs(G, v)
    time += 1
    post[node] = time


def dfs_tree(G, pre, post, tree_edges):
    """Classify every edge as tree/forward/back/cross from the DFS clocks."""
    edge_type = {}
    for edge in G.E:
        u, v = edge
        if edge in tree_edges:
            edge_type[edge] = "tree"
        elif pre[u] < pre[v] < post[v] < post[u]:
            edge_type[edge] = "forward"
        elif pre[v] < pre[u] < post[u] < post[v]:
            edge_type[edge] = "back"
        elif pre[v] < post[v] < pre[u] < post[u]:
            edge_type[edge] = "cross"
    return edge_type


def topological_sort(G, post):
    """Order vertices by decreasing post number (valid for a DAG).

    NOTE(review): assumes vertices are non-negative integers, since -1 is
    used as the empty-slot sentinel.
    """
    toposort = [-1] * (2 * len(G.V) + 1)
    for v in G.V:
        toposort[post[v]] = v
    return [x for x in reversed(toposort) if x >= 0]
"""Kahn's algorithm produces a topological order of a DAG by repeatedly
removing vertices with zero in-degree and updating their neighbors' in-degrees
until none remain."""
from collections import deque


def kahns(G):
    """Yield vertices of G in topological order.

    G must expose `G.nodes` and adjacency via `G[u]`.
    Raises ValueError if G contains a cycle (some vertices never reach
    in-degree zero).
    """
    indegree = {u: 0 for u in G.nodes}
    for u in G.nodes:
        for v in G[u]:
            indegree[v] += 1
    queue = deque([u for u in G.nodes if indegree[u] == 0])
    count = 0
    while queue:
        u = queue.popleft()
        yield u
        count += 1
        for v in G[u]:
            indegree[v] -= 1
            if indegree[v] == 0:
                queue.append(v)
    if count != len(G.nodes):
        raise ValueError("Graph is not a DAG")
# Kosarajus Algorithm
"""A strongly connected component (SCC) is a maximal set of vertices where
every vertex can reach every other by directed paths. Kosaraju's algorithm
finds all SCCs by doing a DFS to record finish times, reversing all edges, then
running DFS again in reverse finish order."""


def kosaraju(G):
    """Yield the strongly connected components of adjacency-dict G."""
    vis, order = set(), []

    def dfs(u, graph, collect):
        vis.add(u)
        for v in graph[u]:
            if v not in vis:
                dfs(v, graph, collect)
        collect.append(u)  # postorder: u finishes after its descendants

    # Pass 1: record finish order on G.
    for u in G:
        if u not in vis:
            dfs(u, G, order)
    # Build the reverse graph R.
    R = {u: [] for u in G}
    for u in G:
        for v in G[u]:
            R[v].append(u)
    # Pass 2: DFS on R in decreasing finish order; each run is one SCC.
    vis.clear()
    while order:
        u = order.pop()
        if u not in vis:
            comp = []
            dfs(u, R, comp)
            yield comp
# Kruskals Algorithm
"""Kruskal's algorithm for minimum spanning tree (MST).

Given a weighted undirected graph, Kruskal's algorithm computes a minimum
spanning tree by considering the edges in nondecreasing order of weight and
adding an edge whenever it connects two previously disconnected components.

The graph is given as a list of edges of the form $(w, u, v)$, where $w$ is the
weight of the edge $uv$. Sorting this list yields the order in which edges are
considered. A Union Find data structure maintains the connected components.

Returns the edges of the MST together with its total weight.

Runs in $O(m \\log m)$ time, dominated by sorting the m edges."""


class Unionfind:
    """Union-Find over elements 0..size-1 with union by rank and
    path compression."""

    def __init__(self, size):
        self._roots = list(range(size))
        self._rank = [0] * size

    def find(self, elt):
        """Return the representative of elt's component."""
        root = elt
        while root != self._roots[root]:
            root = self._roots[root]
        while elt != root:  # path compression
            parent = self._roots[elt]
            self._roots[elt] = root
            elt = parent
        return root

    def __getitem__(self, elt):
        return self.find(elt)

    def is_connected(self, a, b):
        return self.find(a) == self.find(b)

    def union(self, a, b):
        """Merge the components of a and b; return False if already merged."""
        r1 = self.find(a)
        r2 = self.find(b)
        if r1 == r2:
            return False
        if self._rank[r1] < self._rank[r2]:
            self._roots[r1] = r2
        elif self._rank[r1] > self._rank[r2]:
            self._roots[r2] = r1
        else:
            self._roots[r2] = r1
            self._rank[r1] += 1
        return True


def kruskal(n, edges):
    """Return (mst_edges, total_weight) for n vertices and (w, u, v) edges."""
    uf = Unionfind(n)
    mst = []
    weight = 0
    for w, u, v in sorted(edges):
        if uf.union(u, v):
            mst.append((u, v))
            weight += w
            if len(mst) == n - 1:
                break  # a spanning tree has exactly n-1 edges
    return mst, weight
# Layered-graph
"""Layered Graph algorithm for search with $k$ free edges.

Suppose you have a directed graph with edge weights and we want to find a
shortest path from $s$ to $t$, where we can use $k$ edges for free.

The Layered Graph algorithm takes $k+1$ copies of $G$, $G_0, G_1, ..., G_k$,
and creates additional edges from $u_i$ to $v_{i+1}$ with weight 0, for all
edges $uv$ in $G$.

A path from $s_0$ to some $t_i$ corresponds to an $s$-$t$ path in the original
graph using exactly $i$ free edges. Thus, the shortest path from $s_0$ to any
layered copy of $t$ gives the optimal path using at most $k$ free edges."""
from collections import namedtuple as T
from heapq import heappop, heappush

Graph = T("Graph", "V E w")


def create_graph(V, E, w):
    """Freeze vertices/edges into an immutable Graph with weight dict w."""
    return Graph(V=tuple(V), E=tuple(E), w=w)
def create_path(parent, t):
    """Follow parent pointers from t to the source; return source-to-t tuple."""
    path = [t]
    while t in parent:
        t = parent[t]
        path.append(t)
    return tuple(reversed(path))


def dijkstra(graph, s, t):
    """Dijkstra from s to t over graph.w edge weights.

    Returns (path, distance), or (None, inf) if t is unreachable.
    Fix: the original fell off the loop returning bare None, which the
    caller then failed to unpack into (P, d).
    """
    dist = {s: 0}
    parent = {}
    q = [(0, s)]
    while q:
        d, v = heappop(q)
        if d != dist[v]:
            continue  # stale heap entry
        if v == t:
            return create_path(parent, t), d
        for u in graph.V:
            if (v, u) not in graph.w:
                continue
            alt = d + graph.w[(v, u)]
            if alt >= dist.get(u, float("inf")):
                continue
            dist[u] = alt
            parent[u] = v
            heappush(q, (alt, u))
    return None, float("inf")
def layered_graph(graph, k):
    """Build k+1 layered copies of graph plus zero-weight cross-layer edges.

    Vertices become (v, layer). Within-layer edges keep their weight;
    the edge ((v, i), (u, i+1)) with weight 0 models using edge vu for free.
    """
    V = tuple((v, i) for i in range(k + 1) for v in graph.V)
    E = []
    w = {}
    for i in range(k + 1):
        for v, u in graph.E:
            E.append(((v, i), (u, i)))
            w[((v, i), (u, i))] = graph.w[(v, u)]
    for i in range(k):
        for v, u in graph.E:
            E.append(((v, i), (u, i + 1)))
            w[((v, i), (u, i + 1))] = 0
    return create_graph(V, E, w)
def unwrap(path):
    """Strip the layer index from a layered path of (vertex, layer) pairs."""
    return tuple(v for (v, _) in path)
def shortest_path(graph, s, t, k):
    """Shortest s-t path using at most k free edges.

    Runs Dijkstra from (s, 0) to every layered copy (t, i) and keeps the
    best. Returns (path, distance) or (None, inf) if t is unreachable.
    """
    H = layered_graph(graph, k)
    best = None
    for i in range(k + 1):
        P, d = dijkstra(H, (s, 0), (t, i))
        if P is None:
            continue
        if best is None or d < best[1]:
            best = (P, d)
    if best is None:
        return None, float("inf")
    P, d = best
    return unwrap(P), d
# Maximum-spacing-clustering
"""Maximum-spacing k-clustering via Kruskal's algorithm.

Given an undirected weighted graph on vertices {0, ..., n-1}, the goal of
maximum-spacing k-clustering is to partition the vertices into exactly k
clusters so that the minimum-weight edge joining two different clusters is as
large as possible.

Runs in $O(m \\log m)$ time, dominated by sorting the edges."""
from collections import defaultdict as D


def maximum_spacing_clustering(n, edges, k):
    """Return the k clusters (as a set of frozensets of vertices).

    Runs Kruskal but stops after n-k merges, leaving exactly k components.
    Fix: the original did `set(clusters.values())` over mutable sets, which
    raises TypeError (sets are unhashable); freeze each cluster first.
    """
    uf = Unionfind(n)
    merges = 0
    for w, u, v in sorted(edges):
        if uf.union(u, v):
            merges += 1
            if merges == n - k:
                break
    clusters = D(set)
    for v in range(n):
        clusters[uf[v]].add(v)
    return {frozenset(c) for c in clusters.values()}
"""Grow a rapidly-exploring random tree (RRT) toward a target while avoiding
obstacles. Expands the tree incrementally by sampling feasible edges until the
goal region is reached or iterations are exhausted."""


def rrt(start, target, obstacles, n=1, tree=None):
    """Run up to n expansion steps; return (tree, reached_target).

    NOTE(review): Tree, sample_tree_node, dist and CLOSE are defined
    elsewhere in the project — confirm their contracts before reuse.
    """
    if tree is None:
        tree = Tree(start, set([start]), set())  # rapidly exploring random tree (RRT)
    for i in range(n):
        edge = sample_tree_node(tree, obstacles)
        if edge:
            q, v = edge
            tree.V.add(q)
            tree.E.add((q, v))
            if dist(q, target) < CLOSE:
                return tree, True
    return tree, False
# Topological Sort With Dfs
"""Use postorder DFS to get a reversed topological ordering."""


def dfs_toposort(G):
    """Yield vertices of DAG G (exposing G.nodes and G[u]) in topological
    order.

    Fix: the original marked vertices visited only *after* recursing, so a
    cyclic input recursed forever; marking on entry keeps the identical
    postorder on DAGs while terminating on any input.
    """
    visited = set()
    order = []

    def dfs(u):
        if u in visited:
            return
        visited.add(u)
        for v in G[u]:
            dfs(v)
        order.append(u)  # postorder: u after all its descendants

    for u in G.nodes:
        dfs(u)
    # Reversed postorder is a topological order.
    yield from reversed(order)
"""Maximum consecutive sum in an array with possibly negative values."""


def consecutive_sum(array, idx=0, con_sum=0, global_max=0):
    """Kadane's algorithm, written recursively.

    con_sum is the best sum ending at idx-1; global_max the best seen so
    far. Returns 0 for an all-negative array (the empty subarray).
    """
    if idx >= len(array):
        return global_max
    copt = max(array[idx], array[idx] + con_sum)
    gopt = max(global_max, copt)
    return consecutive_sum(array, idx + 1, copt, gopt)


print(consecutive_sum([4, 0, 3, -8, 1, 4, -2, -10]))
# Longest Nonnegative
"""Find the length of the longest contiguous subarray with a non-negative sum.

Uses prefix sums and two-pointer scanning over forward minima and reverse
maxima."""


def longest_non_negative(data):
    """Return the maximum length of a contiguous subarray with sum >= 0."""
    # Sentinel smaller than any achievable prefix sum, acting as -infinity.
    neginf = sum(abs(e) for e in data) * -1000  # -infty
    data = [neginf] + data + [neginf]
    presum = [0]
    for idx, val in enumerate(data):
        presum.append(val + presum[idx])
    # max_rev[i] = max prefix sum at or after i; min_fwd[i] = min at or before i.
    max_rev = [0 for _ in presum]
    max_rev[-1] = presum[-1]
    for i in range(len(presum) - 2, -1, -1):
        max_rev[i] = max(presum[i], max_rev[i + 1])
    min_fwd = [0 for _ in presum]
    for i in range(1, len(presum)):
        min_fwd[i] = min(presum[i], min_fwd[i - 1])
    # Two pointers: advance hi while a non-negative window exists, else lo.
    lo = 0
    hi = 0
    mx = 0
    while True:
        lv = min_fwd[lo]
        rv = max_rev[hi]
        mx = max(mx, (hi - lo - 1))
        if rv - lv >= 0:
            hi += 1
            if hi >= len(max_rev):
                break
        else:
            lo += 1
            if lo >= len(min_fwd):
                break
    return mx
# Monotonic Queue
"""Monotonic queue for $O(1)$-amortized sliding-window min/max queries.

Maintains a deque of candidate indices in monotone order as the window moves.
Use it to compute all window minima/maxima in overall $O(n)$ time."""
from collections import deque


def sliding_window_min(arr, k):
    """Return list of minima of each length-k window of arr."""
    if k <= 0:
        raise ValueError("k must be >= 1")
    n = len(arr)
    if k > n:
        return []
    q = deque()  # stores indices; arr[q[0]] is current min
    out = []
    for i, x in enumerate(arr):
        # drop indices that left the window
        while q and q[0] <= i - k:
            q.popleft()
        # maintain increasing values in deque
        while q and arr[q[-1]] >= x:
            q.pop()
        q.append(i)
        # window is formed
        if i >= k - 1:
            out.append(arr[q[0]])
    return out


def sliding_window_max(arr, k):
    """Return list of maxima of each length-k window of arr."""
    if k <= 0:
        raise ValueError("k must be >= 1")
    n = len(arr)
    if k > n:
        return []
    q = deque()  # stores indices; arr[q[0]] is current max
    out = []
    for i, x in enumerate(arr):
        while q and q[0] <= i - k:
            q.popleft()
        # maintain decreasing values in deque
        while q and arr[q[-1]] <= x:
            q.pop()
        q.append(i)
        if i >= k - 1:
            out.append(arr[q[0]])
    return out


if __name__ == "__main__":
    arr = [1, 3, -1, -3, 5, 3, 6, 7]
    k = 3
    mins = sliding_window_min(arr, k)
    maxs = sliding_window_max(arr, k)
    print("arr :", arr)
    print("k :", k)
    print("mins:", mins)  # [-1, -3, -3, -3, 3, 3]
    print("maxs:", maxs)  # [3, 3, 5, 5, 6, 7]
# Prefix Sum
"""Compute the prefix sum of a given list as input."""
from sys import stdin as inp

# Read one whitespace-separated line of integers from stdin.
a = [int(x) for x in inp.readline().split()]
b = []
for idx, val in enumerate(a):
    # b[i] = a[0] + ... + a[i]
    b.append(val if idx == 0 else val + b[idx - 1])
print(b)
# Sliding Window
"""Sliding window to find best $k$ sized window in linear time."""
data = [3.5, 7.5, 8.0, 5.7, 3.1, 4.2, 7.2, 0.1, 3.4, 1.2, -4]
k = 3
acc = sum(data[:k])
best = acc
winstart = 0
for winstart in range(1, len(data) - k + 1):
    # Slide by one: drop the element leaving, add the one entering.
    acc += -data[winstart - 1] + data[winstart + k - 1]
    if acc > best:
        best = acc
print(best)
# 04-dynamic-programming
# Bellman Ford
"""Single-source shortest paths (SSSP) with negative edges can be solved by
Bellman–Ford, which uses dynamic programming over path lengths to relax edges
up to $n-1$ times. If a further relaxation still decreases a distance, it
indicates a negative-weight cycle reachable from the source."""


def bellman_ford(n, G, s):
    """Return (dist, True) if successful, and (dist, False) if we have
    detected a negative cycle.

    G.edges yields (u, v, w) triples. Fix: the original dropped the
    `return` keyword on the negative-cycle branch, making it a no-op.
    """
    dist = [float("inf")] * n
    dist[s] = 0
    for _ in range(n - 1):
        for u, v, w in G.edges:
            dist[v] = min(dist[v], dist[u] + w)
    # One more pass: any further improvement means a negative cycle.
    for u, v, w in G.edges:
        if dist[u] + w < dist[v]:
            return dist, False
    return dist, True
"""Steiner Tree is the problem of finding a minimum-weight connected subgraph
that spans a given set of terminal vertices in a weighted graph.

Dreyfus--Wagner is a classic dynamic programming algorithm for Steiner Tree
parameterized by the number of terminals. By storing `dp[mask][v]` as the
minimum cost of a Steiner tree connecting the terminals in `mask` and ending
at vertex `v`, it combines smaller terminal subsets and then propagates costs
through shortest-path distances.

Using a precomputed all-pairs shortest-path matrix `dist`, it runs in
$O(3^k n + 2^k n^2)$ time, where $k$ is the number of terminals and $n$ is
the number of vertices."""
from collections import defaultdict
from math import inf


def dreyfus_wagner(dist, terminals):
    """Return the minimum Steiner tree cost connecting `terminals`,
    given the all-pairs shortest-path matrix `dist`."""
    n = len(dist)
    k = len(terminals)
    dp = defaultdict(lambda: [inf] * n)
    # Base case: single-terminal masks cost the distance to that terminal.
    for i, t in enumerate(terminals):
        mask = 1 << i
        for v in range(n):
            dp[mask][v] = dist[v][t]
    for mask in range(1, 1 << k):
        if mask & (mask - 1) == 0:
            continue  # single-bit masks are base cases
        # Combine every proper submask pair (sub, mask ^ sub) at a shared v.
        sub = (mask - 1) & mask
        while sub:
            other = mask ^ sub
            if sub < other:  # visit each unordered pair once
                for v in range(n):
                    cand = dp[sub][v] + dp[other][v]
                    if cand < dp[mask][v]:
                        dp[mask][v] = cand
            sub = (sub - 1) & mask
        # Propagate through shortest paths to every other endpoint.
        for v in range(n):
            dp[mask][v] = min(dp[mask][u] + dist[u][v] for u in range(n))
    return min(dp[(1 << k) - 1])
"""SOS DP / Fast Zeta Transform over subsets (submasks).

This module provides the classic *subset zeta transform* (a.k.a. SOS DP step).
Given an array f indexed by bitmasks in {0..2^n-1}, the subset zeta transform
produces f' where: f'[mask] = sum_{sub ⊆ mask} f[sub].

This is useful when you want, for every mask, the sum/aggregation over all its
submasks, but you want all answers at once in O(n·2^n) rather than doing an
O(3^n) total amount of submask iteration."""


def zeta_submasks(f, n):
    """In-place subset zeta transform of f (length 2^n); returns f."""
    N = 1 << n
    for i in range(n):
        bit = 1 << i
        for mask in range(N):
            if mask & bit:
                # Fold in the value of the submask with bit i cleared.
                f[mask] += f[mask ^ bit]
    return f


if __name__ == "__main__":
    n = 3
    w = [0] * (1 << n)
    w[0b001] = 5  # {0}
    w[0b101] = 2  # {0,2}
    w[0b110] = 7  # {1,2}
    g = w[:]
    zeta_submasks(g, n)
    print("g[0b101] =", g[0b101])
"""APSP (All-Pairs Shortest Paths) is the problem of finding the shortest path
distances between every pair of vertices in a weighted graph.

Floyd–Warshall is a classic dynamic programming algorithm for APSP.
By updating `dist[i][j] = min(dist[i][j], dist[i][k] + dist[k][j])` for each
intermediate vertex `k`, it computes shortest paths for all pairs in $O(n^3)$
time."""


def floyd_warshall(dist):
    """Mutate and return the n-by-n distance matrix `dist` so that
    dist[i][j] is the shortest-path distance from i to j."""
    n = len(dist)
    for k in range(n):
        for i in range(n):
            for j in range(n):
                if dist[i][k] + dist[k][j] < dist[i][j]:
                    dist[i][j] = dist[i][k] + dist[k][j]
    return dist
"""Given a set of $n$ $(x, y)$ points, compute the least squares for the data
set. In addition, create a data structure that can answer query
`least_squares(i, j)` of all points in `data[i:j]`. Using dynamic programming
we can compute the entire data structure in time $O(n^2)$."""
from collections import namedtuple

# Incremental least-squares state: fitted line y = a*x + b over n points,
# plus the running sums needed to extend the fit by one more point.
RegressionState = namedtuple("RegressionState", "a b n sum_x sum_y sum_xy sum_x2 a_nom denom b_nom")


def regression(x, y, state=None):
    """Fold point (x, y) into `state` (or start fresh) and return the
    updated RegressionState with the new least-squares line."""
    if state is None or state.n == 0:
        n = 1
        sum_x, sum_y = x, y
        sum_xy, sum_x2 = x * y, x * x
    else:
        n = state.n + 1
        sum_x = state.sum_x + x
        sum_y = state.sum_y + y
        sum_xy = state.sum_xy + x * y
        sum_x2 = state.sum_x2 + x * x
    a_nom = n * sum_xy - sum_x * sum_y
    denom = n * sum_x2 - sum_x * sum_x
    b_nom = sum_y * sum_x2 - sum_x * sum_xy
    if denom == 0:
        # Vertical spread is zero (e.g. a single point): horizontal line.
        a = 0.0
        b = sum_y / n
    else:
        a = a_nom / denom
        b = b_nom / denom
    return RegressionState(a, b, n, sum_x, sum_y, sum_xy, sum_x2, a_nom, denom, b_nom)


def build_dp(points):
    """Return DP[(i, j)] = regression state over points[i..j], in O(n^2)."""
    N = len(points)
    DP = {}
    for i in range(N):
        DP[(i, i)] = regression(*points[i])
    for i in range(N):
        for j in range(i + 1, N):
            DP[(i, j)] = regression(*points[j], DP[(i, j - 1)])
    return DP


# RUNNING THE CODE
import random


def generate_points(N):
    """Generate N noisy points around the line y = x + 2."""
    points = []
    for x in range(N):
        y = round(2 + x + (random.randint(-200, 200) * 0.12), 3)
        points.append((1 + x, y))
    return points


def axb(state, r=5):
    """Print s.a and s.b as "a·x+b"."""
    a = round(state.a, r)
    b = round(state.b, r)
    if b >= 0:
        return f"{a}·x + {b} ({state.n} points)"
    return f"{a}·x - {-b} ({state.n} points)"


RegressionState.__str__ = axb

if __name__ == "__main__":
    import sys

    if len(sys.argv) != 2:
        exit("Usage: ./regression.py <N>")
    N = int(sys.argv[1])
    random.seed(1000 * N % 2**20 - 1)
    points = generate_points(N)
    DP = build_dp(points)
    line1 = DP[(0, N - 1)]
    print(line1)
"""Basic implementation for SQRT decomposition. The SQRT data structure can
answer range queries such as sum, max, min in time $O(\\sqrt n)$ and can do
update element in $O(\\sqrt n)$ as well.

In theory, this data structure can take any [associative
operation](https://en.wikipedia.org/wiki/Associative_property) (e.g., gcd,
lcm), and the elements can even be higher order structures such as matrices
with the operation being matrix multiplication."""
import math


class SQRT:
    """The SQRT data structure.

    Answers range queries (sum, max, min) in $O(\\sqrt n)$ and supports
    point updates in $O(\\sqrt n)$, which makes it outperform a prefix sum
    in update-heavy workloads.
    """

    def _update(self, block):
        """Recompute the cached sum/min/max of one block."""
        if block < 0:
            return  # sentinel index used during initialization
        self._block_data[block] = {
            "sum": sum(self._blocks[block]),
            "min": min(self._blocks[block]),
            "max": max(self._blocks[block]),
        }

    def _initialize(self, data):
        """Slice data into sqrt(n)-sized blocks and cache their stats."""
        block = -1
        for idx, e in enumerate(data):
            if (idx % self._block_size) == 0:
                self._update(block)
                block += 1
                self._blocks.append([])
            self._blocks[block].append(e)
        if block not in self._block_data:
            # the trailing block
            self._update(block)

    def __init__(self, data):
        """Create a SQRT decomposition of iterable data.

        Complexity: O(n).
        """
        d = [e for e in data]
        if not d:
            raise ValueError("SQRT undefined for empty set")
        # Global sentinels used as neutral elements for min/max folds.
        self._glob_min = min(d) - 1
        self._glob_max = max(d) + 1
        self._len = len(d)
        self._blocks = []
        self._block_size = int(math.sqrt(len(d)))
        self._block_data = {}
        self._initialize(d)

    def __len__(self):
        return self._len

    def _pos(self, idx):
        """Map a flat index to (block, offset-within-block)."""
        return idx // self._block_size, idx % self._block_size

    def __getitem__(self, coord):
        """Get an element (int index) or a statistics dict (slice).

        SQRT(lst)[l:r] returns {"sum": ..., "min": ..., "max": ...} for the
        half-open range [l, r). Complexity: $O(\\sqrt n)$.
        """
        if isinstance(coord, int):
            b, l = self._pos(coord)
            return self._blocks[b][l]
        if isinstance(coord, slice):
            if coord.step is not None:
                raise ValueError("SQRT cannot stride")
            coord = (coord.start, coord.stop)
        if not isinstance(coord, tuple):
            raise ValueError(f"cannot unpack {coord} in sqrt[(l, r)]")
        if len(coord) != 2:
            raise ValueError(f"size mismatch for {coord} in sqrt[(l, r)]")
        l, r = coord
        l_block, l_loc = self._pos(l)
        r_block, r_loc = self._pos(r)
        if l_block == r_block:
            # special case when l and r within same block
            view = self._blocks[l_block][l_loc:r_loc]
            return {
                "sum": sum(view),
                "min": min(view),
                "max": max(view),
            }
        stats = {
            "sum": 0,
            "min": self._glob_max,
            "max": self._glob_min,
        }
        # Partial left block, cached middle blocks, partial right block.
        for l_idx in range(l_loc, self._block_size):
            e = self._blocks[l_block][l_idx]
            stats["sum"] += e
            stats["min"] = min(stats["min"], e)
            stats["max"] = max(stats["max"], e)
        for b in range(l_block + 1, r_block):
            stats["sum"] += self._block_data[b]["sum"]
            stats["min"] = min(stats["min"], self._block_data[b]["min"])
            stats["max"] = max(stats["max"], self._block_data[b]["max"])
        for r_idx in range(r_loc):
            e = self._blocks[r_block][r_idx]
            stats["sum"] += e
            stats["min"] = min(stats["min"], e)
            stats["max"] = max(stats["max"], e)
        return stats

    def __setitem__(self, idx, val):
        """Update index idx to be val. Complexity: $O(\\sqrt n)$."""
        self._glob_min = min(self._glob_min, val - 1)
        self._glob_max = max(self._glob_max, val + 1)
        block, loc = self._pos(idx)
        self._blocks[block][loc] = val
        self._update(block)

    def __str__(self):
        """Return a verbose string representation of the SQRT decomposition."""
        retval = "SQRT(\n"
        for idx, elt in enumerate(self._blocks):
            s_i = str(idx).ljust(3)
            s_o = str(self._block_data[idx]).ljust(4 * 12 + 4)
            s_d = "[" + ", ".join(str(x).rjust(5) for x in elt) + "]"
            retval += f" {s_i}{s_o}{s_d}\n"
        return retval + ")"

    def __repr__(self):
        data = [str(self[i]) for i in range(len(self))]
        return f"SQRT([{', '.join(data)}])"
# 06-divide-and-conquer
# Closest Pair
"""A divide and conquer algorithm for computing a closest pair of points in a
set of $n$ points in the plane in time $O(n \\log n)$. It outputs both their
distance and the pair itself."""
import itertools as IT
import math
from collections import namedtuple as T

Point = T("Point", "x y")
Sol = T("Sol", "delta pair")


def bruteforce(points):
    """Closest pair by checking all O(n^2) pairs; fine for small inputs."""
    return min(Sol(math.hypot(a.x - b.x, a.y - b.y), (a, b)) for (a, b) in IT.combinations(points, 2))
def closest_pair(X, Y):
    """Divide-and-conquer closest pair.

    X is the points sorted by x (then y); Y the same points sorted by y.
    Splits at the median, recurses, then checks the strip of width
    2*delta around the pivot — each strip point only against the next 6.
    """
    if len(X) <= 5:
        return bruteforce(X)
    pivot = X[len(X) // 2]
    Xl = [p for p in X if p <= pivot]
    Yl = [p for p in Y if p <= pivot]
    Xr = [p for p in X if p > pivot]
    Yr = [p for p in Y if p > pivot]
    OPT = min(closest_pair(Xl, Yl), closest_pair(Xr, Yr))
    S = [p for p in Y if abs(p.x - pivot.x) < OPT.delta]
    if len(S) > 1:
        OPT = min(OPT, min(bruteforce(IT.islice(S[i:], 6)) for i in range(len(S) - 1)))
    return OPT


def closest(points):
    """Return the Sol(delta, pair) of the closest pair among `points`."""
    X = sorted(points)
    Y = sorted(points, key=lambda p: (p.y, p.x))
    return closest_pair(X, Y)
# Driver
import random

N = 100
P = [Point(random.random() * 1000, random.random() * 1000) for _ in range(N)]
delta, (p1, p2) = closest(P)
print("delta:", round(delta, 3))
# Counting Inversions
"""Counting inversions via merge sort (divide and conquer).

An inversion in a list L is a pair of indices (i, j) with i < j and L[i] > L[j].
This algorithm counts all inversions while simultaneously sorting the list.

The list is recursively split into two halves. After counting inversions in
each half, the merge step counts *cross inversions*: whenever an element from
the right half is placed before remaining elements of the left half, it forms
inversions with all those remaining elements.

Returns a pair (inv, sorted_L), where inv is the number of inversions and
sorted_L is the sorted version of L.

Runs in O(n log n) time and O(n) additional space."""


def count_and_sort(L):
    """Return (number of inversions, sorted copy of L)."""
    if len(L) <= 1:
        return 0, L
    n2 = len(L) // 2
    inv1, L1 = count_and_sort(L[:n2])
    inv2, L2 = count_and_sort(L[n2:])
    count = 0
    L = []
    p1 = p2 = 0
    while p1 < len(L1) and p2 < len(L2):
        # Fix: must be <=, not <, so equal elements are NOT counted as
        # inversions (an inversion requires L[i] > L[j]).
        if L1[p1] <= L2[p2]:
            L.append(L1[p1])
            p1 += 1
        else:
            L.append(L2[p2])
            p2 += 1
            # The right element jumps over everything left in L1.
            count += len(L1) - p1
    return inv1 + inv2 + count, L + L1[p1:] + L2[p2:]
"""Karatsuba multiplication for large integers represented as strings.

Multiplies two nonnegative decimal strings with divide and conquer. Inputs
are padded to a power-of-two length and split into halves:

$$x = x1 \\cdot 10^{n/2} + x0, \\qquad y = y1 \\cdot 10^{n/2} + y0$$

Three recursive products $A = x1 y1$, $B = x0 y0$, $P = (x1+x0)(y1+y0)$
combine via $xy = A \\cdot 10^n + (P - A - B) \\cdot 10^{n/2} + B$, giving
$O(n^{\\log_2 3}) \\approx O(n^{1.585})$ instead of the quadratic
grade-school method."""
import math


def add(x, y, z="0"):
    """Decimal-string addition of up to three operands."""
    return str(int(x) + int(y) + int(z))


def sub(x, y, z="0"):
    """Decimal-string subtraction x - y - z."""
    return str(int(x) - int(y) - int(z))


def shift(x, n):
    """Left shift x with n positions"""
    return x + "0" * n


def align(x, y):
    """Pad with 0s to get x and y same length and power of two"""
    n = max([len(x), len(y)])
    N = 2 ** math.ceil(math.log2(n))
    x = x.zfill(N)
    y = y.zfill(N)
    return x, y


def M(x, y):
    """Karatsuba-multiply decimal strings; may return leading zeros."""
    x, y = align(x, y)
    n = len(x)
    if n <= 1:
        return str(int(x) * int(y))  # single digit multiplication
    x1, x0 = x[: n // 2], x[n // 2 :]
    y1, y0 = y[: n // 2], y[n // 2 :]
    A = M(x1, y1)
    B = M(x0, y0)
    P = M(add(x1, x0), add(y1, y0))
    S = sub(P, A, B)  # the cross terms x1*y0 + x0*y1
    R = add(shift(A, n), shift(S, n // 2), B)
    return R


if __name__ == "__main__":
    import sys

    if len(sys.argv) != 3:
        exit("usage: karatsuba A B")
    A, B = sys.argv[1:]
    al = align(A, B)
    n = len(al[0]) + 1
    P = M(A, B)
    print(A, "*", B, "=", P.lstrip("0") or "0")
# 07-geometry
# Bentley Ottmann
"""Find all intersection points among a set of line segments using a
simplified Bentley–Ottmann sweep. Maintains an active list of segments and
schedules intersection checks through an event queue."""
import heapq


def intersect(a, b):
    """Return the intersection point of segments a and b, or None.

    Uses the line-line determinant formula, then checks the point lies
    within both segments' bounding boxes. Near-parallel segments
    (|d| < 1e-9) are treated as non-intersecting.
    """
    (x1, y1), (x2, y2) = a
    (x3, y3), (x4, y4) = b
    d = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4)
    if abs(d) < 1e-9:
        return None
    px = ((x1 * y2 - y1 * x2) * (x3 - x4) - (x1 - x2) * (x3 * y4 - y3 * x4)) / d
    py = ((x1 * y2 - y1 * x2) * (y3 - y4) - (y1 - y2) * (x3 * y4 - y3 * x4)) / d
    if (
        min(x1, x2) <= px <= max(x1, x2)
        and min(y1, y2) <= py <= max(y1, y2)
        and min(x3, x4) <= px <= max(x3, x4)
        and min(y3, y4) <= py <= max(y3, y4)
    ):
        return px, py


def bentley_ottmann(segments):
    """Return the set of intersection points (rounded to 6 decimals).

    Simplified sweep: one event per segment at its leftmost x; a segment
    toggles in/out of the active list and is tested against all active
    segments.
    """
    events = []
    for p, q in segments:
        heapq.heappush(events, (min(p[0], q[0]), p, q))
    res = set()
    active = []
    while events:
        x, p, q = heapq.heappop(events)
        seg = [s for s in segments if s == (p, q) or s == (q, p)][0]
        if seg not in active:
            active.append(seg)
        else:
            active.remove(seg)
        for s in active:
            if s != seg:
                r = intersect(s, seg)
                if r:
                    res.add((round(r[0], 6), round(r[1], 6)))
    return res
"""The Edmonds–Karp variant of the Ford–Fulkerson algorithm for computing
maximum flow."""
from collections import namedtuple as T

# V: vertices (ints), E: edges, c: capacity dict, F: residual matrix.
Graph = T("Graph", "V E c F")


def create_graph(V, E, c):
    """Build a Graph whose residual matrix F is initialized to capacities."""
    n = len(V)
    F = [[0] * n for _ in range(n)]
    for v, u in c:
        F[v][u] = c[(v, u)]
    return Graph(V=V, E=E, c=c, F=F)
def create_path(parent, t):
    """Walk parent pointers from t back to the source; return the path.

    NOTE(review): the loop stops when v is falsy, so this assumes the
    source vertex is 0 — confirm against callers.
    """
    path = [t]
    v = t
    while v:
        v = parent[v]
        path.append(v)
    return tuple(reversed(path))


def bfs(graph, s, t):
    """Find an augmenting s-t path through positive residual capacity.

    Returns the path as a tuple, or None if t is unreachable.
    """
    q = [s]
    parent = {}
    while q:
        v = q.pop(0)
        for u in graph.V:
            if u in parent:
                continue  # seen it before
            if graph.F[v][u] <= 0:
                continue  # vu saturated
            parent[u] = v
            if u == t:
                return create_path(parent, t)
            q.append(u)
def edges(p):
    """Yield consecutive vertex pairs along path p."""
    return zip(p, p[1:])


def maxflow(graph, s, t):
    """Edmonds–Karp maximum flow from s to t; mutates graph.F in place.

    Repeatedly augments along BFS paths until none remain.
    """
    flow = 0
    while P := bfs(graph, s, t):
        bottleneck = min(graph.F[v][u] for (v, u) in edges(P))
        print(P, bottleneck)  # NOTE(review): debug trace of each augmenting path
        if bottleneck == 0:
            return flow
        flow += bottleneck
        for i in range(1, len(P)):
            v, u = P[i - 1], P[i]
            # Push flow forward, add residual capacity backward.
            graph.F[v][u] -= bottleneck
            graph.F[u][v] += bottleneck
    return flow
# Hungarian
"""Hungarian algorithm for the assignment problem.

Given an n-by-n cost matrix cost, the assignment problem asks for a bijection
between rows and columns minimizing the total cost
    sum(cost[i][ass[i]] for i in range(n)).
Equivalently, this is a minimum-cost perfect matching problem in a complete
bipartite graph with rows on one side, columns on the other, and edge weight
cost[i][j].

The Hungarian algorithm maintains dual potentials on rows and columns and
repeatedly augments a partial matching along zero reduced-cost edges, in the
classical O(n^3) shortest augmenting-path formulation.

Input:
    cost: square matrix, where cost[i][j] is the cost of assigning row i to
    column j.
Output:
    (value, ass), where value is the minimum total cost, ass[i] = j means row
    i is assigned to column j.

Runs in O(n^3) time and O(n^2) space for the input matrix."""


def check_square(cost):
    """Return n after verifying cost is an n-by-n matrix; raise otherwise."""
    n = len(cost)
    if any(len(row) != n for row in cost):
        raise ValueError("cost matrix must be square")
    return n


def assignment_cost(cost, ass):
    """Total cost of assignment `ass` (row i -> column ass[i])."""
    return sum(cost[i][ass[i]] for i in range(len(cost)))


def hungarian(cost):
    """Return (minimum total cost, assignment list) for square matrix cost."""
    n = check_square(cost)
    if n == 0:
        return 0, []
    # 1-indexed internally, following the standard compact formulation.
    u = [0] * (n + 1)  # row potentials
    v = [0] * (n + 1)  # column potentials
    p = [0] * (n + 1)  # matched row for each column
    way = [0] * (n + 1)  # predecessor columns for augmentation
    for i in range(1, n + 1):
        p[0] = i
        minv = [float("inf")] * (n + 1)
        used = [False] * (n + 1)
        j0 = 0
        while True:
            used[j0] = True
            i0 = p[j0]
            bottleneck = float("inf")
            j1 = 0
            for j in range(1, n + 1):
                if used[j]:
                    continue
                this = cost[i0 - 1][j - 1] - u[i0] - v[j]
                if this < minv[j]:
                    minv[j] = this
                    way[j] = j0
                if minv[j] < bottleneck:
                    bottleneck = minv[j]
                    j1 = j
            # Update potentials so the chosen edge becomes tight.
            for j in range(n + 1):
                if used[j]:
                    u[p[j]] += bottleneck
                    v[j] -= bottleneck
                else:
                    minv[j] -= bottleneck
            j0 = j1
            if p[j0] == 0:
                break  # reached an unmatched column: augment
        while True:
            j1 = way[j0]
            p[j0] = p[j1]
            j0 = j1
            if j0 == 0:
                break
    ass = [-1] * n
    for j in range(1, n + 1):
        ass[p[j] - 1] = j - 1
    value = assignment_cost(cost, ass)
    return value, ass
"""Knuth–Morris–Pratt (KMP) algorithm for pattern matching."""


def match(text, pattern):
    """Yield indices i where text[i:len(P)] == P"""
    for i in range(len(text)):
        if text[i : i + len(pattern)] == pattern:
            yield i


def bf(text, pattern):
    """Return the indices i where text[i:len(P)] == P"""
    if len(text) <= len(pattern):
        return [0] if text == pattern else []
    indices = []
    for i in range(len(text) - len(pattern) + 1):
        matches = True
        for j in range(len(pattern)):
            if text[i + j] != pattern[j]:
                matches = False
                break
        if matches:
            indices.append(i)
    return indices


def longest_prefix_suffix(pattern):
    """Return lps table. `lps[i]` is where to start matching if first
    mismatch at i+1, or length of longest suffix that is also a prefix of
    the string from 0 to i.

    Fixes: removed a leftover debug print, and idx now only advances when
    a character is consumed — advancing after the fallback (n = lps[n-1])
    left entries like lps("aabaaa")[5] wrong.
    """
    n, k = 0, len(pattern)
    lps = [0] * k
    idx = 1
    while idx < k:
        if pattern[idx] == pattern[n]:
            n += 1
            lps[idx] = n
            idx += 1
        elif n != 0:
            # Fall back to the next shorter border; retry the same idx.
            n = lps[n - 1]
        else:
            lps[idx] = 0
            idx += 1
    return lps


def kmp(text, pattern, lps=None):
    """Yield indices idx where text[idx:idx+N] == P"""
    N, M = len(text), len(pattern)
    if lps is None:
        lps = longest_prefix_suffix(pattern)
    txt_idx = 0
    pat_idx = 0
    while txt_idx < N:
        if pattern[pat_idx] == text[txt_idx]:
            txt_idx += 1
            pat_idx += 1
            if pat_idx == M:
                yield txt_idx - pat_idx
                pat_idx = lps[pat_idx - 1]
        # mismatch after pat_idx matches
        elif txt_idx < N and pattern[pat_idx] != text[txt_idx]:
            # Do not match lps[0..lps[pat_idx-1]] characters,
            # they will match anyway
            if pat_idx != 0:
                pat_idx = lps[pat_idx - 1]
            else:
                txt_idx += 1
"""Z algorithm for linear-time prefix matching.For each position i in a string s, let Z[i] be the length of the longestprefix of s matching the substring starting at i. The algorithm maintainsa rightmost matching interval [l, r) and reuses it to avoid redundantcomparisons.Runs in O(n) time."""defz_algorithm(s):
n=len(s)
Z= [0] *nl=r=0foriinrange(1, n):
ifi<r:
Z[i] =min(r-i, Z[i-l])
whilei+Z[i] <nands[Z[i]] ==s[i+Z[i]]:
Z[i] +=1ifi+Z[i] >r:
l, r=i, i+Z[i]
returnZ
"""Modular exponentiation by repeated squaring.Compute $a^b \\mod m$ by scanning the bits of $b$ from low to high. Repeatedlysquare the base, and whenever the current bit is set, multiply it into theanswer modulo $m$.Runs in $O(\log b)$ time."""defmodexp(a, b, m):
assertb>=0assertm>0x=a%my=1whileb:
ifb&1:
y= (y*x) %mx= (x*x) %mb>>=1returny
Primes
"""Factorize a number into prime factors."""frommathimportsqrtdeffactoring(n):
whilen%2==0:
yield2n//=2foriinrange(3, int(sqrt(n)) +1, 2):
whilen%i==0:
yieldin//=iifn>1:
yieldnN=2310*510510print(f"factors({N}) =", *factoring(N))
Sieve
"""The Sieve of Eratosthenes — an ancient algorithm for finding all prime numbers up to any given limit."""defsieve(n):
ifn<2:
returnprime= [True] * (n+1)
prime[0] =Falseprime[1] =Falseforiinrange(2, n+1):
ifnotprime[i]:
continueyieldiforjinrange(i+i, n, i):
prime[j] =Falseprint(*sieve(101))
Subset-xor
"""Subset XOR via Gaussian elimination over $\mathbb{F}_2$.Given a multiset of nonnegative integers, we may form the XOR of any subset.Viewing each integer as a bit vector, XOR is exactly vector addition over$\mathbb{F}_2$. Thus the set of all subset XOR values is the linear span ofthe input vectors.The core step is Gaussian elimination over $\mathbb{F}_2$: repeatedly pick apivot bit, use it to eliminate that bit from all other vectors, and therebyobtain a basis of the span. From this basis we can- enumerate all subset XOR values,- count how many distinct values are reachable,- test whether a target value is reachable, and- compute the maximum obtainable XOR.The elimination runs in $O(n B)$ time for $n$ integers of $B$ bits, up toconstant factors depending on the representation."""defxor_basis(nums):
"""Return a reduced XOR basis for the span of nums."""basis= []
forxinnums:
y=xforbinbasis:
y=min(y, y^b)
ify:
basis.append(y)
basis.sort(reverse=True)
returnbasisdefsubset_xor_values(nums):
"""Return all distinct subset XOR values."""values= [0]
forbinxor_basis(nums):
values+= [v^bforvinvalues]
returnvaluesdefcount_distinct_subset_xors(nums):
"""Return the number of distinct subset XOR values."""return1<<len(xor_basis(nums))
defmax_subset_xor(nums):
"""Return the maximum XOR obtainable from a subset of nums."""ans=0forbinsorted(xor_basis(nums), reverse=True):
ans=max(ans, ans^b)
returnansdefsubset_xor_reachable(nums, target):
"""Return whether target is the XOR of some subset of nums."""x=targetforbinsorted(xor_basis(nums), reverse=True):
x=min(x, x^b)
returnx==0defsubset_xor_sum(nums):
"""Return the sum of all subset XOR values, counted with multiplicity."""nums=list(nums)
ifnotnums:
return0bit_or=0forxinnums:
bit_or|=xreturnbit_or<< (len(nums) -1)