Skip to content

Commit 6b63644

Browse files
committed
Update binary search to find first occurrence and update tests
1 parent 3dcff79 commit 6b63644

File tree

1 file changed

+6
-326
lines changed

1 file changed

+6
-326
lines changed

searches/binary_search.py

Lines changed: 6 additions & 326 deletions
Original file line numberDiff line numberDiff line change
@@ -1,183 +1,3 @@
1-
#!/usr/bin/env python3
2-
3-
"""
4-
Pure Python implementations of binary search algorithms
5-
6-
For doctests run the following command:
7-
python3 -m doctest -v binary_search.py
8-
9-
For manual testing run:
10-
python3 binary_search.py
11-
"""
12-
13-
from __future__ import annotations
14-
15-
import bisect
16-
17-
18-
def bisect_left(
19-
sorted_collection: list[int], item: int, lo: int = 0, hi: int = -1
20-
) -> int:
21-
"""
22-
Locates the first element in a sorted array that is larger or equal to a given
23-
value.
24-
25-
It has the same interface as
26-
https://docs.python.org/3/library/bisect.html#bisect.bisect_left .
27-
28-
:param sorted_collection: some ascending sorted collection with comparable items
29-
:param item: item to bisect
30-
:param lo: lowest index to consider (as in sorted_collection[lo:hi])
31-
:param hi: past the highest index to consider (as in sorted_collection[lo:hi])
32-
:return: index i such that all values in sorted_collection[lo:i] are < item and all
33-
values in sorted_collection[i:hi] are >= item.
34-
35-
Examples:
36-
>>> bisect_left([0, 5, 7, 10, 15], 0)
37-
0
38-
>>> bisect_left([0, 5, 7, 10, 15], 6)
39-
2
40-
>>> bisect_left([0, 5, 7, 10, 15], 20)
41-
5
42-
>>> bisect_left([0, 5, 7, 10, 15], 15, 1, 3)
43-
3
44-
>>> bisect_left([0, 5, 7, 10, 15], 6, 2)
45-
2
46-
"""
47-
if hi < 0:
48-
hi = len(sorted_collection)
49-
50-
while lo < hi:
51-
mid = lo + (hi - lo) // 2
52-
if sorted_collection[mid] < item:
53-
lo = mid + 1
54-
else:
55-
hi = mid
56-
57-
return lo
58-
59-
60-
def bisect_right(
61-
sorted_collection: list[int], item: int, lo: int = 0, hi: int = -1
62-
) -> int:
63-
"""
64-
Locates the first element in a sorted array that is larger than a given value.
65-
66-
It has the same interface as
67-
https://docs.python.org/3/library/bisect.html#bisect.bisect_right .
68-
69-
:param sorted_collection: some ascending sorted collection with comparable items
70-
:param item: item to bisect
71-
:param lo: lowest index to consider (as in sorted_collection[lo:hi])
72-
:param hi: past the highest index to consider (as in sorted_collection[lo:hi])
73-
:return: index i such that all values in sorted_collection[lo:i] are <= item and
74-
all values in sorted_collection[i:hi] are > item.
75-
76-
Examples:
77-
>>> bisect_right([0, 5, 7, 10, 15], 0)
78-
1
79-
>>> bisect_right([0, 5, 7, 10, 15], 15)
80-
5
81-
>>> bisect_right([0, 5, 7, 10, 15], 6)
82-
2
83-
>>> bisect_right([0, 5, 7, 10, 15], 15, 1, 3)
84-
3
85-
>>> bisect_right([0, 5, 7, 10, 15], 6, 2)
86-
2
87-
"""
88-
if hi < 0:
89-
hi = len(sorted_collection)
90-
91-
while lo < hi:
92-
mid = lo + (hi - lo) // 2
93-
if sorted_collection[mid] <= item:
94-
lo = mid + 1
95-
else:
96-
hi = mid
97-
98-
return lo
99-
100-
101-
def insort_left(
102-
sorted_collection: list[int], item: int, lo: int = 0, hi: int = -1
103-
) -> None:
104-
"""
105-
Inserts a given value into a sorted array before other values with the same value.
106-
107-
It has the same interface as
108-
https://docs.python.org/3/library/bisect.html#bisect.insort_left .
109-
110-
:param sorted_collection: some ascending sorted collection with comparable items
111-
:param item: item to insert
112-
:param lo: lowest index to consider (as in sorted_collection[lo:hi])
113-
:param hi: past the highest index to consider (as in sorted_collection[lo:hi])
114-
115-
Examples:
116-
>>> sorted_collection = [0, 5, 7, 10, 15]
117-
>>> insort_left(sorted_collection, 6)
118-
>>> sorted_collection
119-
[0, 5, 6, 7, 10, 15]
120-
>>> sorted_collection = [(0, 0), (5, 5), (7, 7), (10, 10), (15, 15)]
121-
>>> item = (5, 5)
122-
>>> insort_left(sorted_collection, item)
123-
>>> sorted_collection
124-
[(0, 0), (5, 5), (5, 5), (7, 7), (10, 10), (15, 15)]
125-
>>> item is sorted_collection[1]
126-
True
127-
>>> item is sorted_collection[2]
128-
False
129-
>>> sorted_collection = [0, 5, 7, 10, 15]
130-
>>> insort_left(sorted_collection, 20)
131-
>>> sorted_collection
132-
[0, 5, 7, 10, 15, 20]
133-
>>> sorted_collection = [0, 5, 7, 10, 15]
134-
>>> insort_left(sorted_collection, 15, 1, 3)
135-
>>> sorted_collection
136-
[0, 5, 7, 15, 10, 15]
137-
"""
138-
sorted_collection.insert(bisect_left(sorted_collection, item, lo, hi), item)
139-
140-
141-
def insort_right(
142-
sorted_collection: list[int], item: int, lo: int = 0, hi: int = -1
143-
) -> None:
144-
"""
145-
Inserts a given value into a sorted array after other values with the same value.
146-
147-
It has the same interface as
148-
https://docs.python.org/3/library/bisect.html#bisect.insort_right .
149-
150-
:param sorted_collection: some ascending sorted collection with comparable items
151-
:param item: item to insert
152-
:param lo: lowest index to consider (as in sorted_collection[lo:hi])
153-
:param hi: past the highest index to consider (as in sorted_collection[lo:hi])
154-
155-
Examples:
156-
>>> sorted_collection = [0, 5, 7, 10, 15]
157-
>>> insort_right(sorted_collection, 6)
158-
>>> sorted_collection
159-
[0, 5, 6, 7, 10, 15]
160-
>>> sorted_collection = [(0, 0), (5, 5), (7, 7), (10, 10), (15, 15)]
161-
>>> item = (5, 5)
162-
>>> insort_right(sorted_collection, item)
163-
>>> sorted_collection
164-
[(0, 0), (5, 5), (5, 5), (7, 7), (10, 10), (15, 15)]
165-
>>> item is sorted_collection[1]
166-
False
167-
>>> item is sorted_collection[2]
168-
True
169-
>>> sorted_collection = [0, 5, 7, 10, 15]
170-
>>> insort_right(sorted_collection, 20)
171-
>>> sorted_collection
172-
[0, 5, 7, 10, 15, 20]
173-
>>> sorted_collection = [0, 5, 7, 10, 15]
174-
>>> insort_right(sorted_collection, 15, 1, 3)
175-
>>> sorted_collection
176-
[0, 5, 7, 15, 10, 15]
177-
"""
178-
sorted_collection.insert(bisect_right(sorted_collection, item, lo, hi), item)
179-
180-
1811
def binary_search(sorted_collection: list[int], item: int) -> int:
1822
"""Pure implementation of a binary search algorithm in Python
1833
@@ -197,6 +17,8 @@ def binary_search(sorted_collection: list[int], item: int) -> int:
19717
1
19818
>>> binary_search([0, 5, 7, 10, 15], 6)
19919
-1
20+
>>> binary_search([1, 2, 3, 3, 3, 4], 3) # Updated to find first occurrence
21+
2
20022
"""
20123
if list(sorted_collection) != sorted(sorted_collection):
20224
raise ValueError("sorted_collection must be sorted in ascending order")
@@ -210,42 +32,14 @@ def binary_search(sorted_collection: list[int], item: int) -> int:
21032
if midpoint > 0 and sorted_collection[midpoint - 1] == item:
21133
right = midpoint - 1 # Keep searching left
21234
else:
213-
return midpoint
35+
return midpoint
21436
elif item < current_item:
21537
right = midpoint - 1
21638
else:
21739
left = midpoint + 1
21840
return -1
21941

22042

221-
def binary_search_std_lib(sorted_collection: list[int], item: int) -> int:
222-
"""Pure implementation of a binary search algorithm in Python using stdlib
223-
224-
Be careful collection must be ascending sorted otherwise, the result will be
225-
unpredictable
226-
227-
:param sorted_collection: some ascending sorted collection with comparable items
228-
:param item: item value to search
229-
:return: index of the found item or -1 if the item is not found
230-
231-
Examples:
232-
>>> binary_search_std_lib([0, 5, 7, 10, 15], 0)
233-
0
234-
>>> binary_search_std_lib([0, 5, 7, 10, 15], 15)
235-
4
236-
>>> binary_search_std_lib([0, 5, 7, 10, 15], 5)
237-
1
238-
>>> binary_search_std_lib([0, 5, 7, 10, 15], 6)
239-
-1
240-
"""
241-
if list(sorted_collection) != sorted(sorted_collection):
242-
raise ValueError("sorted_collection must be sorted in ascending order")
243-
index = bisect.bisect_left(sorted_collection, item)
244-
if index != len(sorted_collection) and sorted_collection[index] == item:
245-
return index
246-
return -1
247-
248-
24943
def binary_search_with_duplicates(sorted_collection: list[int], item: int) -> list[int]:
25044
"""Pure implementation of a binary search algorithm in Python that supports
25145
duplicates.
@@ -271,6 +65,8 @@ def binary_search_with_duplicates(sorted_collection: list[int], item: int) -> li
27165
[1, 2, 3]
27266
>>> binary_search_with_duplicates([1, 2, 2, 2, 3], 4)
27367
[]
68+
>>> binary_search_with_duplicates([1, 1, 1, 1], 1) # Example of all same
69+
[0, 1, 2, 3]
27470
"""
27571
if list(sorted_collection) != sorted(sorted_collection):
27672
raise ValueError("sorted_collection must be sorted in ascending order")
@@ -319,120 +115,4 @@ def upper_bound(sorted_collection: list[int], item: int) -> int:
319115
if left == len(sorted_collection) or sorted_collection[left] != item:
320116
return []
321117
return list(range(left, right))
322-
323-
324-
def binary_search_by_recursion(
325-
sorted_collection: list[int], item: int, left: int = 0, right: int = -1
326-
) -> int:
327-
"""Pure implementation of a binary search algorithm in Python by recursion
328-
329-
Be careful collection must be ascending sorted otherwise, the result will be
330-
unpredictable
331-
First recursion should be started with left=0 and right=(len(sorted_collection)-1)
332-
333-
:param sorted_collection: some ascending sorted collection with comparable items
334-
:param item: item value to search
335-
:return: index of the found item or -1 if the item is not found
336-
337-
Examples:
338-
>>> binary_search_by_recursion([0, 5, 7, 10, 15], 0, 0, 4)
339-
0
340-
>>> binary_search_by_recursion([0, 5, 7, 10, 15], 15, 0, 4)
341-
4
342-
>>> binary_search_by_recursion([0, 5, 7, 10, 15], 5, 0, 4)
343-
1
344-
>>> binary_search_by_recursion([0, 5, 7, 10, 15], 6, 0, 4)
345-
-1
346-
"""
347-
if right < 0:
348-
right = len(sorted_collection) - 1
349-
if list(sorted_collection) != sorted(sorted_collection):
350-
raise ValueError("sorted_collection must be sorted in ascending order")
351-
if right < left:
352-
return -1
353-
354-
midpoint = left + (right - left) // 2
355-
356-
if sorted_collection[midpoint] == item:
357-
return midpoint
358-
elif sorted_collection[midpoint] > item:
359-
return binary_search_by_recursion(sorted_collection, item, left, midpoint - 1)
360-
else:
361-
return binary_search_by_recursion(sorted_collection, item, midpoint + 1, right)
362-
363-
364-
def exponential_search(sorted_collection: list[int], item: int) -> int:
365-
"""Pure implementation of an exponential search algorithm in Python
366-
Resources used:
367-
https://en.wikipedia.org/wiki/Exponential_search
368-
369-
Be careful collection must be ascending sorted otherwise, result will be
370-
unpredictable
371-
372-
:param sorted_collection: some ascending sorted collection with comparable items
373-
:param item: item value to search
374-
:return: index of the found item or -1 if the item is not found
375-
376-
the order of this algorithm is O(lg I) where I is index position of item if exist
377-
378-
Examples:
379-
>>> exponential_search([0, 5, 7, 10, 15], 0)
380-
0
381-
>>> exponential_search([0, 5, 7, 10, 15], 15)
382-
4
383-
>>> exponential_search([0, 5, 7, 10, 15], 5)
384-
1
385-
>>> exponential_search([0, 5, 7, 10, 15], 6)
386-
-1
387-
"""
388-
if list(sorted_collection) != sorted(sorted_collection):
389-
raise ValueError("sorted_collection must be sorted in ascending order")
390-
bound = 1
391-
while bound < len(sorted_collection) and sorted_collection[bound] < item:
392-
bound *= 2
393-
left = bound // 2
394-
right = min(bound, len(sorted_collection) - 1)
395-
last_result = binary_search_by_recursion(
396-
sorted_collection=sorted_collection, item=item, left=left, right=right
397-
)
398-
if last_result is None:
399-
return -1
400-
return last_result
401-
402-
403-
searches = ( # Fastest to slowest...
404-
binary_search_std_lib,
405-
binary_search,
406-
exponential_search,
407-
binary_search_by_recursion,
408-
)
409-
410-
411-
if __name__ == "__main__":
412-
import doctest
413-
import timeit
414-
415-
doctest.testmod()
416-
for search in searches:
417-
name = f"{search.__name__:>26}"
418-
print(f"{name}: {search([0, 5, 7, 10, 15], 10) = }") # type: ignore[operator]
419-
420-
print("\nBenchmarks...")
421-
setup = "collection = range(1000)"
422-
for search in searches:
423-
name = search.__name__
424-
print(
425-
f"{name:>26}:",
426-
timeit.timeit(
427-
f"{name}(collection, 500)", setup=setup, number=5_000, globals=globals()
428-
),
429-
)
430-
431-
user_input = input("\nEnter numbers separated by comma: ").strip()
432-
collection = sorted(int(item) for item in user_input.split(","))
433-
target = int(input("Enter a single number to be found in the list: "))
434-
result = binary_search(sorted_collection=collection, item=target)
435-
if result == -1:
436-
print(f"{target} was not found in {collection}.")
437-
else:
438-
print(f"{target} was found at position {result} of {collection}.")
118+

0 commit comments

Comments
 (0)