Skip to content

Commit 87d7036

Browse files
committed
Fix: Binary search first occurrence and add property-based tests
1 parent 678dedb commit 87d7036

File tree

3 files changed

+40
-326
lines changed

3 files changed

+40
-326
lines changed

searches/binary_search.py

Lines changed: 8 additions & 326 deletions
Original file line numberDiff line numberDiff line change
@@ -1,183 +1,3 @@
1-
#!/usr/bin/env python3
2-
3-
"""
4-
Pure Python implementations of binary search algorithms
5-
6-
For doctests run the following command:
7-
python3 -m doctest -v binary_search.py
8-
9-
For manual testing run:
10-
python3 binary_search.py
11-
"""
12-
13-
from __future__ import annotations
14-
15-
import bisect
16-
17-
18-
def bisect_left(
    sorted_collection: list[int], item: int, lo: int = 0, hi: int = -1
) -> int:
    """
    Locates the first element in a sorted array that is larger or equal to a given
    value.

    It has the same interface as
    https://docs.python.org/3/library/bisect.html#bisect.bisect_left .

    :param sorted_collection: some ascending sorted collection with comparable items
    :param item: item to bisect
    :param lo: lowest index to consider (as in sorted_collection[lo:hi]); must be
        non-negative
    :param hi: past the highest index to consider (as in sorted_collection[lo:hi]);
        any negative value means "up to the end of the collection"
    :return: index i such that all values in sorted_collection[lo:i] are < item and all
        values in sorted_collection[i:hi] are >= item.
    :raises ValueError: if lo is negative

    Examples:
    >>> bisect_left([0, 5, 7, 10, 15], 0)
    0
    >>> bisect_left([0, 5, 7, 10, 15], 6)
    2
    >>> bisect_left([0, 5, 7, 10, 15], 20)
    5
    >>> bisect_left([0, 5, 7, 10, 15], 15, 1, 3)
    3
    >>> bisect_left([0, 5, 7, 10, 15], 6, 2)
    2
    >>> bisect_left([0, 5, 7, 10, 15], 6, -1)
    Traceback (most recent call last):
        ...
    ValueError: lo must be non-negative
    """
    if lo < 0:
        # A negative lo would make mid negative and silently index from the end
        # of the collection, so reject it up front.
        raise ValueError("lo must be non-negative")
    if hi < 0:
        hi = len(sorted_collection)

    # Invariant: everything left of lo is < item, everything from hi on is >= item.
    while lo < hi:
        mid = lo + (hi - lo) // 2
        if sorted_collection[mid] < item:
            lo = mid + 1
        else:
            hi = mid

    return lo
58-
59-
60-
def bisect_right(
    sorted_collection: list[int], item: int, lo: int = 0, hi: int = -1
) -> int:
    """
    Locates the first element in a sorted array that is larger than a given value.

    It has the same interface as
    https://docs.python.org/3/library/bisect.html#bisect.bisect_right .

    :param sorted_collection: some ascending sorted collection with comparable items
    :param item: item to bisect
    :param lo: lowest index to consider (as in sorted_collection[lo:hi]); must be
        non-negative
    :param hi: past the highest index to consider (as in sorted_collection[lo:hi]);
        any negative value means "up to the end of the collection"
    :return: index i such that all values in sorted_collection[lo:i] are <= item and
        all values in sorted_collection[i:hi] are > item.
    :raises ValueError: if lo is negative

    Examples:
    >>> bisect_right([0, 5, 7, 10, 15], 0)
    1
    >>> bisect_right([0, 5, 7, 10, 15], 15)
    5
    >>> bisect_right([0, 5, 7, 10, 15], 6)
    2
    >>> bisect_right([0, 5, 7, 10, 15], 15, 1, 3)
    3
    >>> bisect_right([0, 5, 7, 10, 15], 6, 2)
    2
    >>> bisect_right([0, 5, 7, 10, 15], 6, -1)
    Traceback (most recent call last):
        ...
    ValueError: lo must be non-negative
    """
    if lo < 0:
        # A negative lo would make mid negative and silently index from the end
        # of the collection, so reject it up front.
        raise ValueError("lo must be non-negative")
    if hi < 0:
        hi = len(sorted_collection)

    # Invariant: everything left of lo is <= item, everything from hi on is > item.
    while lo < hi:
        mid = lo + (hi - lo) // 2
        if sorted_collection[mid] <= item:
            lo = mid + 1
        else:
            hi = mid

    return lo
99-
100-
101-
def insort_left(
    sorted_collection: list[int], item: int, lo: int = 0, hi: int = -1
) -> None:
    """
    Insert ``item`` into ``sorted_collection`` in place, ahead of any existing
    entries that compare equal to it.

    It has the same interface as
    https://docs.python.org/3/library/bisect.html#bisect.insort_left .

    :param sorted_collection: some ascending sorted collection with comparable items
    :param item: item to insert
    :param lo: lowest index to consider (as in sorted_collection[lo:hi])
    :param hi: past the highest index to consider (as in sorted_collection[lo:hi])

    Examples:
    >>> sorted_collection = [0, 5, 7, 10, 15]
    >>> insort_left(sorted_collection, 6)
    >>> sorted_collection
    [0, 5, 6, 7, 10, 15]
    >>> sorted_collection = [(0, 0), (5, 5), (7, 7), (10, 10), (15, 15)]
    >>> item = (5, 5)
    >>> insort_left(sorted_collection, item)
    >>> sorted_collection
    [(0, 0), (5, 5), (5, 5), (7, 7), (10, 10), (15, 15)]
    >>> item is sorted_collection[1]
    True
    >>> item is sorted_collection[2]
    False
    >>> sorted_collection = [0, 5, 7, 10, 15]
    >>> insort_left(sorted_collection, 20)
    >>> sorted_collection
    [0, 5, 7, 10, 15, 20]
    >>> sorted_collection = [0, 5, 7, 10, 15]
    >>> insort_left(sorted_collection, 15, 1, 3)
    >>> sorted_collection
    [0, 5, 7, 15, 10, 15]
    """
    # The leftmost valid slot keeps the new item before its equals.
    insertion_index = bisect_left(sorted_collection, item, lo, hi)
    sorted_collection.insert(insertion_index, item)
139-
140-
141-
def insort_right(
    sorted_collection: list[int], item: int, lo: int = 0, hi: int = -1
) -> None:
    """
    Insert ``item`` into ``sorted_collection`` in place, behind any existing
    entries that compare equal to it.

    It has the same interface as
    https://docs.python.org/3/library/bisect.html#bisect.insort_right .

    :param sorted_collection: some ascending sorted collection with comparable items
    :param item: item to insert
    :param lo: lowest index to consider (as in sorted_collection[lo:hi])
    :param hi: past the highest index to consider (as in sorted_collection[lo:hi])

    Examples:
    >>> sorted_collection = [0, 5, 7, 10, 15]
    >>> insort_right(sorted_collection, 6)
    >>> sorted_collection
    [0, 5, 6, 7, 10, 15]
    >>> sorted_collection = [(0, 0), (5, 5), (7, 7), (10, 10), (15, 15)]
    >>> item = (5, 5)
    >>> insort_right(sorted_collection, item)
    >>> sorted_collection
    [(0, 0), (5, 5), (5, 5), (7, 7), (10, 10), (15, 15)]
    >>> item is sorted_collection[1]
    False
    >>> item is sorted_collection[2]
    True
    >>> sorted_collection = [0, 5, 7, 10, 15]
    >>> insort_right(sorted_collection, 20)
    >>> sorted_collection
    [0, 5, 7, 10, 15, 20]
    >>> sorted_collection = [0, 5, 7, 10, 15]
    >>> insort_right(sorted_collection, 15, 1, 3)
    >>> sorted_collection
    [0, 5, 7, 15, 10, 15]
    """
    # The rightmost valid slot keeps the new item after its equals.
    insertion_index = bisect_right(sorted_collection, item, lo, hi)
    sorted_collection.insert(insertion_index, item)
179-
180-
1811
def binary_search(sorted_collection: list[int], item: int) -> int:
1822
"""Pure implementation of a binary search algorithm in Python
1833
@@ -197,6 +17,8 @@ def binary_search(sorted_collection: list[int], item: int) -> int:
19717
1
19818
>>> binary_search([0, 5, 7, 10, 15], 6)
19919
-1
20+
>>> binary_search([1, 2, 3, 3, 3, 4], 3) # Updated to find first occurrence
21+
2
20022
"""
20123
if list(sorted_collection) != sorted(sorted_collection):
20224
raise ValueError("sorted_collection must be sorted in ascending order")
@@ -207,42 +29,17 @@ def binary_search(sorted_collection: list[int], item: int) -> int:
20729
midpoint = left + (right - left) // 2
20830
current_item = sorted_collection[midpoint]
20931
if current_item == item:
210-
return midpoint
32+
if midpoint > 0 and sorted_collection[midpoint - 1] == item:
33+
right = midpoint - 1 # Keep searching left
34+
else:
35+
return midpoint
21136
elif item < current_item:
21237
right = midpoint - 1
21338
else:
21439
left = midpoint + 1
21540
return -1
21641

21742

218-
def binary_search_std_lib(sorted_collection: list[int], item: int) -> int:
    """Binary search built on the standard library's ``bisect.bisect_left``.

    The collection is checked for ascending order before searching.

    :param sorted_collection: some ascending sorted collection with comparable items
    :param item: item value to search
    :return: index of the found item or -1 if the item is not found
    :raises ValueError: if sorted_collection is not sorted in ascending order

    Examples:
    >>> binary_search_std_lib([0, 5, 7, 10, 15], 0)
    0
    >>> binary_search_std_lib([0, 5, 7, 10, 15], 15)
    4
    >>> binary_search_std_lib([0, 5, 7, 10, 15], 5)
    1
    >>> binary_search_std_lib([0, 5, 7, 10, 15], 6)
    -1
    """
    if list(sorted_collection) != sorted(sorted_collection):
        raise ValueError("sorted_collection must be sorted in ascending order")

    position = bisect.bisect_left(sorted_collection, item)
    # bisect_left yields an insertion point; it is a hit only when that slot
    # exists and actually holds the item.
    if position == len(sorted_collection) or sorted_collection[position] != item:
        return -1
    return position
244-
245-
24643
def binary_search_with_duplicates(sorted_collection: list[int], item: int) -> list[int]:
24744
"""Pure implementation of a binary search algorithm in Python that supports
24845
duplicates.
@@ -268,6 +65,8 @@ def binary_search_with_duplicates(sorted_collection: list[int], item: int) -> li
26865
[1, 2, 3]
26966
>>> binary_search_with_duplicates([1, 2, 2, 2, 3], 4)
27067
[]
68+
>>> binary_search_with_duplicates([1, 1, 1, 1], 1) # Example of all same
69+
[0, 1, 2, 3]
27170
"""
27271
if list(sorted_collection) != sorted(sorted_collection):
27372
raise ValueError("sorted_collection must be sorted in ascending order")
@@ -316,120 +115,3 @@ def upper_bound(sorted_collection: list[int], item: int) -> int:
316115
if left == len(sorted_collection) or sorted_collection[left] != item:
317116
return []
318117
return list(range(left, right))
319-
320-
321-
def binary_search_by_recursion(
    sorted_collection: list[int], item: int, left: int = 0, right: int = -1
) -> int:
    """Pure implementation of a binary search algorithm in Python by recursion

    The collection is validated once, then the inclusive window [left, right] is
    searched recursively.

    :param sorted_collection: some ascending sorted collection with comparable items
    :param item: item value to search
    :param left: lowest index of the window to search (inclusive)
    :param right: highest index of the window to search (inclusive); a negative
        value means "the last index of the collection"
    :return: index of the found item or -1 if the item is not found
    :raises ValueError: if sorted_collection is not sorted in ascending order

    Examples:
    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 0, 0, 4)
    0
    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 15, 0, 4)
    4
    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 5, 0, 4)
    1
    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 6, 0, 4)
    -1
    >>> binary_search_by_recursion([0, 5, 7, 10, 15], -1, 0, 4)
    -1
    >>> binary_search_by_recursion([5], 3)
    -1
    """
    # Resolve the "right omitted" sentinel only here, at the public entry point.
    # Doing it on every recursive call would turn the legitimate empty window
    # (left=0, right=-1) back into the full collection and recurse forever.
    if right < 0:
        right = len(sorted_collection) - 1
    if list(sorted_collection) != sorted(sorted_collection):
        raise ValueError("sorted_collection must be sorted in ascending order")
    return _binary_search_recursive_step(sorted_collection, item, left, right)


def _binary_search_recursive_step(
    sorted_collection: list[int], item: int, left: int, right: int
) -> int:
    """Recursively search the inclusive window [left, right]; -1 when absent."""
    if right < left:
        return -1

    midpoint = left + (right - left) // 2

    if sorted_collection[midpoint] == item:
        return midpoint
    if sorted_collection[midpoint] > item:
        return _binary_search_recursive_step(
            sorted_collection, item, left, midpoint - 1
        )
    return _binary_search_recursive_step(sorted_collection, item, midpoint + 1, right)
359-
360-
361-
def exponential_search(sorted_collection: list[int], item: int) -> int:
    """Pure implementation of an exponential search algorithm in Python
    Resources used:
    https://en.wikipedia.org/wiki/Exponential_search

    The collection is validated, then a bound is doubled until the element at
    that bound is no longer smaller than the item (or the bound passes the end),
    and finally the window [bound // 2, bound] is binary-searched.

    :param sorted_collection: some ascending sorted collection with comparable items
    :param item: item value to search
    :return: index of the found item or -1 if the item is not found
    :raises ValueError: if sorted_collection is not sorted in ascending order

    Examples:
    >>> exponential_search([0, 5, 7, 10, 15], 0)
    0
    >>> exponential_search([0, 5, 7, 10, 15], 15)
    4
    >>> exponential_search([0, 5, 7, 10, 15], 5)
    1
    >>> exponential_search([0, 5, 7, 10, 15], 6)
    -1
    >>> exponential_search([0, 5, 7, 10, 15], -1)
    -1
    >>> exponential_search([], 1)
    -1
    """
    if list(sorted_collection) != sorted(sorted_collection):
        raise ValueError("sorted_collection must be sorted in ascending order")

    size = len(sorted_collection)
    bound = 1
    while bound < size and sorted_collection[bound] < item:
        bound *= 2

    # The item, if present, lies in the inclusive window [bound // 2, bound],
    # clamped to the last valid index.
    left = bound // 2
    right = min(bound, size - 1)

    # Search the window in place. Handing it to a helper that treats a negative
    # ``right`` as "unset" would misread the empty window (left=0, right=-1)
    # reached when the item is smaller than every element.
    while left <= right:
        midpoint = left + (right - left) // 2
        current_item = sorted_collection[midpoint]
        if current_item == item:
            return midpoint
        if current_item > item:
            right = midpoint - 1
        else:
            left = midpoint + 1
    return -1
398-
399-
400-
searches = (  # Fastest to slowest...
    binary_search_std_lib,
    binary_search,
    exponential_search,
    binary_search_by_recursion,
)


if __name__ == "__main__":
    import doctest
    import timeit

    # Run the docstring examples first, then show each search on a sample input.
    doctest.testmod()
    for search in searches:
        name = f"{search.__name__:>26}"
        print(f"{name}: {search([0, 5, 7, 10, 15], 10) = }")  # type: ignore[operator]

    print("\nBenchmarks...")
    setup = "collection = range(1000)"
    for search in searches:
        name = search.__name__
        print(
            f"{name:>26}:",
            timeit.timeit(
                f"{name}(collection, 500)", setup=setup, number=5_000, globals=globals()
            ),
        )

    user_input = input("\nEnter numbers separated by comma: ").strip()
    # Skip blank tokens so a trailing comma or an empty line does not crash int().
    collection = sorted(int(item) for item in user_input.split(",") if item.strip())
    target = int(input("Enter a single number to be found in the list: "))
    result = binary_search(sorted_collection=collection, item=target)
    if result == -1:
        print(f"{target} was not found in {collection}.")
    else:
        print(f"{target} was found at position {result} of {collection}.")

tests/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)