# htmlLinks.py
# Follow a chain of HTML links: starting from a URL entered by the user,
# repeatedly open the page and jump to the anchor tag at a given position.
# To run this, either install BeautifulSoup with
#     pip install beautifulsoup4
# (which ensures you have the latest version), or download the zip file
#     http://www.py4e.com/code3/bs4.zip
# and unzip it in the same directory as this file.
import urllib.request, urllib.parse, urllib.error
from bs4 import BeautifulSoup
import ssl # defaults to certificate verification and most secure protocol (now TLS)
# Ignore SSL/TLS certificate errors.
# NOTE: disabling hostname checking and certificate verification means the
# server's identity is not validated for any HTTPS URL fetched below.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE


def next_link(tags, position, base_url):
    """Return the absolute URL of the anchor at 1-based ``position``.

    Args:
        tags: Sequence of anchor elements; each must support
            ``.get('href')`` (as the objects returned by ``soup('a')`` do).
        position: 1-based index of the anchor to follow.
        base_url: URL of the page the anchors came from; relative ``href``
            values are resolved against it.

    Returns:
        The anchor's ``href`` joined onto ``base_url``.

    Raises:
        ValueError: If ``position`` is less than 1.
        LookupError: If the page has fewer than ``position`` anchors, or the
            selected anchor has no (or an empty) ``href``.
    """
    if position < 1:
        raise ValueError('position must be at least 1, got {}'.format(position))
    if position > len(tags):
        raise LookupError(
            'page has only {} link(s), cannot follow link {}'.format(
                len(tags), position))
    href = tags[position - 1].get('href')
    if not href:
        raise LookupError('link {} has no href attribute'.format(position))
    # An absolute href is returned unchanged; a relative one is resolved
    # against the page it was found on.
    return urllib.parse.urljoin(base_url, href)


def main():
    """Prompt for a start URL, a hop count and a link position, then follow.

    For each hop the current page is downloaded, its anchor tags collected,
    and the URL is replaced by the anchor at the requested position. The
    final URL is printed. The script exits with a message instead of silently
    re-fetching the same page when the requested link cannot be followed.
    """
    url = input('Enter - ')  # e.g. http://py4e-data.dr-chuck.net/known_by_Corah.html
    hop_count = int(input('Enter count: '))  # e.g. 7
    link_position = int(input('Enter position: '))  # e.g. 18

    for _ in range(hop_count):
        print('Retrieving URL: ', url)
        # The context manager closes the connection once the body is read.
        with urllib.request.urlopen(url, context=ctx) as response:
            html = response.read()
        soup = BeautifulSoup(html, 'html.parser')
        try:
            url = next_link(soup('a'), link_position, url)
        except (ValueError, LookupError) as err:
            raise SystemExit('Cannot follow link from {}: {}'.format(url, err))

    print('URL: ', url)


if __name__ == '__main__':
    main()