-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathplaystore.py
More file actions
185 lines (169 loc) · 6.16 KB
/
playstore.py
File metadata and controls
185 lines (169 loc) · 6.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
from selenium import webdriver
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.common.keys import Keys
import time
import os
# ---------------------------------------------------------------
# User-tunable settings
# ---------------------------------------------------------------
# process() records a developer's email only when the install count
# parsed from the app page is <= this value.
installThreshold = 500000
# process() stops crawling once this many emails have been collected.
emailsNeeded = 200
# Seconds scroll() sleeps after each scroll-to-bottom before re-measuring
# the page height.
scrollTimeout = 3
# When False, main() passes "--headless" to Firefox.
openBrowser = True
# ---------------------------------------------------------------
# Shared crawl state (mutated by initialize() and process())
# ---------------------------------------------------------------
# App ids already processed; used as a set (every value is True).
visited = {}
# Developer emails already recorded; used as a set (every value is True).
visitedEmails = {}
# Entries printed at the end of process().
emailList = []
# Base URL that initialize() prefixes to the "new apps" listing path.
playstoreUrl = 'https://play.google.com'
def scroll(driver, timeout):
    """Scroll the current page to the bottom until it stops growing.

    After every scroll the function sleeps ``timeout`` seconds and then
    re-reads ``document.body.scrollHeight``; it returns once two
    consecutive readings are equal.

    Args:
        driver: Object exposing ``execute_script(script)``.
        timeout: Seconds to sleep after each scroll.
    """
    heightQuery = "return document.body.scrollHeight"
    previous = driver.execute_script(heightQuery)
    settled = False
    while not settled:
        driver.execute_script("window.scrollTo(0,document.body.scrollHeight);")
        time.sleep(timeout)
        current = driver.execute_script(heightQuery)
        # Equal heights mean the last scroll loaded nothing new.
        settled = current == previous
        previous = current
def initialize():
    """Load (or create) the on-disk crawl state into the module globals.

    Steps, in order:
      1. If ``playstore.txt`` is missing or empty, open the Play Store
         "new apps" listing with the global ``driver``, scroll it to the
         bottom and write every matching link to ``playstore.txt``.
      2. Read ``playstore.txt`` into the global ``urlsList``.
      3. Add every line of ``visited.txt`` to ``visited``.
      4. For every line of ``emails.txt`` bump ``collected``, register the
         email in ``visitedEmails`` and append the line to ``emailList``.
      5. Append URLs from ``temp.txt`` whose app id is not yet visited.

    Relies on the module-level ``driver`` having been created beforehand.
    """
    global urlsList
    global collected
    collected = 0

    # Check for playstore.txt
    if not os.path.exists("playstore.txt") or os.path.getsize("playstore.txt") == 0:
        print("Starting browser in background..")
        print("Browser started!")
        driver.get(playstoreUrl + '/store/apps/new?hl=en')
        print("Scrolling..")
        scroll(driver, scrollTimeout)
        print("Scrolling completed!")
        print("Initializing with links..")
        # NOTE(review): find_elements_by_xpath is the legacy Selenium API;
        # confirm the pinned Selenium version still provides it.
        with open("playstore.txt", 'a') as f:
            for item in driver.find_elements_by_xpath("//a[@class='poRVub']"):
                f.write(item.get_attribute('href') + '\n')
                f.flush()
            print("Initial links obtained!")
        print("Written to playstore.txt!")
    else:
        print("Already obtained initial list!")

    with open("playstore.txt", 'r') as f:
        urlsList = [line.strip() for line in f]

    # Check for visited.txt
    if os.path.exists("visited.txt") and os.path.getsize("visited.txt") > 0:
        with open("visited.txt") as f:
            for line in f:
                visited[line.strip()] = True

    # Check for emails.txt
    if os.path.exists("emails.txt") and os.path.getsize("emails.txt") > 0:
        # process() writes this file as UTF-8, so read it back the same way.
        with open("emails.txt", encoding="utf-8") as f:
            for line in f:
                collected += 1
                # Lines are written as "<developer> - <email>". Split on the
                # LAST " - " so a hyphen inside the developer name does not
                # truncate the email.
                email = line.rpartition(" - ")[2].strip()
                visitedEmails[email] = True
                emailList.append(line.strip())

    # Check for temp.txt
    if os.path.exists("temp.txt") and os.path.getsize("temp.txt") > 0:
        print("Previously collected temp file found!")
        with open("temp.txt") as f:
            for line in f:
                url = line.strip()
                # visited is keyed by app id (the text after the first '='),
                # not by full URL, so compare ids.
                if url[url.find("=") + 1:] not in visited:
                    urlsList.append(url)
def getSimilar(driver):
    """Collect links to apps similar to the one currently open in ``driver``.

    First tries to follow the page's "see more" link, scroll the resulting
    listing and read every app link from it. If anything in that path
    fails, falls back to reading the similar-app cards from whatever page
    the driver is currently showing. Every URL whose app id is not in
    ``visited`` is appended to ``temp.txt`` and to the returned list.

    Args:
        driver: Selenium WebDriver positioned on an app detail page.

    Returns:
        list: URLs of not-yet-visited similar apps found by this call.
    """
    tempList = []

    def record(url, out):
        # The app id is whatever follows the first '=' in the URL.
        appId = url.strip()
        appId = appId[appId.find("=") + 1:]
        if appId not in visited:
            out.write(url + '\n')
            out.flush()
            tempList.append(url)

    # `with` guarantees the file is closed and, unlike try/finally around
    # open(), cannot hit an unbound `f` when the open itself fails.
    with open("temp.txt", 'a') as f:
        try:
            seeMore = driver.find_element_by_xpath("//a[@class='LkLjZd ScJHi U8Ww7d xjAeve nMZKrb id-track-click ']")
            driver.get(seeMore.get_attribute('href'))
            scroll(driver, scrollTimeout)
            for item in driver.find_elements_by_xpath("//a[@class='poRVub']"):
                record(item.get_attribute('href'), f)
        except Exception:
            # Best-effort fallback. Deliberately not a bare `except:` so
            # that Ctrl+C (KeyboardInterrupt) still reaches main().
            for card in driver.find_elements_by_xpath("//div[@class='WHE7ib mpg5gc']"):
                link = card.find_element_by_xpath(".//c-wiz/div/div/div/div/div/a")
                record(link.get_attribute('href'), f)
    return tempList
def process():
    """Crawl ``urlsList`` and record developer emails until the target is met.

    For each URL whose app id has not been visited, the id is appended to
    ``visited.txt``, the page is opened with the global ``driver`` and the
    install count, developer email and developer name are read from it.
    A new email is written to ``emails.txt`` as ``"<developer> - <email>"``
    when the parsed install count is <= ``installThreshold``. Similar apps
    returned by ``getSimilar`` are appended to ``urlsList`` while it is
    being iterated, which is how the crawl frontier grows.

    Stops when ``collected`` reaches ``emailsNeeded`` or the list is
    exhausted, then prints every entry of ``emailList``.
    """
    global collected
    global emailList
    print("Total initial links -> " + str(len(urlsList)))
    # `with` closes both files even if one of the open() calls fails.
    with open('emails.txt', 'a', encoding="utf-8") as f, open('visited.txt', 'a') as f2:
        for url in urlsList:
            if collected >= emailsNeeded:
                print("Target reached!")
                break
            # The app id is whatever follows the first '=' in the URL.
            appId = url.strip()
            appId = appId[appId.find("=") + 1:]
            if appId in visited:
                continue
            # Mark as visited before loading so a failing page is not retried.
            visited[appId] = True
            f2.write(appId + "\n")
            f2.flush()
            print("Processing " + appId, end="")
            driver.get(url)
            try:
                installsString = driver.find_element_by_xpath("//*[text()='Installs']/following-sibling::span").text
                # Keep only the digits, e.g. "500,000+" -> 500000.
                installs = 0
                for d in installsString:
                    if d.isdigit():
                        installs = installs * 10 + int(d)
                email = driver.find_element_by_xpath("//a[@class='hrTbp euBY6b']").text
                developer = driver.find_element_by_xpath("//*[text()='Offered By']/following-sibling::span").text
                print("..." + installsString)
                if email in visitedEmails:
                    print("[-] Already encountered same developer")
                elif installs <= installThreshold:
                    visitedEmails[email] = True
                    collected += 1
                    emailList.append(email)
                    devMail = developer + " - " + email
                    print("[+] " + devMail)
                    f.write(devMail + "\n")
                    f.flush()
                # NOTE(review): the original indentation was lost; confirm
                # that similar apps should be queued for every processed app
                # rather than only for newly recorded developers.
                urlsList.extend(getSimilar(driver))
            except Exception:
                # Skip pages that lack the expected elements. Deliberately
                # not a bare `except:` so Ctrl+C still reaches main().
                print("... skipped")
    print('Iterating list')
    for email in emailList:
        print(email)
def main():
    """Start Firefox, run the crawl, and always shut the browser down.

    Publishes the WebDriver as the module-level ``driver`` that
    ``initialize()`` and ``process()`` use. Firefox runs headless when
    ``openBrowser`` is False. Ctrl+C is reported instead of producing a
    traceback.
    """
    global driver
    # Pre-bind so the cleanup below is safe even when the browser never
    # starts; otherwise driver.quit() would raise NameError and hide the
    # real startup error.
    driver = None
    options = FirefoxOptions()
    if not openBrowser:
        options.add_argument("--headless")
    try:
        driver = webdriver.Firefox(options=options)
        initialize()
        process()
    except KeyboardInterrupt:
        print("Program execution terminated abnormally with Ctrl+C!")
    finally:
        if driver is not None:
            driver.quit()
        print("Done!")
# Run the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()