-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathruby_scrapy_dribbble.rb
More file actions
65 lines (52 loc) · 1.59 KB
/
ruby_scrapy_dribbble.rb
File metadata and controls
65 lines (52 loc) · 1.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
require "mechanize"
# The feature name is lowercase; `require "Pry"` raises LoadError on
# case-sensitive filesystems.
require "pry"
require "uri"

# Usage: ruby ruby_scrapy_dribbble.rb <search terms...> <limit>
#
# The last command-line argument is the maximum number of images to download;
# every argument before it is a search term.
agent = Mechanize.new
agent.user_agent_alias = 'Mac Safari'

# Validate the limit explicitly: `to_i` would silently turn a missing or
# non-numeric argument into 0.
limit_arg = ARGV.pop
limit = limit_arg =~ /\A\d+\z/ ? limit_arg.to_i : 0
abort "Usage: #{$PROGRAM_NAME} <search terms...> <limit (positive integer)>" unless limit > 0

img_count = 0

# Form-encode the query so that spaces become "+" (as before) and characters
# such as "&", "#" or "%" in a search term cannot corrupt the URL.
search_param = URI.encode_www_form_component(ARGV.join(" "))
url = "https://dribbble.com/search?q=#{search_param}"
page = agent.get(url)
# Pry.start(binding)
# Holds the thumbnail links; each one is clicked later to open its shot modal.
# A link qualifies when its href looks like "/shots/<digits>-<slug>" and has
# exactly two slashes. Only the first link for any given href is kept.
shot_links = page.links.select do |candidate|
  (candidate.href =~ /^\/shots\/\d+-.+/) && (candidate.href.count('/') == 2)
end
links_dribbble = shot_links.uniq(&:href)
links_dribbble.each { |shot| puts "#{shot.href} Look at here!" }
puts links_dribbble.length
# Open every collected shot and save its image(s) under images/<author>/,
# ending the script as soon as `limit` images have been downloaded.

# Only <source> URLs that point at Dribbble's screenshot CDN are downloaded.
cdn_screenshot = %r{^https://cdn\.dribbble\.com/users/\d+/screenshots/\d+/.+}

# The author name comes from scraped HTML, so reduce it to a single harmless
# directory component (no path separators, no "." / "..") before it is used
# in a filesystem path.
author_dir = lambda do |name|
  cleaned = name.to_s.gsub(%r{[/\\\x00]}, "_")
  (cleaned.empty? || %w[. ..].include?(cleaned)) ? "unknown" : cleaned
end

# Downloads one image, bumps the running counter and stops the script once
# the limit is reached. `>=` (not `>`) so that exactly `limit` images are
# saved rather than limit + 1.
save_image = lambda do |pic_src, author|
  puts pic_src
  agent.get(pic_src).save "images/#{author_dir.call(author)}/#{File.basename(pic_src)}"
  img_count += 1
  puts img_count
  exit if img_count >= limit
end

# Nothing to do when the limit is already met (e.g. a limit of 0).
exit if img_count >= limit

links_dribbble.each do |link|
  page_modal = link.click
  # Pry.start(binding)
  author_node = page_modal.css(".slat-header > a").first
  author = author_node && author_node["title"]
  if author.nil?
    warn "Skipping #{link.href}: author not found"
    next
  end

  if page_modal.css(".single-img > picture").empty?
    # Plain <img> shot: a single image source.
    img_node = page_modal.css(".single-img > img").first
    pic_src = img_node && img_node["src"]
    if pic_src.nil?
      warn "Skipping #{link.href}: image not found"
      next
    end
    save_image.call(pic_src, author)
  else
    # <picture> shot: download every <source> whose srcset is a CDN screenshot.
    # NOTE(review): the whole srcset value is used as the URL, as before; this
    # assumes a single candidate without descriptors — confirm against the markup.
    page_modal.css("picture > source").each do |node|
      pic_src = node["srcset"]
      next unless pic_src =~ cdn_screenshot

      save_image.call(pic_src, author)
    end
  end
end