-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathScrollDownAPI.java
More file actions
executable file
·113 lines (98 loc) · 4.49 KB
/
ScrollDownAPI.java
File metadata and controls
executable file
·113 lines (98 loc) · 4.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
package twitter;
import org.apache.http.HttpEntity;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.URLEncoder;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
 * Scrapes tweets through a non-official API (the syndication timeline endpoint).
 * Same as clicking on the "Showing more" button inside a Twitter account.
 *
 * CONS: limited to roughly 830-850 tweets, see integration tests.
 * Some tweets are missed, since 'max_position' is not working correctly.
 *
 * PROS: None
 */
public class ScrollDownAPI {

    /** Client shared by every request issued from this class. */
    private static final HttpClient HTTP_CLIENT = HttpClients.createDefault();

    /** Formatter for the {@code datetime} attribute of a tweet's {@code <time>} element; built once and reused. */
    private static final DateTimeFormatter TWEET_DATE_FORMAT =
            DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ssZ");

    /** URL template: first {@code %s} is the encoded screen name, second the optional cursor parameter. */
    private static final String TIMELINE_URL_FORMAT =
            "https://syndication.twitter.com/timeline/profile?callback=__twttrf.callback&"
                    + "dnt=false&screen_name=%s&suppress_response_codes=true%s";

    /** Start of the JSONP wrapper that surrounds the JSON payload in every response. */
    private static final String JSONP_PREFIX = "__twttrf.callback(";

    /**
     * Collects up to {@code numberTweets} tweets of an account by paging through the timeline.
     *
     * Example <code>https://syndication.twitter.com/timeline/profile?callback=__twttrf.callback&dnt=false&screen_name=TwitterDev&suppress_response_codes=true&max_position=982346373969330177</code>
     * using max_position (taken from the previous page's {@code minPosition} header) to get the next page.
     *
     * @param numberTweets maximum number of tweets to retrieve; values {@code <= 0} yield an empty list
     * @param userName     account name
     * @return a new list holding at most {@code numberTweets} tweets of the account <pre>userName</pre>;
     *         shorter when the timeline runs out of tweets first
     * @throws IOException              when the request fails or the response is not the expected JSONP
     * @throws IllegalArgumentException when the first response reports status 404
     */
    public List<Tweet> searchNumberTweetsByUser(int numberTweets, String userName) throws IOException {
        if (numberTweets <= 0) {
            return new ArrayList<>();
        }

        // Not presized with numberTweets: the caller's requested count is unbounded,
        // while the number of tweets actually available is not.
        List<Tweet> foundTweets = new ArrayList<>();

        JSONObject json = new JSONObject(getURLResponse(userName, ""));
        JSONObject firstHeaders = json.getJSONObject("headers");
        if (firstHeaders.getInt("status") == 404) {
            throw new IllegalArgumentException("Error to twitter: " + firstHeaders.getString("message"));
        }

        while (foundTweets.size() < numberTweets) {
            Document doc = Jsoup.parse(json.getString("body"));
            Elements tweets = doc.select(".timeline-Tweet");
            if (tweets.isEmpty()) {
                // No tweets on this page: the timeline is exhausted.
                break;
            }

            Stream<Tweet> nextTweets = tweets.stream().map(tweet -> {
                // Drop characters outside the Basic Multilingual Plane (e.g. most emoji).
                String txt = tweet.select("p.timeline-Tweet-text").text().replaceAll("[^\\u0000-\\uFFFF]", "");
                LocalDateTime dateTime =
                        LocalDateTime.parse(tweet.select("time").attr("datetime"), TWEET_DATE_FORMAT);
                long id = Long.parseLong(tweet.attr("data-tweet-id"));
                return new Tweet(id, "https://twitter.com/" + userName + "/status/" + id, userName, txt, dateTime);
            });
            foundTweets.addAll(nextTweets.collect(Collectors.toList()));

            if (foundTweets.size() >= numberTweets) {
                // Enough tweets collected: do not spend a request on a page nobody will read.
                break;
            }

            // Read the cursor leniently and only after the page was processed, so a missing
            // or null cursor ends the paging instead of discarding what was already collected.
            String nextCursor = json.getJSONObject("headers").optString("minPosition", "");
            if (nextCursor.isEmpty()) {
                break;
            }
            json = new JSONObject(getURLResponse(userName, nextCursor));
        }

        // Always hand back an independent list, never a view over the larger working list.
        int resultSize = Math.min(numberTweets, foundTweets.size());
        return new ArrayList<>(foundTweets.subList(0, resultSize));
    }

    /**
     * Requests one timeline page and strips the JSONP wrapper from the response.
     *
     * @param username     account name; URL-encoded before being placed in the query string
     * @param scrollCursor cursor for the {@code max_position} parameter; {@code null} or empty
     *                     requests the first page and omits the parameter
     * @return the JSON text that was wrapped in {@code __twttrf.callback(...)}
     * @throws IOException when the request fails, the response has no body, or the body is not
     *                     wrapped in the expected callback
     */
    private static String getURLResponse(String username, String scrollCursor) throws IOException {
        String maxPosition = (scrollCursor == null || scrollCursor.isEmpty())
                ? ""
                : "&max_position=" + scrollCursor;
        String url = String.format(TIMELINE_URL_FORMAT, URLEncoder.encode(username, "UTF-8"), maxPosition);

        HttpEntity entity = HTTP_CLIENT.execute(new HttpGet(url)).getEntity();
        if (entity == null) {
            throw new IOException("Empty response from " + url);
        }
        // UTF-8 is only the fallback; a charset declared by the response still takes precedence.
        String result = EntityUtils.toString(entity, "UTF-8");

        // Locate the wrapper instead of slicing at fixed offsets, so an unexpected payload
        // (error page, trailing whitespace) fails loudly rather than yielding truncated JSON.
        int prefixStart = result.indexOf(JSONP_PREFIX);
        int payloadEnd = result.lastIndexOf(')');
        int payloadStart = prefixStart + JSONP_PREFIX.length();
        if (prefixStart < 0 || payloadEnd < payloadStart) {
            throw new IOException("Unexpected (non-JSONP) response from " + url);
        }
        return result.substring(payloadStart, payloadEnd);
    }

    /**
     * Immutable value holder for one scraped tweet.
     *
     * NOTE(review): this is a non-static inner class and therefore keeps a reference to its
     * enclosing {@code ScrollDownAPI}; it is left that way so existing callers keep compiling.
     */
    public class Tweet {
        private final long id;
        private final String permalink;
        private final String username;
        private final String text;
        private final LocalDateTime date;

        /**
         * @param id        numeric tweet id
         * @param permalink URL of the tweet
         * @param username  account the tweet was scraped from
         * @param text      tweet text
         * @param date      date-time parsed from the tweet's {@code <time>} element
         */
        public Tweet(long id, String permalink, String username, String text, LocalDateTime date) {
            this.id = id;
            this.permalink = permalink;
            this.username = username;
            this.text = text;
            this.date = date;
        }

        /** @return numeric tweet id */
        public long getId() {
            return this.id;
        }

        /** @return URL of the tweet */
        public String getPermalink() {
            return this.permalink;
        }

        /** @return account the tweet was scraped from */
        public String getUsername() {
            return this.username;
        }

        /** @return tweet text */
        public String getText() {
            return this.text;
        }

        /** @return date-time of the tweet as parsed from the page (offset discarded) */
        public LocalDateTime getDate() {
            return this.date;
        }
    }
}