导入依赖:
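<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>${jsoup.version}</version>
</dependency>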
代码:
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
/**
 * Small crawler demo: downloads a single web page with jsoup, prints the HTML
 * of the element whose id is {@code artContent}, then prints the text of every
 * {@code <p>} element in the page.
 *
 * @author len
 * @createTime 2021/5/31
 */
public class PcTest {

    /** Page fetched when no URL is supplied on the command line. */
    private static final String DEFAULT_URL =
            "http://www.360doc.com/content/20/1206/10/9113704_949755169.shtml";

    /** Timeout handed to jsoup for fetching the page, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 30000;

    /**
     * Fetches the page and writes the extracted content to standard output.
     *
     * @param args optional; when present, {@code args[0]} is used as the URL to
     *             fetch instead of the built-in default
     * @throws IOException if the URL is malformed or the page cannot be fetched
     */
    public static void main(String[] args) throws IOException {
        String url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        Document document = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);

        // getElementById returns null when the id is absent; report that
        // instead of failing with a NullPointerException.
        Element article = document.getElementById("artContent");
        if (article == null) {
            System.err.println("No element with id \"artContent\" found in " + url);
        } else {
            System.out.println(article.html());
        }

        // The document-wide query already yields every <p>, nested ones
        // included, so each paragraph is printed exactly once here. Querying
        // each <p> for <p> again would only repeat nested paragraphs.
        for (Element paragraph : document.getElementsByTag("p")) {
            System.out.println(paragraph.text());
        }
    }
}