Len's Study-Log

集中一点,登峰造极!

0%

Jsoup爬虫随笔

导入依赖(在 Maven 的 pom.xml 中添加):

<!-- jsoup: provides the org.jsoup.* classes imported by the PcTest example -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.13.1</version>
</dependency>

代码:

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;

/**
 * Minimal jsoup crawler example: downloads a single article page and prints
 * its content to standard output.
 *
 * @author len
 * @describe crawler
 * @createTime 2021/5/31
 */
public class PcTest {

    /** Address of the page to download. */
    private static final String ARTICLE_URL =
            "http://www.360doc.com/content/20/1206/10/9113704_949755169.shtml";

    /** Timeout handed to jsoup for the download, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 30000;

    /** Id of the element whose inner HTML is printed first. */
    private static final String ARTICLE_ELEMENT_ID = "artContent";

    /**
     * Fetches {@link #ARTICLE_URL}, prints the inner HTML of the element with id
     * {@link #ARTICLE_ELEMENT_ID}, then prints the text of every {@code <p>}
     * element in the document, one per line.
     *
     * @param args ignored
     * @throws IOException if the page cannot be fetched or read
     */
    public static void main(String[] args) throws IOException {
        Document document = Jsoup.parse(new URL(ARTICLE_URL), TIMEOUT_MILLIS);

        // getElementById returns null when no element carries the id; report that
        // clearly instead of failing with a bare NullPointerException.
        Element article = document.getElementById(ARTICLE_ELEMENT_ID);
        if (article == null) {
            System.err.println(
                    "No element with id \"" + ARTICLE_ELEMENT_ID + "\" found in " + ARTICLE_URL);
        } else {
            System.out.println(article.html());
        }

        // The document-wide lookup already yields every <p>, nested ones included,
        // so each paragraph is printed once; looking up "p" again inside each
        // paragraph would only repeat the paragraph itself and its nested <p>s.
        for (Element paragraph : document.getElementsByTag("p")) {
            System.out.println(paragraph.text());
        }
    }
}