lingyin-api/src/main/java/com/dengqn/app/lingyinapi/html/HtmlTool.java

83 lines
2.9 KiB
Java
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package com.dengqn.app.lingyinapi.html;
import com.dengqn.app.lingyinapi.beans.SeedListItem;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
/**
*
* @author dengqn
* @since 2025/9/25 18:28
*/
@Slf4j
public class HtmlTool {
public static List<SeedListItem> getSeedListItem(Document doc) {
Element torrents;
try {
torrents = doc.getElementsByClass("torrents").getFirst();
} catch (Exception e) {
log.info("搜不到东西:{}", e.getMessage());
return new ArrayList<>();
}
return torrents.select("tr").parallelStream().skip(1)
.map(tr -> {
try {
/**
* 标题部分
*/
Element torrentNames = tr.getElementsByTag("table").first();
if (torrentNames == null) {
return null;
}
Elements seedNames = torrentNames.getElementsByTag("td");
String seedCover = seedNames.get(0).getElementsByTag("img").first().attr("data-src");
String seedName = seedNames.get(1).getElementsByTag("a").first().attr("title");
String seedUrl = seedNames.get(1).getElementsByTag("a").first().attr("href");
String seedDesc = seedNames.text().replaceFirst(seedName, "");
boolean isFree = seedNames.get(1).getElementsByClass("pro_free").size() > 0;
boolean isHitAndRun = seedNames.get(1).getElementsByClass("hitandrun").size() > 0;
String downloadUrl = seedNames.get(3).getElementsByTag("a").first().attr("href");
/**
* 数据
*/
Elements tds = tr.selectXpath("td");
// skip 3
// 时间title属性
String time = tds.get(3).getElementsByTag("span").first().attr("title");
String size = tds.get(4).text().trim();
String seeders = tds.get(5).getElementsByTag("b").first().text().trim();
String views = tds.get(7).getElementsByTag("b") == null || tds.get(7).getElementsByTag("b").size() == 0 ? "0" : tds.get(7).getElementsByTag("b").first().text().trim();
boolean isAnonymous = tds.get(8).text().trim().contains("匿名");
return new SeedListItem(seedCover, seedName, seedUrl, seedDesc, isFree, isHitAndRun, downloadUrl, time, size, seeders, views, isAnonymous);
} catch (Exception e) {
log.error("解析报错跳过:" + e.getMessage());
return null;
}
}).filter(Objects::nonNull).toList();
}
public static Integer getPageTotal(Document document) {
Elements paginationEl = document.getElementsByClass("nexus-pagination");
if (paginationEl == null) return 0;
Elements pages = paginationEl.first().getElementsByTag("a");
if (pages == null || pages.size() == 0) return 0;
Element lastA = pages.last();
String text = lastA.text();
if ("".equalsIgnoreCase(text.trim())) return 0;
return Integer.valueOf(text.trim()
.replaceAll("\"", "")
.replaceAll(" ", "")
.split("-")[1]);
}
}