本文将展示如何优化航班数据爬取代码,改善代码结构并提升可维护性。
首先定义航班数据实体类(FlightData.java):
import org.apache.commons.lang.StringUtils;
import net.minidev.json.JSONObject;
/**
 * Plain data holder for one scraped flight record.
 *
 * <p>All fields are free-form strings exactly as they were extracted from the page; any of
 * them may be {@code null} if the corresponding setter was never called. Instances are
 * mutable and carry no synchronization of their own.
 */
public class FlightData {
    private String airlineName;
    private String flightNumber;
    private String scheduledDeparture;
    private String actualDeparture;
    private String departureLocation;
    private String flightStatus;
    private String scheduledArrival;
    private String actualArrival;
    private String destination;
    private String punctualityRate;
    private String flightInfo;
    private String additionalInfo;

    /**
     * Builds the JSON view of this record.
     *
     * <p>The scheduled departure/arrival values have their label text ("计划起飞" /
     * "计划到达") removed and are emitted as an empty string when unset. Every other value
     * is copied as-is, so unset fields appear as {@code null}. {@code flightInfo} and
     * {@code additionalInfo} are not part of the JSON output.
     *
     * @return a new {@link JSONObject} populated from the current field values
     */
    public JSONObject getDataJson() {
        JSONObject data = new JSONObject();
        data.put("起飞时间", actualDeparture);
        data.put("计划起飞", stripLabel(scheduledDeparture, "计划起飞"));
        data.put("当前状态", flightStatus);
        data.put("到达时间", actualArrival);
        data.put("计划到达", stripLabel(scheduledArrival, "计划到达"));
        data.put("出发地", departureLocation);
        data.put("目的地", destination);
        data.put("航空公司", airlineName);
        data.put("航班号", flightNumber);
        data.put("准点率", punctualityRate);
        return data;
    }

    /**
     * Removes every occurrence of {@code label} from {@code value}.
     *
     * @return the stripped text, or an empty string when {@code value} is null or empty
     */
    private static String stripLabel(String value, String label) {
        if (StringUtils.isEmpty(value)) {
            return "";
        }
        return value.replace(label, "");
    }

    public String getAirlineName() {
        return airlineName;
    }

    public void setAirlineName(String airlineName) {
        this.airlineName = airlineName;
    }

    public String getFlightNumber() {
        return flightNumber;
    }

    public void setFlightNumber(String flightNumber) {
        this.flightNumber = flightNumber;
    }

    public String getScheduledDeparture() {
        return scheduledDeparture;
    }

    public void setScheduledDeparture(String scheduledDeparture) {
        this.scheduledDeparture = scheduledDeparture;
    }

    public String getActualDeparture() {
        return actualDeparture;
    }

    public void setActualDeparture(String actualDeparture) {
        this.actualDeparture = actualDeparture;
    }

    public String getDepartureLocation() {
        return departureLocation;
    }

    public void setDepartureLocation(String departureLocation) {
        this.departureLocation = departureLocation;
    }

    public String getFlightStatus() {
        return flightStatus;
    }

    public void setFlightStatus(String flightStatus) {
        this.flightStatus = flightStatus;
    }

    public String getScheduledArrival() {
        return scheduledArrival;
    }

    public void setScheduledArrival(String scheduledArrival) {
        this.scheduledArrival = scheduledArrival;
    }

    public String getActualArrival() {
        return actualArrival;
    }

    public void setActualArrival(String actualArrival) {
        this.actualArrival = actualArrival;
    }

    public String getDestination() {
        return destination;
    }

    public void setDestination(String destination) {
        this.destination = destination;
    }

    public String getPunctualityRate() {
        return punctualityRate;
    }

    public void setPunctualityRate(String punctualityRate) {
        this.punctualityRate = punctualityRate;
    }

    public String getFlightInfo() {
        return flightInfo;
    }

    public void setFlightInfo(String flightInfo) {
        this.flightInfo = flightInfo;
    }

    public String getAdditionalInfo() {
        return additionalInfo;
    }

    public void setAdditionalInfo(String additionalInfo) {
        this.additionalInfo = additionalInfo;
    }
}
然后实现航班数据查询工具类(FlightDataProvider.java,两个公共类需分别放在各自的文件中):
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.ConcurrentHashMap;
import net.minidev.json.JSONObject;
import org.apache.commons.lang.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
/**
 * Fetches flight status pages from flights.ctrip.com, parses them into {@link FlightData}
 * and caches the result per flight number.
 *
 * <p>The whole cache is emptied by a background timer every ten minutes. Because that timer
 * runs on its own thread while callers read and write the cache, the cache is a concurrent
 * map.
 */
public class FlightDataProvider {

    /** Period between cache flushes, in milliseconds (10 minutes). */
    private static final long CACHE_CLEANUP_INTERVAL_MS = 600000L;

    /** Parsed flight records keyed by trimmed flight number. */
    public static Map<String, FlightData> cacheMap = new ConcurrentHashMap<>();

    /** Timer that drives the periodic cache flush; created lazily by {@link #getTimer()}. */
    public static Timer cacheTimer;

    static {
        startCacheCleanup();
    }

    /**
     * Returns the shared timer, creating it on first use.
     *
     * <p>The timer thread is a daemon so that it does not keep the JVM alive after the
     * application's own threads have finished.
     */
    private static synchronized Timer getTimer() {
        if (cacheTimer == null) {
            cacheTimer = new Timer("flight-cache-cleanup", true);
        }
        return cacheTimer;
    }

    /** Schedules the recurring task that clears {@link #cacheMap}. */
    private static void startCacheCleanup() {
        TimerTask flushTask = new TimerTask() {
            @Override
            public void run() {
                cacheMap.clear();
            }
        };
        // Runs once immediately, then every 10 minutes.
        getTimer().schedule(flushTask, 0, CACHE_CLEANUP_INTERVAL_MS);
    }

    /**
     * Downloads and parses the page at {@code url}.
     *
     * @param url absolute URL to fetch
     * @return the parsed document
     * @throws RuntimeException if the page cannot be retrieved; the underlying
     *     {@link IOException} is attached as the cause
     */
    public static Document fetchDocument(String url) throws Exception {
        try {
            return Jsoup.connect(url).get();
        } catch (IOException e) {
            // Keep the original failure so callers can see why the fetch failed.
            throw new RuntimeException("无法获取网页内容", e);
        }
    }

    /**
     * Returns today's data for the given flight as JSON, using the cache when possible.
     *
     * <p>The flight number is trimmed once and that trimmed value is used for validation,
     * for the cache key and for the request URL, so {@code "CA1234"} and {@code " CA1234 "}
     * share one cache entry.
     *
     * @param flightNumber flight number to look up; must contain non-whitespace characters
     * @return JSON produced by {@link FlightData#getDataJson()}
     * @throws IllegalArgumentException if {@code flightNumber} is null, empty or blank
     * @throws RuntimeException if the status page cannot be fetched
     */
    public static JSONObject getFlightData(String flightNumber) throws Exception {
        String key = flightNumber == null ? "" : flightNumber.trim();
        if (key.isEmpty()) {
            throw new IllegalArgumentException("请输入有效的航班号");
        }

        // Single lookup: the cleanup timer may clear the map between a containsKey()
        // and a get(), which would otherwise surface as a NullPointerException.
        FlightData cached = cacheMap.get(key);
        if (cached != null) {
            return cached.getDataJson();
        }

        String currentDate = new SimpleDateFormat("yyyyMMdd").format(new Date());
        Document doc = fetchDocument(
                "http://flights.ctrip.com/actualtime/fno--" + key + "-" + currentDate + ".html");

        FlightData data = new FlightData();
        parseFlightInfo(doc, data);
        cacheMap.put(key, data);
        return data.getDataJson();
    }

    /**
     * Copies text from the status page into {@code data}.
     *
     * <p>NOTE(review): each field is filled from the FIRST element matching its selector.
     * As a result actual departure and actual arrival receive the same text, as do
     * scheduled departure / status / scheduled arrival, and departure location /
     * destination / flight info / additional info. This almost certainly needs distinct
     * selectors or element indices per field — confirm against the real page markup
     * before relying on these values.
     *
     * @param doc parsed status page
     * @param data record to populate
     */
    private static void parseFlightInfo(Document doc, FlightData data) {
        // Each selector is evaluated once; the original queried the same selector repeatedly.
        String firstTimeText = getTextOrEmpty(doc.select("[class=time]"));
        String firstGrayText = getTextOrEmpty(doc.select("[class=gray]"));
        String firstParagraphText = getTextOrEmpty(doc.select("p"));

        // Actual departure / arrival times
        data.setActualDeparture(firstTimeText);
        data.setActualArrival(firstTimeText);

        // Scheduled times and current status
        data.setScheduledDeparture(firstGrayText);
        data.setFlightStatus(firstGrayText);
        data.setScheduledArrival(firstGrayText);

        // Route and miscellaneous information
        data.setDepartureLocation(firstParagraphText);
        data.setDestination(firstParagraphText);
        data.setFlightInfo(firstParagraphText);
        data.setAdditionalInfo(firstParagraphText);

        data.setAirlineName(getTextOrEmpty(doc.select("[class=ml5]")));
        data.setFlightNumber(getTextOrEmpty(doc.select("strong")));
        data.setPunctualityRate(getTextOrEmpty(doc.select("[class=f14 gray ml10]")));
    }

    /**
     * Returns the text of the first element in {@code elements}.
     *
     * @return that element's text, or an empty string when {@code elements} is null or empty
     */
    private static String getTextOrEmpty(Elements elements) {
        if (elements == null || elements.isEmpty()) {
            return "";
        }
        return elements.get(0).text();
    }
}