From 996829b8e96ac04dd8f3dee8cc3fe37ba714bc92 Mon Sep 17 00:00:00 2001 From: mykola2312 <49044616+mykola2312@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:28:14 +0300 Subject: [PATCH] now store each crawler wave in crawl table as well as datetime in unix epoch timestamp, force jooq use bigint for datetime types. fetch source list from database instead of json, allowing hot reload --- pom.xml | 11 ++++++ src/main/java/com/mykola2312/mptv/Main.java | 3 +- .../com/mykola2312/mptv/crawler/Crawler.java | 35 ++++++++++++++----- .../com/mykola2312/mptv/db/pojo/Source.java | 23 ++++++++++++ 4 files changed, 63 insertions(+), 9 deletions(-) create mode 100644 src/main/java/com/mykola2312/mptv/db/pojo/Source.java diff --git a/pom.xml b/pom.xml index d3400c8..b56131e 100644 --- a/pom.xml +++ b/pom.xml @@ -144,6 +144,17 @@ org.jooq.meta.sqlite.SQLiteDatabase .* flyway_schema_history + + + + CRAWLED_AT + BIGINT + + + LAST_TIME + BIGINT + + true diff --git a/src/main/java/com/mykola2312/mptv/Main.java b/src/main/java/com/mykola2312/mptv/Main.java index 229b2cd..4c9fd6b 100644 --- a/src/main/java/com/mykola2312/mptv/Main.java +++ b/src/main/java/com/mykola2312/mptv/Main.java @@ -69,7 +69,8 @@ public class Main { flyway.migrate(); // load sources, start crawlers - Crawler crawler = new Crawler(config.sources); + Crawler crawler = new Crawler(); + crawler.updateSources(config.sources); crawler.crawl(); // initialize ui diff --git a/src/main/java/com/mykola2312/mptv/crawler/Crawler.java b/src/main/java/com/mykola2312/mptv/crawler/Crawler.java index fe3a76c..6452b9e 100644 --- a/src/main/java/com/mykola2312/mptv/crawler/Crawler.java +++ b/src/main/java/com/mykola2312/mptv/crawler/Crawler.java @@ -7,6 +7,7 @@ import org.apache.log4j.Logger; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Paths; +import java.time.Instant; import java.util.ArrayList; import java.io.IOException; import java.util.HashMap; @@ -17,22 +18,23 @@ import org.jooq.impl.*; import static com.mykola2312.mptv.tables.Category.*; import static com.mykola2312.mptv.tables.Channel.*; import static com.mykola2312.mptv.tables.Source.*; +import static com.mykola2312.mptv.tables.Crawl.*; import com.mykola2312.mptv.config.SourceItem; import com.mykola2312.mptv.parser.M3U; import com.mykola2312.mptv.parser.M3UException; import com.mykola2312.mptv.parser.M3UParser; import com.mykola2312.mptv.db.DB; +import com.mykola2312.mptv.db.pojo.Source; import com.mykola2312.mptv.tables.records.ChannelRecord; import com.mykola2312.mptv.tables.records.SourceRecord; public class Crawler { private static final Logger logger = Logger.getLogger(Crawler.class); - private final List sources; + private Integer crawlId; - public Crawler(List sources) { - this.sources = sources; + public Crawler() { } public void updateSources(List sourceItems) { @@ -53,6 +55,13 @@ public class Crawler { .execute(); } + private List loadSources() { + return DSL.using(DB.CONFIG) + .select() + .from(SOURCE) + .fetchInto(Source.class); + } + private Integer ensureRootCategory(String rootName) { try { return DSL.using(DB.CONFIG) @@ -112,6 +121,7 @@ public class Crawler { channel.set(CHANNEL.TITLE, item.title); channel.set(CHANNEL.URL, item.url); channel.set(CHANNEL.LOGO, item.tvgLogo); + channel.set(CHANNEL.CRAWL, crawlId); channels.add(channel); } @@ -120,15 +130,24 @@ public class Crawler { .execute(); } + private Integer beginCrawl() { + return DSL.using(DB.CONFIG) + .insertInto(CRAWL, CRAWL.CRAWLED_AT) + .values(Instant.now().toEpochMilli() / 1000L) + .returningResult(CRAWL.ID) + .fetchSingleInto(Integer.class); + } + public void crawl() { - for (SourceItem source : sources) { + crawlId = beginCrawl(); + for (Source source : loadSources()) { switch (source.type) { - case M3U_LOCAL -> { + case "m3u-local" -> { try { if (source.path == null) { logger.error("m3u local has to have \"path\" variable"); continue; - } else if (source.rootCategory == null) { + } else if (source.rootName == null) { logger.error("source has to have \"rootCategory\""); continue; } @@ -136,7 +155,7 @@ public class Crawler { String m3uData = Files.readString(Paths.get(source.path), StandardCharsets.UTF_8); ArrayList m3u = M3UParser.parse(m3uData); - updateAllChannels(m3u, source.rootCategory); + updateAllChannels(m3u, source.rootName); } catch (IOException e) { logger.error(e); logger.error(String.format("failed to read local m3u file: %s", e.getMessage())); @@ -147,7 +166,7 @@ public class Crawler { } default -> { - logger.error(String.format("source type %s is not implemented yet :(", source.type.name())); + logger.error(String.format("source type %s is not implemented yet :(", source.type)); } } } diff --git a/src/main/java/com/mykola2312/mptv/db/pojo/Source.java b/src/main/java/com/mykola2312/mptv/db/pojo/Source.java new file mode 100644 index 0000000..48191e0 --- /dev/null +++ b/src/main/java/com/mykola2312/mptv/db/pojo/Source.java @@ -0,0 +1,23 @@ +package com.mykola2312.mptv.db.pojo; + +import jakarta.persistence.Column; + +public class Source { + @Column(name = "ID") + public int id; + + @Column(name = "TYPE") + public String type; + + @Column(name = "ROOT_NAME") + public String rootName; + + @Column(name = "URL") + public String url; + + @Column(name = "PATH") + public String path; + + @Column(name = "COOKIES") + public String cookies; +}