Store each crawler wave in the crawl table, along with its datetime as a Unix epoch timestamp, and force jOOQ to use BIGINT for datetime types. Fetch the source list from the database instead of JSON, allowing hot reload.

This commit is contained in:
mykola2312 2024-04-23 15:28:14 +03:00
parent 7b96dd5cf9
commit 996829b8e9
4 changed files with 63 additions and 9 deletions

11
pom.xml
View file

@ -144,6 +144,17 @@
<name>org.jooq.meta.sqlite.SQLiteDatabase</name>
<includes>.*</includes>
<excludes>flyway_schema_history</excludes>
<!-- Force jOOQ to generate BIGINT (a Java long) for the CRAWLED_AT and LAST_TIME
     columns, because SQLite stores these timestamps as plain integers. -->
<forcedTypes>
<forcedType>
<includeExpression>CRAWLED_AT</includeExpression>
<name>BIGINT</name>
</forcedType>
<forcedType>
<includeExpression>LAST_TIME</includeExpression>
<name>BIGINT</name>
</forcedType>
</forcedTypes>
</database>
<generate>
<records>true</records>

View file

@ -69,7 +69,8 @@ public class Main {
flyway.migrate();
// load sources, start crawlers
Crawler crawler = new Crawler(config.sources);
Crawler crawler = new Crawler();
crawler.updateSources(config.sources);
crawler.crawl();
// initialize ui

View file

@ -7,6 +7,7 @@ import org.apache.log4j.Logger;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.time.Instant;
import java.util.ArrayList;
import java.io.IOException;
import java.util.HashMap;
@ -17,22 +18,23 @@ import org.jooq.impl.*;
import static com.mykola2312.mptv.tables.Category.*;
import static com.mykola2312.mptv.tables.Channel.*;
import static com.mykola2312.mptv.tables.Source.*;
import static com.mykola2312.mptv.tables.Crawl.*;
import com.mykola2312.mptv.config.SourceItem;
import com.mykola2312.mptv.parser.M3U;
import com.mykola2312.mptv.parser.M3UException;
import com.mykola2312.mptv.parser.M3UParser;
import com.mykola2312.mptv.db.DB;
import com.mykola2312.mptv.db.pojo.Source;
import com.mykola2312.mptv.tables.records.ChannelRecord;
import com.mykola2312.mptv.tables.records.SourceRecord;
public class Crawler {
private static final Logger logger = Logger.getLogger(Crawler.class);
private final List<SourceItem> sources;
private Integer crawlId;
public Crawler(List<SourceItem> sources) {
this.sources = sources;
public Crawler() {
}
public void updateSources(List<SourceItem> sourceItems) {
@ -53,6 +55,13 @@ public class Crawler {
.execute();
}
/**
 * Reads every row of the SOURCE table and maps each row onto a {@link Source} POJO.
 *
 * @return the sources currently stored in the database
 */
private List<Source> loadSources() {
    var db = DSL.using(DB.CONFIG);
    var everySource = db.select().from(SOURCE);
    return everySource.fetchInto(Source.class);
}
private Integer ensureRootCategory(String rootName) {
try {
return DSL.using(DB.CONFIG)
@ -112,6 +121,7 @@ public class Crawler {
channel.set(CHANNEL.TITLE, item.title);
channel.set(CHANNEL.URL, item.url);
channel.set(CHANNEL.LOGO, item.tvgLogo);
channel.set(CHANNEL.CRAWL, crawlId);
channels.add(channel);
}
@ -120,15 +130,24 @@ public class Crawler {
.execute();
}
/**
 * Registers a new crawl wave by inserting a row into the CRAWL table.
 *
 * <p>The row's CRAWLED_AT column is set to the current time expressed as whole
 * seconds since the Unix epoch.
 *
 * @return the ID of the newly inserted CRAWL row
 */
private Integer beginCrawl() {
    // Instant exposes whole epoch seconds directly.
    final long crawledAt = Instant.now().getEpochSecond();
    return DSL.using(DB.CONFIG)
        .insertInto(CRAWL, CRAWL.CRAWLED_AT)
        .values(crawledAt)
        .returningResult(CRAWL.ID)
        .fetchSingleInto(Integer.class);
}
public void crawl() {
for (SourceItem source : sources) {
crawlId = beginCrawl();
for (Source source : loadSources()) {
switch (source.type) {
case M3U_LOCAL -> {
case "m3u-local" -> {
try {
if (source.path == null) {
logger.error("m3u local has to have \"path\" variable");
continue;
} else if (source.rootCategory == null) {
} else if (source.rootName == null) {
logger.error("source has to have \"rootCategory\"");
continue;
}
@ -136,7 +155,7 @@ public class Crawler {
String m3uData = Files.readString(Paths.get(source.path), StandardCharsets.UTF_8);
ArrayList<M3U> m3u = M3UParser.parse(m3uData);
updateAllChannels(m3u, source.rootCategory);
updateAllChannels(m3u, source.rootName);
} catch (IOException e) {
logger.error(e);
logger.error(String.format("failed to read local m3u file: %s", e.getMessage()));
@ -147,7 +166,7 @@ public class Crawler {
}
default -> {
logger.error(String.format("source type %s is not implemented yet :(", source.type.name()));
logger.error(String.format("source type %s is not implemented yet :(", source.type));
}
}
}

View file

@ -0,0 +1,23 @@
package com.mykola2312.mptv.db.pojo;
import jakarta.persistence.Column;
/**
 * Plain data holder for one row of the source table.
 *
 * <p>Each public field carries a {@code @Column} annotation naming the database
 * column it corresponds to. The crawler fetches rows of the SOURCE table into
 * instances of this class and reads the fields directly.
 */
public class Source {
// Row identifier (ID column).
@Column(name = "ID")
public int id;
// Source kind as a string; the crawler switches on this value (e.g. "m3u-local").
@Column(name = "TYPE")
public String type;
// Root name the crawler passes along when updating channels for this source.
@Column(name = "ROOT_NAME")
public String rootName;
// NOTE(review): presumably the location of a remote source; not read by the
// visible crawler code - confirm.
@Column(name = "URL")
public String url;
// Filesystem path of the m3u file that the crawler reads for "m3u-local" sources.
@Column(name = "PATH")
public String path;
// NOTE(review): presumably cookies to send when fetching a remote source; not read
// by the visible crawler code - confirm.
@Column(name = "COOKIES")
public String cookies;
}