reimu/Release/registry.lua
2018-07-10 13:54:56 +03:00

163 lines
No EOL
3.4 KiB
Lua

-- Execute base.lua before anything else in this script runs.
-- NOTE(review): presumably this is where the helpers used below but not
-- defined here come from (tohtml, curl_open, implode, u8, task, CURLOPT_*) --
-- confirm.
dofile("base.lua")
-- NOTE(review): looks like the worker-thread count for the host's task
-- facility -- confirm against the task API.
task.setThreadCount(35)
--task.setDelay(15000)
-- Number of failed attempts (curl or HTTP) parseMain tolerates for a single
-- page before it stops retrying that page.
maxErrors = 30
-- Site root URL. Not read by the functions visible in this file, which
-- hard-code the same address; presumably consumed by base.lua or other
-- scripts -- verify before changing.
domain = "http://hentai-chan.me/"
oldDomain = domain
-- HTML snippet; not referenced by the functions visible in this file.
jsEmpty = [[<script type='text/javascript'> var smartphone_true = 0;</script>]]
--- Append one "link = tags" record to registry.txt.
-- The link is echoed to stdout, then the record is appended in binary mode:
-- the "link = " prefix through f:write, the tags through u8.write, and a
-- trailing newline.
-- NOTE(review): u8.write is defined outside this file; presumably it takes
-- care of the tag string's text encoding -- confirm.
-- @param link  value substituted into the "%s = " prefix
-- @param tags  value handed to u8.write
function index_add(link,tags)
  print(link)
  -- io.open returns nil plus a message on failure; assert surfaces that
  -- message instead of an opaque "attempt to index a nil value".
  local f = assert(io.open("registry.txt","ab"))
  f:write(("%s = "):format(link))
  u8.write(f,tags)
  f:write("\n")
  f:close()
end
--- Collect genre tag names from a raw HTML page.
-- Parses `data` with tohtml, looks for every <div class="genre"> among the
-- parsed nodes and gathers rs:contentOf for each of its <a> children.
-- @param data  HTML source handed to tohtml
-- @return table  array of the collected values (empty when nothing matched)
function dumpTags(data)
  local rs = tohtml(data)
  local tags = {}
  for _, node in pairs(rs:toTable()) do
    if node:isTag() and node:tagName() == "div"
       and node:attribute("class") == "genre" then
      for _, child in pairs(rs:getChildsOf(node)) do
        if child:tagName() == "a" then
          tags[#tags+1] = rs:contentOf(child)
        end
      end
    end
  end
  -- Hand the collected list to the caller; without this the function would
  -- always evaluate to nil.
  return tags
end
--- Find the manga page URL inside a manga_row1 element.
-- Looks at each <h2> child of `row`; when the first child of that <h2> is an
-- <a> whose href, split on "/", has "manga" as its second-to-last segment,
-- that href is returned.
-- @param rs   parsed document exposing getChildsOf
-- @param row  the manga_row1 node
-- @return the href value, or nil when no such link exists
function getMangaLink(rs,row) --manga_row1
  for _, node in pairs(rs:getChildsOf(row)) do
    if node:isTag() and node:tagName() == "h2" then
      local anchor = rs:getChildsOf(node)[1]
      -- An empty <h2> has no first child; skip it instead of indexing nil.
      if anchor ~= nil and anchor:isTag() and anchor:tagName() == "a" then
        local href = anchor:attribute("href")
        -- An <a> without href cannot be split; treat it as "not a link".
        if href ~= nil then
          local parts = href:split("/")
          if parts[#parts-1] == "manga" then
            return href
          end
        end
      end
    end
  end
  return nil
end
--- Collect the tag names listed inside a manga_row3 element.
-- Walks row -> <div class="row3_left"> -> its 2nd child (which must be
-- <div class="item4">) -> that node's 2nd child, and treats the result as the
-- genre container. Every <a> child of the container that carries an href
-- contributes rs:contentOf to the result.
-- @param rs   parsed document exposing getChildsOf and contentOf
-- @param row  the manga_row3 node
-- @return table of collected values, or nil when no genre container is found
function getMangaTags(rs,row) --manga_row3
  local genre = nil
  for _, node in pairs(rs:getChildsOf(row)) do
    if node:tagName() == "div" and
       node:attribute("class") == "row3_left" then
      local item = rs:getChildsOf(node)[2]
      -- row3_left may have fewer than two children; skip it rather than
      -- calling a method on nil.
      if item ~= nil and item:tagName() == "div" and
         item:attribute("class") == "item4" then
        genre = rs:getChildsOf(item)[2]
      end
    end
  end
  if genre == nil then return nil end
  local tags = {}
  for _, child in pairs(rs:getChildsOf(genre)) do
    -- Only anchors that actually link somewhere count as tags.
    if child:tagName() == "a" and child:attribute("href") ~= nil then
      tags[#tags+1] = rs:contentOf(child)
    end
  end
  return tags
end
--- Extract the manga link and the comma-joined tag list from one content_row.
-- The link comes from the manga_row1 child (via getMangaLink). Tags come from
-- the manga_row3 children (via getMangaTags): the first manga_row3 is skipped
-- and the last later one that is visited supplies the tags.
-- NOTE(review): skipping the first manga_row3 relies on the children being
-- visited in document order -- confirm getChildsOf returns a plain array.
-- @param prs      parsed document exposing getChildsOf
-- @param content  the content_row node
-- @return link (nil when the row has no manga link), then implode(tags, ",")
function dumpRowContent(prs,content)
  -- Must be local: a global would carry the previous row's link into a row
  -- that has no manga_row1 of its own.
  local link = nil
  local tags = {}
  local seenRow3 = false
  for _, child in pairs(prs:getChildsOf(content)) do
    if child:tagName() == "div" then
      local class = child:attribute("class")
      if class == "manga_row1" then
        link = getMangaLink(prs,child)
      elseif class == "manga_row3" then
        if seenRow3 then
          -- getMangaTags yields nil when it finds no genre container; fall
          -- back to an empty list so implode always receives a table.
          tags = getMangaTags(prs,child) or {}
        else
          seenRow3 = true
        end
      end
    end
  end
  return link,implode(tags,",")
end
--- Parse one listing page and record every manga entry found on it.
-- Each <div class="content_row"> is handed to dumpRowContent; rows that yield
-- a link are appended to the registry through index_add.
-- @param data  HTML source handed to tohtml
function dumpSearch(data)
  local prs = tohtml(data)
  for _, node in pairs(prs:toTable()) do
    if node:tagName() == "div" and
       node:attribute("class") == "content_row" then
      local link, tags = dumpRowContent(prs,node)
      -- Rows without a manga link have nothing to register, and a nil link
      -- cannot be formatted with "%s" on Lua 5.1.
      if link ~= nil then
        index_add(link,tags)
      end
    end
  end
end
--- Crawl the manga listing pages and feed each one to dumpSearch.
-- Page 0 is fetched from /manga/, page N > 0 from /manga/new?offset=N*20.
-- Each request is repeated until it completes with curl result 0 and HTTP
-- status 200, or until it has failed maxErrors times; a page that never
-- succeeds is reported with "parseMain failed" and skipped.
-- @param page  number of listing pages to fetch (at least one page is always
--              fetched, because the loop condition is checked after the body)
function parseMain(page)
  local curl = curl_open()
  local cur = 0
  curl:setOpt(CURLOPT_REFERER,"http://hentai-chan.me/")
  curl:setOpt(CURLOPT_AUTOREFERER,1)
  repeat
    print(("=================\ncur %d\n"):format(cur))
    if cur == 0 then
      curl:setOpt(CURLOPT_URL,"http://hentai-chan.me/manga/")
    else
      curl:setOpt(CURLOPT_URL,
        ("http://hentai-chan.me/manga/new?offset=%d"):format(cur*20))
    end
    local data = nil
    local code = 500
    local res = 1
    local en = 0 -- failed attempts for the current page
    repeat
      data,res = curl:perform()
      if res ~= 0 then
        print(("CURL Error %d"):format(res))
        en = en + 1
      else
        code = curl:getInfo(CURLINFO_HTTP_CODE)
        if code ~= 200 then
          print(("HTTP Error %d"):format(code))
          en = en + 1
        end
      end
    -- >= rather than == so a zero or negative maxErrors still terminates.
    until (res == 0 and code == 200) or en >= maxErrors
    -- A body that arrived with a non-200 status is an error page, not a
    -- listing, so it must not reach the parser.
    if res ~= 0 or code ~= 200 or data == nil then
      print"parseMain failed"
    else dumpSearch(data) end
    cur = cur + 1
  -- >= rather than == so page <= 0 stops after the first page instead of
  -- looping forever.
  until cur >= page
end
-- Script entry point: crawl 653 listing pages (cur = 0 .. 652).
parseMain(653)