reimu/htmlcxx/htmlcxx.cc
2018-07-10 13:54:56 +03:00

206 lines
4.1 KiB
C++

#include "html/ParserDom.h"
#include "html/utils.h"
#include "html/wincstring.h"
#include "css/parser_pp.h"
#ifndef WIN32
#include "config.h"
#else
#define VERSION "0.6"
#endif
#include <cstdlib>
#include <fstream>
#include <stdexcept>
#include <iostream>
#include <cstdio>
#include "wingetopt.h"
using namespace std;
using namespace htmlcxx;
/**
 * Print the one-line command synopsis to standard error.
 *
 * The synopsis lists every option accepted by the getopt() string in main()
 * ("hVC"), including -C, so it agrees with the long help text.
 *
 * @param prg  program name to show in the synopsis (normally argv[0])
 */
void usage(string prg) {
	cerr << "usage:\t" << prg << " [-h] [-V] [-C] file.html [file.css]" << endl;
}
/**
 * Print the full help text to standard error: the short synopsis produced by
 * usage(), followed by a description line, a blank line and one line per
 * command-line option.
 *
 * @param prg  program name forwarded to usage()
 */
void usage_long(string prg) {
	usage(prg);

	// Each entry is written on its own line; the empty entry yields the
	// blank line that separates the description from the option list.
	static const char *const help_text[] = {
		"Html and css parser",
		"",
		" -V\t print version number and exit",
		" -h\t print this help text",
		" -C\t disable css parsing"
	};
	const unsigned int line_count = sizeof(help_text) / sizeof(help_text[0]);
	for (unsigned int idx = 0; idx < line_count; ++idx) {
		cerr << help_text[idx] << endl;
	}
}
int main(int argc, char **argv)
{
tree<HTML::Node> tr;
bool parse_css = true;
string css_code;
try
{
while (1)
{
signed char c = getopt(argc, argv, "hVC");
if(c == -1) break;
switch(c) {
case 'h':
usage_long(argv[0]);
exit(0);
break;
case 'V':
cerr << VERSION << endl;
exit(0);
case 'C':
parse_css = false;
break;
default:
usage(argv[0]);
exit(1);
break;
}
}
if (argc != optind + 1 && argc != optind + 2)
{
usage(argv[0]);
exit(1);
}
ifstream file(argv[optind]);
if (!file.is_open())
{
cerr << "Unable to open file " << argv[optind] << endl;
exit(1);
}
string html;
while (1)
{
char buf[BUFSIZ];
file.read(buf, BUFSIZ);
if(file.gcount() == 0) {
break;
}
html.append(buf, file.gcount());
}
file.close();
if(argc == optind + 2) //we have a separate css file
{
ifstream fcss(argv[optind + 1]);
if(!fcss.is_open())
{
cerr << "Unable to open file " << argv[optind] << endl;
exit(1);
}
while (1)
{
char buf[BUFSIZ];
fcss.read(buf, BUFSIZ);
if(fcss.gcount() == 0) {
break;
}
css_code.append(buf, fcss.gcount());
}
fcss.close();
}
HTML::ParserDom parser;
parser.parse(html);
tr = parser.getTree();
cout << tr << endl;
} catch (exception &e) {
cerr << "Exception " << e.what() << " caught" << endl;
exit(1);
} catch (...) {
cerr << "Unknow exception caught " << endl;
}
#ifdef WIN32
if(parse_css)
{
cerr << "Css parsing not supported in win32" << endl;
return 1;
}
return 0;
#else
if (parse_css) try
{
if(!parse_css) exit(0);
CSS::Parser css_parser;
tree<HTML::Node>::iterator it = tr.begin();
tree<HTML::Node>::iterator end = tr.end();
if(css_code.length()) {
css_parser.parse(css_code);
}
cout << "CSS attributes:" << endl;
cout << endl;
while (it != end)
{
if (it->isTag())
{
it->parseAttributes();
vector<CSS::Parser::Selector> v;
tree<HTML::Node>::iterator k = it;
while (k != tr.begin())
{
CSS::Parser::Selector s;
s.setElement(k->tagName());
s.setId(k->attribute("id").second);
s.setClass(k->attribute("class").second);
s.setPseudoClass(CSS::Parser::NONE_CLASS);
s.setPseudoElement(CSS::Parser::NONE_ELEMENT);
v.push_back(s);
k = tr.parent(k);
}
map<string, string> attributes = css_parser.getAttributes(v);
map<string, string>::const_iterator mit = attributes.begin();
map<string, string>::const_iterator mend = attributes.end();
string tag = it->tagName();
for(unsigned int i = 0; i < tag.size(); ++i) tag[i] = ::toupper(tag[i]);
cout << tag << "@[" << it->offset() << ":" << it->offset() + it->length() << ")" << endl;
for(; mit != mend; ++mit) cout << mit->first << ": " << mit->second << endl;
cout << endl;
if (strcasecmp(it->tagName().c_str(), "STYLE") == 0)
{
tree<HTML::Node>::iterator begin, end;
begin = it;
end = it;
end.skip_children();
++end;
string css_snippet;
for (; begin != end; ++begin)
{
if (!(begin->isTag())) css_snippet.append(begin->text());
}
css_parser.parse(css_snippet);
}
}
++it;
}
} catch (exception &e) {
cerr << "Exception " << e.what() << " caught" << endl;
exit(1);
} catch (...) {
cerr << "Unknow exception caught " << endl;
}
exit(0);
#endif
}