// Test helpers and test classes exercising the htmlcxx pieces used below
// (HTML::ParserDom, HTML::Node, Uri and the HTML:: string helpers).
//
// myassert(x)
//   If x evaluates to false, prints "Test at <file>:<line> failed!" to stderr
//   and terminates the process with exit(1). Otherwise does nothing.
//
// my_tree_compare(begin, end, ref)
//   Advances a copy of `begin` and `ref` in lockstep while both differ from
//   `end`. Returns false as soon as two visited nodes differ in
//   number_of_children() or in text(); returns true otherwise (including when
//   the loop body never runs).
//   NOTE(review): `ref` is compared against `end`, the end iterator of the
//   range walked from `begin`. TagInitTest passes tr.end() there while `ref`
//   comes from a different tree (reference.begin()) -- confirm that this
//   comparison is intended.
//
// HtmlTest::parse()
//   Parses a fixed HTML string with HTML::ParserDom, copies parser.getTree()
//   into a local tree, streams that tree to cerr and returns true. It makes
//   no assertions about the parsed result.
//
// HtmlTest::string_manip()
//   Uses myassert to compare the results of HTML::strip_comments,
//   HTML::single_blank, HTML::decode_entities and HTML::convert_link with
//   hard-coded expected strings, then resolves the fragment "#top" against
//   root_link through Uri::absolute and checks the unparse() output.
//   Returns true when every check passes (a failed check exits the process).
//
// TagInitTest::test()
//   Builds a reference tree by hand: one node is inserted at
//   reference.begin(), and each following HTML::Node (with explicit offset,
//   length, isTag, isComment, text, tagName and closingText values) is added
//   with append_child on the iterator returned by the previous insertion.
//   The same html string is then parsed with HTML::ParserDom and the result
//   is checked against the reference with my_tree_compare inside myassert.
//
// The class ParseAttrTest that starts at the end of this section continues
// beyond it and is not described here.
#include #include #include #include #include #include "CharsetConverter.h" #include "Uri.h" #include "ParserDom.h" #include "utils.h" using namespace std; using namespace htmlcxx; #define myassert(x) \ do {\ if(!(x)) {\ fprintf(stderr, "Test at %s:%d failed!\n", __FILE__, __LINE__);\ exit(1);\ }\ } while(0) bool my_tree_compare(tree::iterator begin, tree::iterator end, tree::iterator ref) { tree::iterator it(begin); while (it != end && ref != end) { if (it.number_of_children() != ref.number_of_children()) return false; if (it->text() != ref->text()) return false; ++it; ++ref; } return true; } class HtmlTest { public: bool parse() { cerr << "Parsing some html... "; tree tr; string html = "\n\n\n\n
\n\n\n\n\n\n
\"adtAustralian Digital Theses Program
\n
\n
\n
\n"; HTML::ParserDom parser; parser.parse(html); tr = parser.getTree(); cerr << tr << endl; cerr << " ok" << endl; return true; } bool string_manip() { string root_link = "http://www.akwan.com.br/teste/acton.asp?q=atletico"; string root_link2 = "http://answerbook.ime.usp.br:8888/ab2"; string link1 = "../a.html"; string link2 = "//b.html"; string link3 = "serviço.html"; string link4 = "./d/c.html"; string link5 = "http://www.fadazan.com.br/../../../../../Download/teste/../jacobmacanhan,%203276.jpg"; string link6 = "search?q=galo"; string link7 = "http://casadebruxa.com.br/anuncio/../banner/vai.asp?id=21&url=http://www.clickdirect.com.br"; string link8 = "/ab2/Help_C/ONLINEACCESS/@Ab2HelpView/idmatch(help-library-info)"; string link9 = "/ab2/coll.67.3/@Ab2CollView?"; string link10 = "http://www.a.com.br"; string link11 = "'http://www.b.com.br"; string link12 = "?q=mineiro"; string entities = "nos somos do clube atletico mineiro á á brasil   &teste; ãä á â &end "; string comments = "hello brazil"; string multiblank = " 1 2 3\r\n 4 5 \r\n 6 \n"; string justblank = " \r\n \r\n \n"; string nonblank = "dsadasdada"; myassert(HTML::strip_comments(comments) == "hello brazil"); myassert(HTML::single_blank(multiblank) == "1 2 3 4 5 6"); myassert(HTML::single_blank(justblank) == ""); myassert(HTML::single_blank(nonblank) == nonblank); myassert(HTML::decode_entities(entities) == "nos somos do clube atletico mineiro á á brasil &teste; ãä á â &end "); myassert(HTML::convert_link(link1, root_link) == "http://www.akwan.com.br/a.html"); myassert(HTML::convert_link(link2, root_link) == "http://www.akwan.com.br/b.html"); myassert(HTML::convert_link(link3, root_link) == "http://www.akwan.com.br/teste/serviço.html"); myassert(HTML::convert_link(link4, root_link) == "http://www.akwan.com.br/teste/d/c.html"); myassert(HTML::convert_link(link5, root_link) == "http://www.fadazan.com.br/Download/jacobmacanhan,%203276.jpg"); myassert(HTML::convert_link(link6, root_link) == 
"http://www.akwan.com.br/teste/search?q=galo"); myassert(HTML::convert_link(link7, root_link) == "http://casadebruxa.com.br/banner/vai.asp?id=21&url=http://www.clickdirect.com.br"); myassert(HTML::convert_link(link8, root_link2) == "http://answerbook.ime.usp.br:8888/ab2/Help_C/ONLINEACCESS/@Ab2HelpView/idmatch(help-library-info)"); myassert(HTML::convert_link(link9, root_link2) == "http://answerbook.ime.usp.br:8888/ab2/coll.67.3/@Ab2CollView?"); myassert(HTML::convert_link(link10, root_link2) == "http://www.a.com.br/"); myassert(HTML::convert_link(link11, root_link) == "http://www.akwan.com.br/teste/'http:/www.b.com.br"); myassert(HTML::convert_link(link12, root_link) == "http://www.akwan.com.br/teste/acton.asp?q=mineiro"); Uri root(root_link); Uri fragment("#top"); Uri result(fragment.absolute(root)); myassert(result.unparse() == "http://www.akwan.com.br/teste/acton.asp?q=atletico#top"); return true; } }; class TagInitTest { public: void test(void) { string html(""); tree reference; HTML::Node n; reference.clear(); tree::iterator current = reference.begin(); n.offset(0); n.length(html.size()); n.isTag(true); n.isComment(false); current = reference.insert(current,n); n.offset(0); n.length(html.size()); n.isTag(true); n.isComment(false); n.text(""); n.tagName("html"); n.closingText(""); current = reference.append_child(current,n); n.offset(6); n.length(71); n.isTag(true); n.isComment(false); n.text(""); n.tagName("head"); n.closingText(""); current = reference.append_child(current,n); n.offset(12); n.length(58); n.isTag(true); n.isComment(false); n.text(""); current = reference.append_child(current,n); n.offset(40); n.length(21); n.isTag(false); n.isComment(false); n.text("if (0 < 2) saida = 1;"); n.tagName("if (0 < 2) saida = 1;"); n.closingText(""); current = reference.append_child(current,n); tree tr; HTML::ParserDom parser; parser.parse(html); tr = parser.getTree(); // cerr << reference << endl; // cerr << tr << endl; myassert(my_tree_compare(tr.begin(), 
tr.end(), reference.begin())); } }; class ParseAttrTest { public: bool test() { string html("