moenavigatorengine-tools is a collection of small programs which use MoeNavigatorEngine to perform certain tasks regarding HTML or XML handling, data extraction or network actions.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

148 lines
4.2 KiB

/*
This file is part of moenavigatorengine-tools
Copyright (C) 2017-2020 Moritz Strohm <ncc1988@posteo.de>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <memory>
#include <utility>

#include <MoeNavigatorEngine/MNERenderer/MNERenderer.h>
#include "./StrippedHtmlRenderer.h"
/**
 * Creates a renderer with an empty output document and a list of
 * persistent node names that contains only "html".
 */
StrippedHtmlRenderer::StrippedHtmlRenderer()
{
    //Only the "html" node is registered as persistent for now.
    //Node names that have been noted down as further candidates,
    //but are not registered yet:
    //  "html", "head", "title", "body",
    //  "table", "thead", "tr", "th", "tbody", "td",
    //  "dl", "dt", "dd",
    //  "ul", "ol", "li",
    //  "a", "s", "b", "i",
    //  "strong",
    //  "h1", "h2", "h3", "h4", "h5", "h6",
    //  "main", "aside", "nav", "header", "footer",
    //  "article", "section",
    //  "div", "span", "img"
    persistent_nodes.push_back("html");

    //Start with an empty output document.
    output_document = "";
}
/**
 * Reports the output modes this renderer supports.
 *
 * @return The numeric value of MNERenderer::OutputMode::RICHTEXT.
 */
uint32_t StrippedHtmlRenderer::getSupportedOutputModes()
{
    //RICHTEXT is the only mode reported by this renderer.
    const uint32_t supported_modes = static_cast<uint32_t>(
        MNERenderer::OutputMode::RICHTEXT
    );
    return supported_modes;
}
/**
 * Accepts an output mode request. The call currently has no effect,
 * regardless of the requested mode.
 *
 * @param mode The requested output mode.
 */
void StrippedHtmlRenderer::setOutputMode(MNERenderer::OutputMode mode)
{
    const bool unsupported_mode = (mode != MNERenderer::OutputMode::RICHTEXT);
    if (unsupported_mode) {
        //Placeholder for error handling that is not implemented yet:
        //throw new MNERendererException
    }
}
/**
 * Accepts drawing area dimensions and ignores them.
 *
 * @param width  Ignored.
 * @param height Ignored.
 */
void StrippedHtmlRenderer::setDrawingAreaDimensions(uint32_t width, uint32_t height)
{
    //Intentionally a no-op: the dimensions are not used by this renderer.
    static_cast<void>(width);
    static_cast<void>(height);
}
/**
 * Sets the document that shall be processed by this renderer.
 *
 * @param document Shared pointer to the root node of the document.
 *     The renderer stores the pointer; the node itself is not copied.
 */
void StrippedHtmlRenderer::setDocument(std::shared_ptr<DocumentNode> document)
{
    //The parameter is already a private copy of the caller's pointer,
    //so it is moved into the member instead of being copied a second time
    //(which would cost another atomic reference count update).
    this->document = std::move(document);
}
void StrippedHtmlRenderer::stripNodeContent(std::shared_ptr<DocumentNode> node)
{
//Delete most attributes of the current node.
for (std::map<std::string, std::string>::iterator i = node->attributes.begin();
i != node->attributes.end();
++i
) {
if ((node->name == "label") and (i->first == "for")) {
//We must keep the "for" attribute
//if the node is a HTML "label" node.
continue;
} else if ((node->name == "a") and (i->first == "href")) {
//The "href" attribute holds an URL or another kind of link
//in the HTML "a" node.
continue;
}
//No special attribute: Delete attribute:
node->attributes.erase(i);
}
bool keep_node = false;
//Check if the child nodes are one of the nodes we want to keep:
for (unsigned long int i = 0; i < node->children.size(); i++) {
keep_node = false;
for (unsigned long j = 0; j < this->persistent_nodes.size(); j++) {
if (node->children[i]->name == this->persistent_nodes[j]) {
keep_node = true;
}
continue;
}
if (keep_node == true) {
//Traverse the child node's attributes and child nodes:
this->stripNodeContent(node->children[i]);
} else {
//Delete children (and its content):
node->children.erase(node->children.begin() + i);
}
}
}
/**
 * Builds the output document from the current document.
 *
 * The result of the document's toXML() call is stored in output_document.
 * Debug messages are written to std::cerr before and after the conversion.
 * If no document has been set, the output document is set to an empty
 * string instead of dereferencing a null pointer.
 */
void StrippedHtmlRenderer::buildXHTMLDocument()
{
    //TODO:: add xmlns attribute to HTML node.
    if (this->document == nullptr) {
        //render() may be called before setDocument():
        //There is nothing to convert in that case.
        std::cerr << "StrippedHtmlRenderer:DEBUG: no document set, skipping XHTML document creation" << std::endl;
        this->output_document = "";
        return;
    }

    std::cerr << "StrippedHtmlRenderer:DEBUG: creating XHTML document" << std::endl;
    this->output_document = this->document->toXML();
    std::cerr << "StrippedHtmlRenderer:DEBUG: finished creating XHTML document" << std::endl;
}
void StrippedHtmlRenderer::render()
{
//For early development stage: get only text nodes:
//TODO: strip nodes
//Final step: generate an XHTML document
//out of the processed document:
this->buildXHTMLDocument();
}
std::shared_ptr<MNERenderedData> StrippedHtmlRenderer::getRenderedData()
{
std::shared_ptr<MNERenderedData> data(new MNERenderedData());
if (data == nullptr) {
return nullptr;
}
data->data = this->output_document;
return data;
}