moenavigatorengine-tools is a collection of small programs which use MoeNavigatorEngine to perform certain tasks regarding HTML or XML handling, data extraction or network actions.
This file is part of moenavigatorengine-tools
Copyright (C) 2017-2020 Moritz Strohm <>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
#include <MoeNavigatorEngine/MNERenderer/MNERenderer.h>
#include "./StrippedHtmlRenderer.h"
//Start with an empty output document:
this->output_document = "";
//HTML element names, grouped by purpose (document skeleton, tables,
//lists, inline markup, headings, sectioning, generic containers).
//NOTE(review): the statement that receives this list is not visible in
//this part of the file. Presumably it fills this->persistent_nodes,
//which stripNodeContent() compares child node names against - confirm.
"html", "head", "title", "body",
"table", "thead", "tr", "th", "tbody", "td",
"dl", "dt", "dd",
"ul", "ol", "li",
"a", "s", "b", "i",
"h1", "h2", "h3", "h4", "h5", "h6",
"main", "aside", "nav", "header", "footer",
"article", "section",
"div", "span", "img"
uint32_t StrippedHtmlRenderer::getSupportedOutputModes()
return MNERenderer::OutputMode::RICHTEXT;
//Accepts a request to switch the output mode.
//The mode is only checked against MNERenderer::OutputMode::RICHTEXT.
//Any other mode is currently ignored without an error, because the
//exception below is still commented out.
void StrippedHtmlRenderer::setOutputMode(MNERenderer::OutputMode mode)
{
    const bool unsupported_mode = (mode != MNERenderer::OutputMode::RICHTEXT);
    if (unsupported_mode) {
        //throw new MNERendererException
    }
}
//Receives the size of the drawing area.
//Both values are ignored: this method deliberately does nothing.
void StrippedHtmlRenderer::setDrawingAreaDimensions(uint32_t width, uint32_t height)
{
    //Left blank on purpose.
}
void StrippedHtmlRenderer::setDocument(std::shared_ptr<DocumentNode> document)
this->document = document;
void StrippedHtmlRenderer::stripNodeContent(std::shared_ptr<DocumentNode> node)
//Delete most attributes of the current node.
for (std::map<std::string, std::string>::iterator i = node->attributes.begin();
i != node->attributes.end();
) {
if ((node->name == "label") and (i->first == "for")) {
//We must keep the "for" attribute
//if the node is a HTML "label" node.
} else if ((node->name == "a") and (i->first == "href")) {
//The "href" attribute holds an URL or another kind of link
//in the HTML "a" node.
//No special attribute: Delete attribute:
bool keep_node = false;
//Check if the child nodes are one of the nodes we want to keep:
for (unsigned long int i = 0; i < node->children.size(); i++) {
keep_node = false;
for (unsigned long j = 0; j < this->persistent_nodes.size(); j++) {
if (node->children[i]->name == this->persistent_nodes[j]) {
keep_node = true;
if (keep_node == true) {
//Traverse the child node's attributes and child nodes:
} else {
//Delete children (and its content):
node->children.erase(node->children.begin() + i);
void StrippedHtmlRenderer::buildXHTMLDocument()
//TODO:: add xmlns attribute to HTML node.
std::cerr << "StrippedHtmlRenderer:DEBUG: creating XHTML document" << std::endl;
this->output_document = this->document->toXML();
std::cerr << "StrippedHtmlRenderer:DEBUG: finished creating XHTML document" << std::endl;
//Runs the rendering pass. According to the step comments below, the
//document is processed first and an XHTML document is generated last.
//NOTE(review): only the step comments of this method are visible here.
//Which calls belong to each step (presumably stripNodeContent() and
//buildXHTMLDocument()) must be confirmed against the full file.
void StrippedHtmlRenderer::render()
//For early development stage: get only text nodes:
//TODO: strip nodes
//Final step: generate an XHTML document
//out of the processed document:
std::shared_ptr<MNERenderedData> StrippedHtmlRenderer::getRenderedData()
std::shared_ptr<MNERenderedData> data(new MNERenderedData());
if (data == nullptr) {
return nullptr;
data->data = this->output_document;
return data;