/*
 * vertretungsplan.io custom crawler
 * Copyright (C) 2019 Jonas Lochmann
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
|
|
|
|
import * as cheerio from 'cheerio'
|
|
import * as request from 'request-promise-native'
|
|
import * as trimNewlines from 'trim-newlines'
|
|
|
|
// Trailing text that parseBody strips from the end of the extracted paragraph
// when it is present. The two leading newlines are part of the match.
const footer = '\n\nInformationen zum Projekt \"Campus Kastanienallee\" gibt es hier.'
|
|
|
|
/**
 * Extracts the text of the first `<p>` inside the element with id `content`
 * from an HTML document and wraps it in a single-entry list.
 *
 * `<br>` elements are converted to newlines before the text is read, the
 * result is trimmed, and a trailing `footer` is removed when present.
 *
 * @param body HTML source of the page
 * @returns a list with one `{ title, content }` entry, or an empty list when
 *          the page has no matching paragraph
 */
function parseBody (body: string) {
  const $ = cheerio.load(body)

  // Replace line-break elements with real newlines so that .text() keeps them.
  $('br').replaceWith('\n')

  const content = $('div[id=content]').find('p').first()

  // A cheerio selection is always a truthy object, even when it matched
  // nothing, so emptiness has to be detected through its length.
  if (content.length === 0) {
    return []
  }

  let text = trimNewlines(content.text().trim()).trim()

  // endsWith instead of comparing lastIndexOf() with a computed offset: when
  // the footer is absent and the text is exactly one character shorter than
  // the footer, both sides of that comparison are -1 and the whole text
  // would be cut away.
  if (text.endsWith(footer)) {
    text = text.substring(0, text.length - footer.length)
  }

  return [{
    title: 'Wichtige Termine demnächst',
    content: text
  }]
}
|
|
|
|
/**
 * Fetches the document at the given URL and hands the response body to
 * `parseBody`.
 *
 * @param url address of the page to download
 * @returns a promise resolving to whatever `parseBody` returns for the
 *          downloaded body; a failure of the request rejects the promise
 */
export async function getSmallDates (url: string) {
  return parseBody(await request(url))
}
|