You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

52 lines
1.5 KiB

  1. /*
  2. * vertretungsplan.io custom crawler
  3. * Copyright (C) 2019 Jonas Lochmann
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU Affero General Public License as
  7. * published by the Free Software Foundation, version 3 of the
  8. * License.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU Affero General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Affero General Public License
  16. * along with this program. If not, see <https://www.gnu.org/licenses/>.
  17. */
  18. import * as cheerio from 'cheerio'
  19. import * as request from 'request-promise-native'
  20. import * as trimNewlines from 'trim-newlines'
  21. const footer = '\n\nInformationen zum Projekt \"Campus Kastanienallee\" gibt es hier.'
  22. function parseBody (body: string) {
  23. let $ = cheerio.load(body)
  24. $('br').replaceWith('\n')
  25. let content = $('div[id=content]').find('p').first()
  26. if (!content) {
  27. return []
  28. }
  29. let text = trimNewlines(content.text().trim()).trim()
  30. if (text.lastIndexOf(footer) === text.length - footer.length) {
  31. text = text.substring(0, text.length - footer.length)
  32. }
  33. return [{
  34. title: 'Wichtige Termine demnächst',
  35. content: text
  36. }]
  37. }
  38. export async function getSmallDates (url: string) {
  39. const body = await request(url)
  40. return parseBody(body)
  41. }