You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

61 lines
1.7 KiB

  1. /*
  2. * vertretungsplan.io custom crawler
  3. * Copyright (C) 2019 Jonas Lochmann
  4. *
  5. * This program is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU Affero General Public License as
  7. * published by the Free Software Foundation, version 3 of the
  8. * License.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU Affero General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Affero General Public License
  16. * along with this program. If not, see <https://www.gnu.org/licenses/>.
  17. */
  18. import * as cheerio from 'cheerio'
  19. import { parse } from 'path'
  20. import * as request from 'request-promise-native'
  21. import { resolve } from 'url'
  /**
   * Input describing which page to download and, optionally, the credentials
   * to submit with the request.
   */
  interface RequestParams {
    /** Address of the page to fetch; also used as the base for relative links. */
    url: string
    /** Optional login name; sent as the `login` form field when credentials are used. */
    username?: string
    /** Optional password; sent as the `pass` form field when credentials are used. */
    password?: string
  }
  /**
   * Downloads the page at `url`.
   *
   * If neither a username nor a password is supplied, the URL is requested
   * directly. Otherwise the credentials are submitted to the same URL as a
   * form POST using the field names `login` and `pass`.
   *
   * @param params - target URL plus optional credentials
   * @returns the promise produced by the underlying `request` call
   */
  async function doRequest ({ username, password, url }: RequestParams) {
    const hasCredentials = Boolean(username || password)

    // Guard clause: without credentials there is nothing to submit.
    if (!hasCredentials) {
      return request(url)
    }

    const form = { login: username, pass: password }

    return request.post(url, { form })
  }
  /**
   * Fetches the listing page described by `params` and collects the files
   * linked from it.
   *
   * Only anchors inside table cells of the table directly below the element
   * with id `content` are considered. Each link is resolved against
   * `params.url`, and its title is the link text with the file extension
   * removed.
   *
   * @param params - URL of the listing page plus optional credentials
   * @returns one `{ url, title }` entry per link that carries an `href`
   * @throws Error with message `illegal access data` when the expected table
   *   is missing from the response (presumably because the login was
   *   rejected - confirm against the target site)
   */
  export async function getFileList (params: RequestParams): Promise<Array<{url: string; title: string}>> {
    const $ = cheerio.load(await doRequest(params))
    const list = $('div[id=content] > table')

    if (!list.length) {
      throw new Error('illegal access data')
    }

    const files: Array<{url: string; title: string}> = []

    list.find('td > a').each((_, itemBase) => {
      const item = $(itemBase)
      const href = item.attr('href')

      // An anchor without an href has no target to resolve. Passing
      // undefined to resolve() would throw and abort the whole listing,
      // so such anchors are skipped instead.
      if (typeof href !== 'string') {
        return
      }

      const itemUrl = resolve(params.url, href)
      const filename = item.text()
      // parse().name drops the directory part and the extension
      const title = parse(filename).name

      files.push({ url: itemUrl, title })
    })

    return files
  }