You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

60 lines
1.7 KiB

/*
* vertretungsplan.io custom crawler
* Copyright (C) 2019 Jonas Lochmann
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, version 3 of the
* License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import * as cheerio from 'cheerio'
import { parse } from 'path'
import * as request from 'request-promise-native'
import { resolve } from 'url'
/**
 * Input describing which page to fetch and, optionally, the credentials
 * to submit with the request.
 */
interface RequestParams {
  /** Address of the page to request; also the base for resolving the links found on it. */
  url: string
  /** Optional login name; submitted as the `login` form field when credentials are used. */
  username?: string
  /** Optional password; submitted as the `pass` form field when credentials are used. */
  password?: string
}
/**
 * Fetches the page at `url`.
 *
 * If neither a username nor a password is given, the URL is requested
 * directly. Otherwise the request is sent via `request.post` with the
 * credentials as form fields (`login` and `pass`).
 *
 * @returns whatever the `request` client resolves with for this URL
 */
async function doRequest ({ username, password, url }: RequestParams) {
  const hasCredentials = Boolean(username || password)

  // No credentials at all: plain request without a form body.
  if (!hasCredentials) {
    return request(url)
  }

  // At least one credential is set: submit both fields as a login form.
  const form = { login: username, pass: password }

  return request.post(url, { form })
}
/**
 * Downloads the page described by `params` and extracts the files linked
 * from the table directly inside `div[id=content]`.
 *
 * Every `td > a` link in that table yields one entry:
 * - `url` is the link's `href`, resolved against `params.url`
 * - `title` is the link text with its last file extension removed
 *   (the `name` part of `path.parse`)
 *
 * Anchors that have no `href` attribute are skipped, because there is
 * nothing to resolve for them.
 *
 * @param params page address and optional credentials
 * @returns the linked files in document order
 * @throws Error with message `illegal access data` when the page does not
 *   contain the expected table
 */
export async function getFileList (params: RequestParams): Promise<Array<{ url: string; title: string }>> {
  const $ = cheerio.load(await doRequest(params))
  const list = $('div[id=content] > table')

  // The table is missing when the response is not the expected file listing.
  if (!list.length) {
    throw new Error('illegal access data')
  }

  const files: Array<{ url: string; title: string }> = []

  list.find('td > a').each((_, itemBase) => {
    const item = $(itemBase)
    const href = item.attr('href')

    // An anchor without href would make url.resolve throw and abort the
    // whole listing; returning undefined here just continues the iteration.
    if (href === undefined) {
      return
    }

    const itemUrl = resolve(params.url, href)
    const filename = item.text()
    const title = parse(filename).name

    files.push({ url: itemUrl, title })
  })

  return files
}