Fetch mileage by screen-scraping the D4H web site

This commit is contained in:
Matt Cosand
2024-03-08 00:28:41 -08:00
parent c1b53312bf
commit 56cf628b9e
5 changed files with 141 additions and 13 deletions
+12 -12
View File
@@ -22,7 +22,7 @@ export default class D4HRequest {
if (!token) {
throw new Error('Token cannot be empty')
}
this._fetchLimit = fetchLimit
this._token = token
}
@@ -32,8 +32,8 @@ export default class D4HRequest {
'Authorization': `Bearer ${this._token}`,
'Content-Type': 'application/json'
}
console.log(`${method}: ${url.toString()}\n${JSON.stringify(body)}`)
//console.log(`${method}: ${url.toString()}\n${JSON.stringify(body)}`)
const options: RequestInit = {
method,
@@ -43,42 +43,42 @@ export default class D4HRequest {
if (body) {
options.body = JSON.stringify(body)
}
const rawResponse = await fetch(url.toString(), options)
const response = await rawResponse.json() as D4HResponse<TResponse> & D4HError
if (response.statusCode !== 200) {
const d4hError = response as D4HError
throw new Error(`${d4hError.statusCode}: ${d4hError.error}: ${d4hError.message}`)
}
return response.data
}
/**
 * Performs a GET against `url` and resolves with the `data` payload that
 * `requestAsync` extracts from the response.
 *
 * @param url Fully-formed endpoint URL, including any query parameters.
 * @returns The response payload, typed by the caller as `DataType`.
 * @throws Whatever `requestAsync` throws (it raises an `Error` when the
 *         response's `statusCode` is not 200).
 */
async getAsync<DataType>(url: URL): Promise<DataType> {
  // A GET carries no request body, hence `never` for the body type parameter.
  const payload = await this.requestAsync<never, DataType>(url, HttpMethod.Get)
  return payload
}
/**
 * Fetches every page of a list endpoint and returns all items in order.
 *
 * Pages are requested sequentially with `offset`/`limit` query parameters
 * appended to a copy of `url`; the page size is `this._fetchLimit`. Paging
 * stops at the first page that is shorter than the page size.
 *
 * @param url Endpoint URL without paging parameters. It is not mutated.
 * @returns All items from all pages, concatenated in request order.
 * @throws Whatever `getAsync` throws for a failed page request.
 */
async getManyAsync<DataType>(url: URL): Promise<DataType[]> {
  const results: DataType[] = []
  const pageSize = this._fetchLimit

  for (let offset = 0; ; offset += pageSize) {
    const pageUrl = new URL(url)
    pageUrl.searchParams.append('offset', offset.toString())
    pageUrl.searchParams.append('limit', pageSize.toString())

    const page = await this.getAsync<DataType[]>(pageUrl)
    // Append in place rather than re-copying the accumulated array on each page.
    for (const item of page) {
      results.push(item)
    }

    // A short page means we have reached the end. A non-positive page size can
    // never advance the offset, so stop after one request instead of looping forever.
    if (page.length < pageSize || pageSize <= 0) {
      break
    }
  }

  return results
}
+99
View File
@@ -0,0 +1,99 @@
import Axios, { AxiosInstance } from 'axios'
import { wrapper } from 'axios-cookiejar-support'
import { CookieJar } from 'tough-cookie'
import { parse as parseHtml, HTMLElement } from 'node-html-parser'
/**
 * Month abbreviations indexed by 1-based month number; index 0 is an unused
 * placeholder so that `MONTHS.indexOf('Jan') === 1`.
 */
const MONTHS = [ '', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' ]

/**
 * Converts a timestamp string scraped from the web UI into a `Date`.
 *
 * The string is read as four whitespace-separated tokens:
 * `<time> <day-with-optional-suffix> <month-abbreviation> <year>`,
 * e.g. `12:15 15th Jun 2023`.
 *
 * @param d4hTime Raw timestamp text; surrounding whitespace is ignored.
 * @returns The parsed date, or an Invalid Date (`getTime()` is `NaN`) when the
 *          day, month or year token cannot be interpreted.
 * @throws Error when the text has no day or month token at all.
 */
function d4hWebTimeToDate(d4hTime: string) :Date {
  // Split on any run of whitespace so doubled spaces or line breaks in the
  // scraped text do not produce empty tokens.
  const [time, dayToken, monthToken, yearToken] = d4hTime.trim().split(/\s+/)
  if (!dayToken || !monthToken) throw new Error('unrecognized web date format: ' + d4hTime)

  const month = MONTHS.indexOf(monthToken)
  const day = /^\d+/.exec(dayToken)?.[0]

  // Never interpolate "-1" or "undefined" into the date string: the string is
  // not ISO 8601, so how an engine treats such junk is implementation-defined.
  if (month < 1 || !day || !yearToken || !/^\d{4}$/.test(yearToken)) {
    return new Date(NaN)
  }

  // NOTE(review): this "Y-M-D HH:mm" form is still parsed by the engine's
  // non-ISO fallback (interpreted as local time on Node) — confirm this is the
  // intended time zone.
  return new Date(`${yearToken}-${month}-${day} ${time}`)
}
/**
 * Parsed HTML of a single mission page, with helpers for extracting data
 * from it.
 */
export class MissionPage {
  /** Mission id, always stored as a string. */
  id: string
  /** Root of the parsed HTML document. */
  root: HTMLElement

  /**
   * @param id Mission id; numbers are converted to their string form.
   * @param body Raw HTML of the page.
   */
  constructor(id: string|number, body: string) {
    this.id = String(id)
    this.root = parseHtml(body)
  }

  /**
   * Builds a map from each `#timestamps th` header's text to the time found
   * in the `.timestamp_title` element that shares the header's parent.
   *
   * Headers with no timestamp text, or whose timestamp does not parse to a
   * valid date, are left out of the result rather than failing the whole
   * timeline.
   *
   * @returns Header text (trimmed) mapped to its parsed `Date`.
   * @throws Error from `d4hWebTimeToDate` when a non-empty timestamp has an
   *         unrecognized format.
   */
  getTimeline(): Record<string, Date> {
    const timeline: Record<string, Date> = {}

    for (const header of this.root.querySelectorAll('#timestamps th')) {
      const rawTime = header.parentNode.querySelector('.timestamp_title')?.text.trim()
      // Previously a missing timestamp was passed on as '' which made the
      // parser throw; an absent value is simply "not recorded", so skip it.
      if (!rawTime) continue

      const when = d4hWebTimeToDate(rawTime)
      if (!isNaN(when.getTime())) {
        timeline[header.text.trim()] = when
      }
    }

    return timeline
  }
}
/**
 * Cookie-authenticated client for the D4H team-manager web site, used to
 * scrape data out of its HTML pages.
 *
 * Configuration is read from the environment: `D4H_DOMAIN` (sub-domain
 * prefix), `D4H_USER` and `D4H_PASSWORD`.
 */
export class D4HWebClient {
  /** Cookie store shared by all requests made through this client. */
  jar: CookieJar = new CookieJar()
  /** HTTP client; replaced by a logged-in, base-URL-bound instance once `setup` succeeds. */
  axios: AxiosInstance = Axios.create()
  /** Login currently in flight, so concurrent callers share one attempt. */
  private pendingSetup: Promise<void> | undefined

  /**
   * Ensures the client is logged in. Does nothing when `axios` already has a
   * base URL; otherwise performs the login flow once, even if called
   * concurrently. A failed login leaves the client un-configured so the next
   * call retries.
   *
   * @throws Error when configuration is missing or the login crumb cookie is not found.
   */
  async setup(): Promise<void> {
    if (this.axios.defaults.baseURL) return

    if (!this.pendingSetup) {
      this.pendingSetup = this.login().finally(() => {
        this.pendingSetup = undefined
      })
    }
    await this.pendingSetup
  }

  /** Runs the login flow and publishes the resulting client only on success. */
  private async login(): Promise<void> {
    const domain = process.env['D4H_DOMAIN']
    const user = process.env['D4H_USER']
    const password = process.env['D4H_PASSWORD']
    if (!domain || !user || !password) throw new Error('Missing D4H web config')

    const client = wrapper(Axios.create({ jar: this.jar, baseURL: `https://${domain}.team-manager.us.d4h.com` }))

    // The first request populates the cookie jar, including the "crumb"
    // cookie that the login form must echo back.
    await client.get<string>('/')
    const accountCookies = await this.jar.getCookies('https://accounts.us.d4h.com/')
    const crumb = accountCookies.find(cookie => cookie.key === 'crumb')?.value
    if (!crumb) throw new Error('did not find crumb during login')

    const params = new URLSearchParams()
    params.append('crumb', crumb)
    params.append('email', user)
    params.append('password', password)
    // NOTE(review): the response is not inspected, so rejected credentials
    // are not detected here — confirm how the site signals a failed login.
    await client.post<string>('https://accounts.us.d4h.com/password',
      params,
      { headers: { 'content-type': 'application/x-www-form-urlencoded'}}
    )
    await client.get('/team')

    // Assign last: `setup` treats a configured base URL as "already logged
    // in", so the client must not become visible before login has finished.
    this.axios = client
  }

  /**
   * Downloads and parses the view page for one mission.
   *
   * @param id Mission id.
   * @returns The parsed page.
   */
  async getMissionPage(id: string): Promise<MissionPage> {
    await this.setup()
    const response = await this.axios.get<string>(`/team/incidents/view/${encodeURIComponent(id)}`)
    return new MissionPage(id, response.data)
  }

  /**
   * Lists the distinct values found in the second cell of each row of the
   * mission's audit table.
   *
   * @param missionId Mission id.
   * @returns Sorted, de-duplicated, non-empty cell texts. Rows with fewer
   *          than two cells (e.g. header rows) are ignored.
   */
  async getMissionReportContributorNames(missionId: string|number): Promise<string[]> {
    await this.setup()
    const response = await this.axios.get<string>(`/audit/embed?entity_type=incident&entity_id=${encodeURIComponent(String(missionId))}`)
    const root = parseHtml(response.data)

    const names = new Set<string>()
    for (const row of root.querySelectorAll('tr')) {
      const name = row.querySelectorAll('td')[1]?.text.trim()
      if (name) names.add(name)
    }
    return [...names].sort()
  }

  /**
   * Reads per-attendee mileage from the activity's costing edit page.
   *
   * @param activityId Activity id.
   * @returns Map of `entity_id` to `distance`; empty when the embedded data
   *          is not found or has no `member` list.
   * @throws SyntaxError when the embedded data is found but is not valid JSON.
   */
  async getMileage(activityId: string|number): Promise<Record<number, number>> {
    // The costings table can get out of sync with the attendance table, which breaks the ability to export the CSV
    // and match rows in that file to rows from the API. If you load the costings edit page, the site shows you which rows
    // are out of date, and you have to click an icon to sync each row.
    // The data for both the stale and current values is in the page HTML as JSON, so we grab the JSON from the page source
    // and parse out the entity_id (matches the attendance row id from the API) and the distance in miles.
    await this.setup()
    const response = await this.axios.get<string>(`/team/incidents/edit/${encodeURIComponent(String(activityId))}/costing`)
    const match = /existing_data *?= *?(.*?original_members.*?);/.exec(response.data)
    if (!match) return {}

    const rawData = JSON.parse(match[1] ?? '{}') as { member?: { entity_id: number, distance: number }[] }
    // Scraped JSON is not under our control; tolerate a missing member list.
    if (!Array.isArray(rawData.member)) return {}

    const mileage: Record<number, number> = {}
    for (const row of rawData.member) {
      mileage[row.entity_id] = row.distance
    }
    return mileage
  }
}
+23
View File
@@ -0,0 +1,23 @@
/**
 * Shape of an incident record.
 *
 * NOTE(review): field meanings are not established in this file; presumably
 * they mirror the D4H API's incident payload — confirm against the API docs.
 */
export interface Incident {
  // Always present.
  id: number
  ref_desc: string
  date: string

  // May be absent.
  enddate?: string
  description?: string
  lat?: number
  lng?: number
  tags?: string[]
}
/**
 * Shape of one roster entry for an incident: a member, the role they held,
 * and the entry's status and date range.
 *
 * NOTE(review): presumably mirrors the D4H API's attendance payload —
 * confirm field semantics against the API docs.
 */
export interface IncidentRoster {
  id: number
  status: string
  date: string
  enddate: string
  role: {
    id: number
    title: string
    bundle: string
  }
  member: {
    id: number
    name: string
  }
}