|
| 1 | +import http from 'http' |
| 2 | +import https from 'https' |
| 3 | +import { Readable } from 'stream' |
| 4 | + |
| 5 | +import { getServiceLogger } from '@crowd/logging' |
| 6 | + |
| 7 | +import { IDatasetDescriptor, IDiscoverySource, IDiscoverySourceRow } from '../types' |
| 8 | + |
| 9 | +const log = getServiceLogger() |
| 10 | + |
/**
 * Fallback base URL for the LF Criticality Score API.
 *
 * NOTE(review): this host is an ngrok free-tier tunnel, which looks like a
 * development endpoint rather than a stable service address — confirm the
 * intended default before shipping.
 */
const DEFAULT_API_URL = 'https://hypervascular-nonduplicative-vern.ngrok-free.dev'

/** Number of rows requested per page from the scores endpoint. */
const PAGE_SIZE = 100
| 13 | + |
/**
 * Envelope of a single paginated response from the scores endpoint,
 * as consumed by this file.
 */
interface LfApiResponse {
  /** Page number of this response (presumably 1-based, matching the request — confirm against the API). */
  page: number
  /** Page size reported by the API. */
  pageSize: number
  /** Presumably the total number of rows across all pages — confirm against the API. */
  total: number
  /** Total number of pages; the pagination loop stops once this page has been fetched. */
  totalPages: number
  /** Rows contained in this page. */
  data: LfApiRow[]
}
| 21 | + |
/**
 * One scored repository as returned in `LfApiResponse.data`.
 *
 * NOTE(review): units and value ranges of the numeric fields are not visible
 * in this file — confirm against the API documentation before relying on them.
 */
interface LfApiRow {
  runDate: string
  /** Repository URL; the project slug and repository name are derived from it. */
  repoUrl: string
  owner: string
  repoName: string
  contributors: number
  organizations: number
  sizeSloc: number
  lastUpdated: number
  age: number
  commitFreq: number
  /** Criticality score for the repository. */
  score: number
}
| 35 | + |
/**
 * Resolves the base URL of the LF Criticality Score API.
 *
 * Reads the `LF_CRITICALITY_SCORE_API_URL` environment variable. When it is
 * unset, empty or whitespace-only, `DEFAULT_API_URL` is used instead, so a
 * blank variable can never yield an empty (relative) base URL.
 *
 * @returns the base URL with all trailing slashes removed, so callers can
 *   append paths that start with `/`.
 */
function getApiBaseUrl(): string {
  const configured = process.env.LF_CRITICALITY_SCORE_API_URL?.trim()
  // `||` on purpose: an empty string must fall back to the default as well.
  return (configured || DEFAULT_API_URL).replace(/\/+$/, '')
}
| 39 | + |
/**
 * Fetches one page of scores from the LF Criticality Score API.
 *
 * @param baseUrl API base URL without a trailing slash.
 * @param startDate value sent as the `startDate` query parameter.
 * @param endDate value sent as the `endDate` query parameter.
 * @param page page number to request.
 * @returns the parsed response envelope.
 * @throws (as a rejection) when the request fails or stalls, the status is
 *   not 200, the body is not valid JSON, or the body has no `data` array.
 */
async function fetchPage(
  baseUrl: string,
  startDate: string,
  endDate: string,
  page: number,
): Promise<LfApiResponse> {
  // Abort when the socket stays idle this long, so a stalled connection cannot hang the caller forever.
  const idleTimeoutMs = 60000

  // URLSearchParams percent-encodes the values, so unusual characters cannot corrupt the query string.
  const query = new URLSearchParams({
    startDate,
    endDate,
    page: String(page),
    pageSize: String(PAGE_SIZE),
  })
  const url = `${baseUrl}/projects/scores?${query.toString()}`

  return new Promise<LfApiResponse>((resolve, reject) => {
    const client = url.startsWith('https://') ? https : http

    // `get()` ends the request itself; no explicit `req.end()` is required.
    const req = client.get(url, (res) => {
      if (res.statusCode !== 200) {
        // Drain the body so the socket is released.
        res.resume()
        reject(new Error(`LF Criticality Score API returned status ${res.statusCode} for ${url}`))
        return
      }

      const chunks: Uint8Array[] = []
      res.on('data', (chunk: Uint8Array) => chunks.push(chunk))
      res.on('error', reject)
      res.on('end', () => {
        let payload: unknown
        try {
          payload = JSON.parse(Buffer.concat(chunks).toString('utf8'))
        } catch (err) {
          reject(new Error(`Failed to parse LF Criticality Score API response: ${err}`))
          return
        }

        // Minimal shape check: callers iterate `data`, so it must be an array.
        const rows = (payload as { data?: unknown } | null)?.data
        if (!Array.isArray(rows)) {
          reject(new Error(`LF Criticality Score API response for ${url} has no "data" array`))
          return
        }

        resolve(payload as LfApiResponse)
      })
    })

    req.setTimeout(idleTimeoutMs, () => {
      // destroy() surfaces the error through the 'error' handler below.
      req.destroy(
        new Error(`LF Criticality Score API request timed out after ${idleTimeoutMs}ms for ${url}`),
      )
    })
    req.on('error', reject)
  })
}
| 74 | + |
/**
 * Computes the first and last calendar day (UTC) of a month relative to a
 * reference date.
 *
 * @param monthOffset 0 → the reference month, -1 → the month before, etc.
 * @param now reference instant; defaults to the current time. Passing it
 *   explicitly makes the result deterministic.
 * @returns both dates formatted as `YYYY-MM-DD`.
 */
function monthRange(
  monthOffset: number,
  now: Date = new Date(),
): { startDate: string; endDate: string } {
  const year = now.getUTCFullYear()
  // May be negative or greater than 11; Date.UTC normalises it into the right year.
  const month = now.getUTCMonth() + monthOffset

  const firstDay = new Date(Date.UTC(year, month, 1))
  // Day 0 of the following month is the last day of this one.
  const lastDay = new Date(Date.UTC(year, month + 1, 0))

  const twoDigits = (n: number): string => String(n).padStart(2, '0')
  const toIsoDate = (d: Date): string =>
    `${d.getUTCFullYear()}-${twoDigits(d.getUTCMonth() + 1)}-${twoDigits(d.getUTCDate())}`

  return { startDate: toIsoDate(firstDay), endDate: toIsoDate(lastDay) }
}
| 93 | + |
/**
 * Discovery source backed by the LF Criticality Score API.
 *
 * Exposes one dataset per calendar month and streams the rows of a dataset
 * as plain objects (`format` is `'json'`).
 */
export class LfCriticalityScoreSource implements IDiscoverySource {
  public readonly name = 'lf-criticality-score'
  public readonly format = 'json' as const

  /**
   * Lists one dataset per month for the last 12 months, newest first.
   *
   * Each descriptor's `id` is the month as `YYYY-MM`, `date` is the first day
   * of that month, and `url` carries the `startDate`/`endDate` query
   * parameters that `fetchDatasetStream` reads back.
   */
  async listAvailableDatasets(): Promise<IDatasetDescriptor[]> {
    const baseUrl = getApiBaseUrl()
    const datasets: IDatasetDescriptor[] = []

    for (let offset = 0; offset >= -11; offset--) {
      const { startDate, endDate } = monthRange(offset)

      datasets.push({
        id: startDate.slice(0, 7), // e.g. "2026-02"
        date: startDate,
        url: `${baseUrl}/projects/scores?startDate=${startDate}&endDate=${endDate}`,
      })
    }

    return datasets
  }

  /**
   * Returns an object-mode Readable that yields every row of the dataset,
   * fetching the API page by page.
   *
   * Pages are fetched lazily, driven by consumer demand: the next page is
   * only requested once the rows of the current one have been consumed, and
   * fetching stops if the consumer destroys the stream. A failed page fetch
   * destroys the stream with that error. Consumers are expected to iterate
   * the rows directly (no CSV parsing) because `format` is `'json'`.
   *
   * @param dataset descriptor whose `url` carries the `startDate` and
   *   `endDate` query parameters.
   */
  async fetchDatasetStream(dataset: IDatasetDescriptor): Promise<Readable> {
    const baseUrl = getApiBaseUrl()

    // The date range is read back from the URL stored in the descriptor.
    const params = new URL(dataset.url).searchParams
    const startDate = params.get('startDate') ?? ''
    const endDate = params.get('endDate') ?? ''

    async function* rows(): AsyncGenerator<LfApiRow> {
      let page = 1
      let totalPages = 1

      do {
        const response = await fetchPage(baseUrl, startDate, endDate, page)
        totalPages = response.totalPages

        for (const row of response.data) {
          // A null entry must not reach the stream: null is the end-of-stream marker.
          if (row == null) {
            continue
          }
          yield row
        }

        log.debug(
          { datasetId: dataset.id, page, totalPages, rowsInPage: response.data.length },
          'LF Criticality Score page fetched.',
        )

        page++
      } while (page <= totalPages)
    }

    // Readable.from pulls from the generator only as the consumer reads.
    return Readable.from(rows(), { objectMode: true })
  }

  /**
   * Converts one raw API row into a discovery row.
   *
   * @param rawRow row object as produced by the dataset stream.
   * @returns the parsed row, or `null` when `repoUrl` is missing, is not a
   *   string, or yields no project slug / repository name.
   *   `lfCriticalityScore` is `undefined` when `score` is neither a number
   *   nor a numeric string.
   */
  parseRow(rawRow: Record<string, unknown>): IDiscoverySourceRow | null {
    const repoUrl = rawRow['repoUrl']
    if (typeof repoUrl !== 'string' || !repoUrl) {
      return null
    }

    let repoName = ''
    let projectSlug = ''

    try {
      // Well-formed URL: the slug is the path without leading/trailing slash.
      const urlPath = new URL(repoUrl).pathname.replace(/^\//, '').replace(/\/$/, '')
      projectSlug = urlPath
      repoName = urlPath.split('/').pop() || ''
    } catch {
      // Not parseable as a URL (e.g. no scheme): fall back to the last two path segments.
      const parts = repoUrl.replace(/\/$/, '').split('/')
      projectSlug = parts.slice(-2).join('/')
      repoName = parts.pop() || ''
    }

    if (!projectSlug || !repoName) {
      return null
    }

    const score = rawRow['score']
    let lfCriticalityScore: number
    if (typeof score === 'number') {
      lfCriticalityScore = score
    } else if (typeof score === 'string') {
      lfCriticalityScore = parseFloat(score)
    } else {
      lfCriticalityScore = NaN
    }

    return {
      projectSlug,
      repoName,
      repoUrl,
      lfCriticalityScore: Number.isNaN(lfCriticalityScore) ? undefined : lfCriticalityScore,
    }
  }
}
0 commit comments