'use strict' const util = require('util') const pMap = require('p-map') const contentPath = require('./content/path') const fixOwner = require('./util/fix-owner') const fs = require('@npmcli/fs') const fsm = require('fs-minipass') const glob = util.promisify(require('glob')) const index = require('./entry-index') const path = require('path') const rimraf = util.promisify(require('rimraf')) const ssri = require('ssri') const globify = pattern => pattern.split('\\').join('/') const hasOwnProperty = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key) const verifyOpts = (opts) => ({ concurrency: 20, log: { silly () {} }, ...opts, }) module.exports = verify async function verify (cache, opts) { opts = verifyOpts(opts) opts.log.silly('verify', 'verifying cache at', cache) const steps = [ markStartTime, fixPerms, garbageCollect, rebuildIndex, cleanTmp, writeVerifile, markEndTime, ] const stats = {} for (const step of steps) { const label = step.name const start = new Date() const s = await step(cache, opts) if (s) { Object.keys(s).forEach((k) => { stats[k] = s[k] }) } const end = new Date() if (!stats.runTime) { stats.runTime = {} } stats.runTime[label] = end - start } stats.runTime.total = stats.endTime - stats.startTime opts.log.silly( 'verify', 'verification finished for', cache, 'in', `${stats.runTime.total}ms` ) return stats } async function markStartTime (cache, opts) { return { startTime: new Date() } } async function markEndTime (cache, opts) { return { endTime: new Date() } } async function fixPerms (cache, opts) { opts.log.silly('verify', 'fixing cache permissions') await fixOwner.mkdirfix(cache, cache) // TODO - fix file permissions too await fixOwner.chownr(cache, cache) return null } // Implements a naive mark-and-sweep tracing garbage collector. // // The algorithm is basically as follows: // 1. Read (and filter) all index entries ("pointers") // 2. Mark each integrity value as "live" // 3. Read entire filesystem tree in `content-vX/` dir // 4. If content is live, verify its checksum and delete it if it fails // 5. If content is not marked as live, rimraf it. // async function garbageCollect (cache, opts) { opts.log.silly('verify', 'garbage collecting content') const indexStream = index.lsStream(cache) const liveContent = new Set() indexStream.on('data', (entry) => { if (opts.filter && !opts.filter(entry)) { return } liveContent.add(entry.integrity.toString()) }) await new Promise((resolve, reject) => { indexStream.on('end', resolve).on('error', reject) }) const contentDir = contentPath.contentDir(cache) const files = await glob(globify(path.join(contentDir, '**')), { follow: false, nodir: true, nosort: true, }) const stats = { verifiedContent: 0, reclaimedCount: 0, reclaimedSize: 0, badContentCount: 0, keptSize: 0, } await pMap( files, async (f) => { const split = f.split(/[/\\]/) const digest = split.slice(split.length - 3).join('') const algo = split[split.length - 4] const integrity = ssri.fromHex(digest, algo) if (liveContent.has(integrity.toString())) { const info = await verifyContent(f, integrity) if (!info.valid) { stats.reclaimedCount++ stats.badContentCount++ stats.reclaimedSize += info.size } else { stats.verifiedContent++ stats.keptSize += info.size } } else { // No entries refer to this content. We can delete. stats.reclaimedCount++ const s = await fs.stat(f) await rimraf(f) stats.reclaimedSize += s.size } return stats }, { concurrency: opts.concurrency } ) return stats } async function verifyContent (filepath, sri) { const contentInfo = {} try { const { size } = await fs.stat(filepath) contentInfo.size = size contentInfo.valid = true await ssri.checkStream(new fsm.ReadStream(filepath), sri) } catch (err) { if (err.code === 'ENOENT') { return { size: 0, valid: false } } if (err.code !== 'EINTEGRITY') { throw err } await rimraf(filepath) contentInfo.valid = false } return contentInfo } async function rebuildIndex (cache, opts) { opts.log.silly('verify', 'rebuilding index') const entries = await index.ls(cache) const stats = { missingContent: 0, rejectedEntries: 0, totalEntries: 0, } const buckets = {} for (const k in entries) { /* istanbul ignore else */ if (hasOwnProperty(entries, k)) { const hashed = index.hashKey(k) const entry = entries[k] const excluded = opts.filter && !opts.filter(entry) excluded && stats.rejectedEntries++ if (buckets[hashed] && !excluded) { buckets[hashed].push(entry) } else if (buckets[hashed] && excluded) { // skip } else if (excluded) { buckets[hashed] = [] buckets[hashed]._path = index.bucketPath(cache, k) } else { buckets[hashed] = [entry] buckets[hashed]._path = index.bucketPath(cache, k) } } } await pMap( Object.keys(buckets), (key) => { return rebuildBucket(cache, buckets[key], stats, opts) }, { concurrency: opts.concurrency } ) return stats } async function rebuildBucket (cache, bucket, stats, opts) { await fs.truncate(bucket._path) // This needs to be serialized because cacache explicitly // lets very racy bucket conflicts clobber each other. for (const entry of bucket) { const content = contentPath(cache, entry.integrity) try { await fs.stat(content) await index.insert(cache, entry.key, entry.integrity, { metadata: entry.metadata, size: entry.size, }) stats.totalEntries++ } catch (err) { if (err.code === 'ENOENT') { stats.rejectedEntries++ stats.missingContent++ } else { throw err } } } } function cleanTmp (cache, opts) { opts.log.silly('verify', 'cleaning tmp directory') return rimraf(path.join(cache, 'tmp')) } function writeVerifile (cache, opts) { const verifile = path.join(cache, '_lastverified') opts.log.silly('verify', 'writing verifile to ' + verifile) try { return fs.writeFile(verifile, `${Date.now()}`) } finally { fixOwner.chownr.sync(cache, verifile) } } module.exports.lastRun = lastRun async function lastRun (cache) { const data = await fs.readFile(path.join(cache, '_lastverified'), { encoding: 'utf8' }) return new Date(+data) }