Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: use IPNI advertisements from the miner only #55

Merged
merged 8 commits into from
Mar 19, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions lib/constants.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
export const SPARK_VERSION = '1.9.0'
export const MAX_CAR_SIZE = 200 * 1024 * 1024 // 200 MB
export const APPROX_ROUND_LENGTH_IN_MS = 20 * 60_000 // 20 minutes

export const RPC_REQUEST = new Request('https://api.node.glif.io/', {
headers: {
authorization: 'Bearer 6bbc171ebfdd78b2644602ce7463c938'
juliangruber marked this conversation as resolved.
Show resolved Hide resolved
}
})
9 changes: 3 additions & 6 deletions lib/ipni-client.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@ import { decodeBase64, decodeVarint } from '../vendor/deno-deps.js'
/**
*
* @param {string} cid
* @param {string} providerId
* @returns {Promise<{
* indexerResult: string;
* provider?: { address: string; protocol: string };
* }>}
*/
export async function queryTheIndex (cid) {
export async function queryTheIndex (cid, providerId) {
const url = `https://cid.contact/cid/${encodeURIComponent(cid)}`

let providerResults
Expand All @@ -29,11 +30,7 @@ export async function queryTheIndex (cid) {

let graphsyncProvider
for (const p of providerResults) {
// TODO: find only the contact advertised by the SP handling this deal
// See https://filecoinproject.slack.com/archives/C048DLT4LAF/p1699958601915269?thread_ts=1699956597.137929&cid=C048DLT4LAF
// bytes of CID of dag-cbor encoded DealProposal
// https://github.com/filecoin-project/boost/blob/main/indexprovider/wrapper.go#L168-L172
// https://github.com/filecoin-project/boost/blob/main/indexprovider/wrapper.go#L195
if (p.Provider.ID !== providerId) continue

const [protocolCode] = decodeVarint(decodeBase64(p.Metadata))
const protocol = {
Expand Down
50 changes: 50 additions & 0 deletions lib/miner-lookup.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import { RPC_REQUEST } from './constants.js'

/**
* @param {string} minerId A miner actor id, e.g. `f0142637`
* @returns {Promise<string>} Miner's PeerId, e.g. `12D3KooWMsPmAA65yHAHgbxgh7CPkEctJHZMeM3rAvoW8CZKxtpG`
*/
export async function lookupMinerPeerId (minerId) {
juliangruber marked this conversation as resolved.
Show resolved Hide resolved
try {
const res = await rpc('Filecoin.StateMinerInfo', minerId, null)
return res.PeerId
} catch (err) {
err.message = `Cannot lookup miner ${minerId}: ${err.message}`
juliangruber marked this conversation as resolved.
Show resolved Hide resolved
throw err
}
}

/**
* @param {string} method
* @param {unknown[]} params
*/
async function rpc (method, ...params) {
const req = new Request(RPC_REQUEST, {
method: 'POST',
headers: {
'content-type': 'application/json',
accepts: 'application/json'
},
body: JSON.stringify({
jsonrpc: '2.0',
id: 1,
method,
params
})
})
const res = await fetch(req)

if (!res.ok) {
throw new Error(`JSON RPC failed with ${res.code}: ${await res.text()}`)
}

const body = await res.json()
if (body.error) {
const err = new Error(body.error.message)
err.name = 'FilecoinRpcError'
err.code = body.code
throw err
}

return body.result
}
49 changes: 42 additions & 7 deletions lib/spark.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import { ActivityState } from './activity-state.js'
import { SPARK_VERSION, MAX_CAR_SIZE, APPROX_ROUND_LENGTH_IN_MS } from './constants.js'
import { queryTheIndex } from './ipni-client.js'
import { lookupMinerPeerId } from './miner-lookup.js'
import {
encodeHex
} from '../vendor/deno-deps.js'
Expand All @@ -16,6 +17,7 @@ export default class Spark {

constructor ({ fetch = globalThis.fetch } = {}) {
this.#fetch = fetch
this.lookupMinerPeerId = lookupMinerPeerId
juliangruber marked this conversation as resolved.
Show resolved Hide resolved
}

async getRetrieval () {
Expand All @@ -39,8 +41,34 @@ export default class Spark {
}

async executeRetrievalCheck (retrieval, stats) {
console.log(`Calling Filecoin JSON-RPC to get PeerId of miner ${retrieval.minerId}`)
try {
const peerId = await this.lookupMinerPeerId(retrieval.minerId)
console.log(`Found peer id: ${peerId}`)
stats.providerId = peerId
} catch (err) {
console.error(err)
// There are three common error cases:
// 1. We are offline
// 2. The JSON RPC provider is down
// 3. JSON RPC errors like when Miner ID is not a known actor
// There isn't much we can do in the first two cases. We can notify the user that we are not
// performing any jobs and wait until the problem is resolved.
// The third case should not happen unless we made a mistake, so we want to learn about it
if (err.name === 'FilecoinRpcError') {
// TODO: report the error to Sentry
console.error('The error printed above was not expected, please report it on GitHub:')
console.error('https://github.com/filecoin-station/spark/issues/new')
} else {
this.#activity.onError()
}
err.reported = true
// Abort the check, no measurement should be recorded
throw err
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand the need for this custom property .reported here.

If we remove the console.error(err) above, then we can let the outer catch handler log it. Also, the outer catch handler can call #activity.onError() itself.

I think the only custom error handling we need here is when err.name === 'FilecoinRpcError', and in that case we can simply add a new log event. I'm proposing to refactor this catch handler like this:

Suggested change
console.error(err)
// There are three common error cases:
// 1. We are offline
// 2. The JSON RPC provider is down
// 3. JSON RPC errors like when Miner ID is not a known actor
// There isn't much we can do in the first two cases. We can notify the user that we are not
// performing any jobs and wait until the problem is resolved.
// The third case should not happen unless we made a mistake, so we want to learn about it
if (err.name === 'FilecoinRpcError') {
// TODO: report the error to Sentry
console.error('The error printed above was not expected, please report it on GitHub:')
console.error('https://github.com/filecoin-station/spark/issues/new')
} else {
this.#activity.onError()
}
err.reported = true
// Abort the check, no measurement should be recorded
throw err
// There are three common error cases:
// 1. We are offline
// 2. The JSON RPC provider is down
// 3. JSON RPC errors like when Miner ID is not a known actor
// There isn't much we can do in the first two cases. We can notify the user that we are not
// performing any jobs and wait until the problem is resolved.
// The third case should not happen unless we made a mistake, so we want to learn about it
if (err.name === 'FilecoinRpcError') {
// TODO: report the error to Sentry
console.error('Unexpected error, please report it on GitHub:')
console.error('https://github.com/filecoin-station/spark/issues/new')
}
// Abort the check, no measurement should be recorded
throw err

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's discuss.

In my proposed implementation:

  • FilecoinRpcError does not trigger this.#activity.onError(), therefore Spark stays in "online" mode
  • We print the call to report the issue after the error stack.

In your proposal:

  • FilecoinRpcError triggers this.#activity.onError() and puts Spark to offline mode.
  • The call to report the issue is printed before the error to report. (I guess that's fine, we just need to tweak the message.)

Here is the important question we need to answer: How should we handle the case when we cannot get a miner's Peer ID because Filecoin RPC returns an error?

  • This can be either because we are calling the RPC API incorrectly or because the miner ID is not a valid one. (I suppose we could also receive this error if the RPC API server has an internal problem; it can process the JSON-RPC part, but the called RPC method fails.)
  • Do we want to indicate offline mode? The next retrieval check will likely be able fetch the round details from spark-api, which means Spark goes back to online mode, only to go offline again when the JSON RPC call fails.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah understood! I thought the purpose of err.reported was to prevent double log lines, now I see it's also to prevent reporting it to the activity state handler.

Could we be more explicit about this?

  1. Rename to error.setActivityToOffline = false
  2. Let the outer error handler handle logging, to be more consistent with other errors

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See db3b2ae

}

console.log(`Querying IPNI to find retrieval providers for ${retrieval.cid}`)
const { indexerResult, provider } = await queryTheIndex(retrieval.cid)
const { indexerResult, provider } = await queryTheIndex(retrieval.cid, stats.providerId)
stats.indexerResult = indexerResult

if (indexerResult !== 'OK') return
Expand Down Expand Up @@ -170,6 +198,7 @@ export default class Spark {
byteLength: 0,
carChecksum: null,
statusCode: null,
providerId: null,
indexerResult: null
}

Expand All @@ -187,12 +216,7 @@ export default class Spark {
await this.nextRetrieval()
this.#activity.onHealthy()
} catch (err) {
if (err.statusCode === 400 && err.serverMessage === 'OUTDATED CLIENT') {
this.#activity.onOutdatedClient()
} else {
this.#activity.onError()
}
console.error(err)
this.handleRunError(err)
}
const duration = Date.now() - started
const baseDelay = APPROX_ROUND_LENGTH_IN_MS / this.#maxTasksPerNode
Expand All @@ -203,6 +227,17 @@ export default class Spark {
}
}
}

handleRunError (err) {
if (err.reported) return

if (err.statusCode === 400 && err.serverMessage === 'OUTDATED CLIENT') {
this.#activity.onOutdatedClient()
} else {
this.#activity.onError()
}
console.error(err)
}
}

async function assertOkResponse (res, errorMsg) {
Expand Down
1 change: 1 addition & 0 deletions test.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import './test/ipni-client.test.js'
import './test/miner-lookup.test.js'
import './test/integration.js'
import './test/spark.js'
13 changes: 11 additions & 2 deletions test/integration.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import Spark from '../lib/spark.js'
import { test } from 'zinnia:test'

import { assert, assertEquals } from 'zinnia:assert'
import { test } from 'zinnia:test'
bajtos marked this conversation as resolved.
Show resolved Hide resolved

const KNOWN_CID = 'bafkreih25dih6ug3xtj73vswccw423b56ilrwmnos4cbwhrceudopdp5sq'
const OUR_FAKE_MINER_ID = 'f01spark'
const FRISBEE_PEER_ID = '12D3KooWN3zbfCjLrjBB7uxYThRTCFM9nxinjb5j9fYFZ6P5RUfP'

test('integration', async () => {
const spark = new Spark()
Expand All @@ -16,14 +19,20 @@ test('integration', async () => {

test('retrieval check for our CID', async () => {
const spark = new Spark()
spark.getRetrieval = async () => ({ cid: KNOWN_CID })
spark.getRetrieval = async () => ({ cid: KNOWN_CID, minerId: OUR_FAKE_MINER_ID })
spark.lookupMinerPeerId = async (minerId) => {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wouldn't it be safer to push the function calls into an array, and then assert this function was called at all? This is the advice you give me with all these mocks I believe

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this particular test, if my stubbed function isn't called, the retrieval check fails because Filecoin's RPC API call will reject the fake miner id.

The purpose of this test is not to verify how Spark works under the hood (and whether it calls all other functions we think it should call) but whether it can find the provider and retrieve the data for the CID served by our Frisbii instance.

However, if you think it's worth asserting that both functions were called, then I am fine with adding that. Let me know!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was also my understanding when I wrote tests like this, but through your reviews I understood that it's clearer and safer to assert the function calls. If in the future a condition is introduced which skips lookupMinerPeerId(), then this test will still pass.

However,iIn my previous review I missed that this was an integration test and not a unit test. Please choose the testing method that you see fits best.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

push the function calls into an array, and then assert this function was called

added in 0a48986

assertEquals(minerId, OUR_FAKE_MINER_ID)
return FRISBEE_PEER_ID
}
const measurementId = await spark.nextRetrieval()
const res = await fetch(`https://api.filspark.com/measurements/${measurementId}`)
assert(res.ok)
const m = await res.json()
const assertProp = (prop, expectedValue) => assertEquals(m[prop], expectedValue, prop)

assertProp('cid', KNOWN_CID)
assertProp('minerId', OUR_FAKE_MINER_ID)
assertProp('providerId', FRISBEE_PEER_ID)
assertProp('indexerResult', 'OK')
assertProp('providerAddress', '/dns/frisbii.fly.dev/tcp/443/https')
assertProp('protocol', 'http')
Expand Down
8 changes: 7 additions & 1 deletion test/ipni-client.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@ import { assertEquals } from 'zinnia:assert'
import { queryTheIndex } from '../lib/ipni-client.js'

const KNOWN_CID = 'bafkreih25dih6ug3xtj73vswccw423b56ilrwmnos4cbwhrceudopdp5sq'
const FRISBEE_PEER_ID = '12D3KooWN3zbfCjLrjBB7uxYThRTCFM9nxinjb5j9fYFZ6P5RUfP'

test('query advertised CID', async () => {
const result = await queryTheIndex(KNOWN_CID)
const result = await queryTheIndex(KNOWN_CID, FRISBEE_PEER_ID)
assertEquals(result, {
indexerResult: 'OK',
provider: {
Expand All @@ -14,3 +15,8 @@ test('query advertised CID', async () => {
}
})
})

test('ignore advertisements from other miners', async () => {
const result = await queryTheIndex(KNOWN_CID, '12D3KooWsomebodyelse')
assertEquals(result.indexerResult, 'NO_VALID_ADVERTISEMENT')
})
21 changes: 21 additions & 0 deletions test/miner-lookup.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import { test } from 'zinnia:test'
import { assertMatch, AssertionError } from 'zinnia:assert'
import { lookupMinerPeerId } from '../lib/miner-lookup.js'

const KNOWN_MINER_ID = 'f0142637'

test('lookup peer id of a known miner', async () => {
const result = await lookupMinerPeerId(KNOWN_MINER_ID)
assertMatch(result, /^12D3KooW/)
})

test('lookup peer id of a miner that does not exist', async () => {
try {
const result = await lookupMinerPeerId('f010')
throw new AssertionError(
`Expected "lookupMinerPeerId()" to fail, but it resolved with "${result}" instead.`
)
} catch (err) {
assertMatch(err.toString(), /\bf010\b.*\bactor code is not miner/)
}
})
Loading