'use strict'; const util = require('util'); const urlutil = require('url'); const http = require('http'); const https = require('https'); const debug = require('debug')('urllib'); const ms = require('humanize-ms'); let REQUEST_ID = 0; const MAX_VALUE = Math.pow(2, 31) - 10; const PROTO_RE = /^https?:\/\//i; function getAgent(agent, defaultAgent) { return agent === undefined ? defaultAgent : agent; } function parseContentType(str) { if (!str) { return ''; } return str.split(';')[0].trim().toLowerCase(); } function makeCallback(resolve, reject) { return function (err, data, res) { if (err) { return reject(err); } resolve({ data: data, status: res.statusCode, headers: res.headers, res: res }); }; } // exports.TIMEOUT = ms('5s'); exports.TIMEOUTS = [ms('300s'), ms('300s')]; const TEXT_DATA_TYPES = ['json', 'text']; exports.request = function request(url, args, callback) { // request(url, callback) if (arguments.length === 2 && typeof args === 'function') { callback = args; args = null; } if (typeof callback === 'function') { return exports.requestWithCallback(url, args, callback); } return new Promise(function (resolve, reject) { exports.requestWithCallback(url, args, makeCallback(resolve, reject)); }); }; exports.requestWithCallback = function requestWithCallback(url, args, callback) { if (!url || (typeof url !== 'string' && typeof url !== 'object')) { const msg = util.format('expect request url to be a string or a http request options, but got' + ' %j', url); throw new Error(msg); } if (arguments.length === 2 && typeof args === 'function') { callback = args; args = null; } args = args || {}; if (REQUEST_ID >= MAX_VALUE) { REQUEST_ID = 0; } const reqId = ++REQUEST_ID; args.requestUrls = args.requestUrls || []; const reqMeta = { requestId: reqId, url: url, args: args, ctx: args.ctx }; if (args.emitter) { args.emitter.emit('request', reqMeta); } args.timeout = args.timeout || exports.TIMEOUTS; args.maxRedirects = args.maxRedirects || 10; args.streaming = args.streaming || args.customResponse; const requestStartTime = Date.now(); let parsedUrl; if (typeof url === 'string') { if (!PROTO_RE.test(url)) { // Support `request('www.server.com')` url = 'https://' + url; } parsedUrl = urlutil.parse(url); } else { parsedUrl = url; } const method = (args.type || args.method || parsedUrl.method || 'GET').toUpperCase(); let port = parsedUrl.port || 80; let httplib = http; let agent = getAgent(args.agent, exports.agent); const fixJSONCtlChars = args.fixJSONCtlChars; if (parsedUrl.protocol === 'https:') { httplib = https; agent = getAgent(args.httpsAgent, exports.httpsAgent); if (!parsedUrl.port) { port = 443; } } // request through proxy tunnel // var proxyTunnelAgent = detectProxyAgent(parsedUrl, args); // if (proxyTunnelAgent) { // agent = proxyTunnelAgent; // } const options = { host: parsedUrl.hostname || parsedUrl.host || 'localhost', path: parsedUrl.path || '/', method: method, port: port, agent: agent, headers: args.headers || {}, // default is dns.lookup // https://github.com/nodejs/node/blob/master/lib/net.js#L986 // custom dnslookup require node >= 4.0.0 // https://github.com/nodejs/node/blob/archived-io.js-v0.12/lib/net.js#L952 lookup: args.lookup }; if (Array.isArray(args.timeout)) { options.requestTimeout = args.timeout[args.timeout.length - 1]; } else if (typeof args.timeout !== 'undefined') { options.requestTimeout = args.timeout; } // const sslNames = [ // 'pfx', // 'key', // 'passphrase', // 'cert', // 'ca', // 'ciphers', // 'rejectUnauthorized', // 'secureProtocol', // 'secureOptions', // ]; // for (let i = 0; i < sslNames.length; i++) { // const name = sslNames[i]; // if (args.hasOwnProperty(name)) { // options[name] = args[name]; // } // } // don't check ssl // if (options.rejectUnauthorized === false && !options.hasOwnProperty('secureOptions')) { // options.secureOptions = require('constants').SSL_OP_NO_TLSv1_2; // } const auth = args.auth || parsedUrl.auth; if (auth) { options.auth = auth; } // content undefined data 有值 let body = args.content || args.data; const dataAsQueryString = method === 'GET' || method === 'HEAD' || args.dataAsQueryString; if (!args.content) { if (body && !(typeof body === 'string' || Buffer.isBuffer(body))) { if (dataAsQueryString) { // read: GET, HEAD, use query string body = args.nestedQuerystring ? qs.stringify(body) : querystring.stringify(body); } else { let contentType = options.headers['Content-Type'] || options.headers['content-type']; // auto add application/x-www-form-urlencoded when using urlencode form request if (!contentType) { if (args.contentType === 'json') { contentType = 'application/json'; } else { contentType = 'application/x-www-form-urlencoded'; } options.headers['Content-Type'] = contentType; } if (parseContentType(contentType) === 'application/json') { body = JSON.stringify(body); } else { // 'application/x-www-form-urlencoded' body = args.nestedQuerystring ? qs.stringify(body) : querystring.stringify(body); } } } } // if it's a GET or HEAD request, data should be sent as query string if (dataAsQueryString && body) { options.path += (parsedUrl.query ? '&' : '?') + body; body = null; } let requestSize = 0; if (body) { let length = body.length; if (!Buffer.isBuffer(body)) { length = Buffer.byteLength(body); } requestSize = options.headers['Content-Length'] = length; } if (args.dataType === 'json') { options.headers.Accept = 'application/json'; } if (typeof args.beforeRequest === 'function') { // you can use this hook to change every thing. args.beforeRequest(options); } let connectTimer = null; let responseTimer = null; let __err = null; let connected = false; // socket connected or not let keepAliveSocket = false; // request with keepalive socket let responseSize = 0; let statusCode = -1; let responseAborted = false; let remoteAddress = ''; let remotePort = ''; let timing = null; if (args.timing) { timing = { // socket assigned queuing: 0, // dns lookup time dnslookup: 0, // socket connected connected: 0, // request sent requestSent: 0, // Time to first byte (TTFB) waiting: 0, contentDownload: 0 }; } function cancelConnectTimer() { if (connectTimer) { clearTimeout(connectTimer); connectTimer = null; } } function cancelResponseTimer() { if (responseTimer) { clearTimeout(responseTimer); responseTimer = null; } } function done(err, data, res) { cancelResponseTimer(); if (!callback) { console.warn( '[urllib:warn] [%s] [%s] [worker:%s] %s %s callback twice!!!', Date(), reqId, process.pid, options.method, url ); // https://github.com/node-modules/urllib/pull/30 if (err) { console.warn( '[urllib:warn] [%s] [%s] [worker:%s] %s: %s\nstack: %s', Date(), reqId, process.pid, err.name, err.message, err.stack ); } return; } const cb = callback; callback = null; let headers = {}; if (res) { statusCode = res.statusCode; headers = res.headers; } // handle digest auth // if (statusCode === 401 && headers['www-authenticate'] // && (!args.headers || !args.headers.Authorization) && args.digestAuth) { // const authenticate = headers['www-authenticate']; // if (authenticate.indexOf('Digest ') >= 0) { // debug('Request#%d %s: got digest auth header WWW-Authenticate: %s', reqId, url, authenticate); // args.headers = args.headers || {}; // args.headers.Authorization = digestAuthHeader(options.method, options.path, authenticate, args.digestAuth); // debug('Request#%d %s: auth with digest header: %s', reqId, url, args.headers.Authorization); // if (res.headers['set-cookie']) { // args.headers.Cookie = res.headers['set-cookie'].join(';'); // } // return exports.requestWithCallback(url, args, cb); // } // } const requestUseTime = Date.now() - requestStartTime; if (timing) { timing.contentDownload = requestUseTime; } debug( '[%sms] done, %s bytes HTTP %s %s %s %s, keepAliveSocket: %s, timing: %j', requestUseTime, responseSize, statusCode, options.method, options.host, options.path, keepAliveSocket, timing ); const response = { status: statusCode, statusCode: statusCode, headers: headers, size: responseSize, aborted: responseAborted, rt: requestUseTime, keepAliveSocket: keepAliveSocket, data: data, requestUrls: args.requestUrls, timing: timing, remoteAddress: remoteAddress, remotePort: remotePort }; if (err) { let agentStatus = ''; if (agent && typeof agent.getCurrentStatus === 'function') { // add current agent status to error message for logging and debug agentStatus = ', agent status: ' + JSON.stringify(agent.getCurrentStatus()); } err.message += ', ' + options.method + ' ' + url + ' ' + statusCode + ' (connected: ' + connected + ', keepalive socket: ' + keepAliveSocket + agentStatus + ')' + '\nheaders: ' + JSON.stringify(headers); err.data = data; err.path = options.path; err.status = statusCode; err.headers = headers; err.res = response; } cb(err, data, args.streaming ? res : response); if (args.emitter) { // keep to use the same reqMeta object on request event before reqMeta.url = url; reqMeta.socket = req && req.connection; reqMeta.options = options; reqMeta.size = requestSize; args.emitter.emit('response', { requestId: reqId, error: err, ctx: args.ctx, req: reqMeta, res: response }); } } function handleRedirect(res) { let err = null; if (args.followRedirect && statuses.redirect[res.statusCode]) { // handle redirect args._followRedirectCount = (args._followRedirectCount || 0) + 1; const location = res.headers.location; if (!location) { err = new Error('Got statusCode ' + res.statusCode + ' but cannot resolve next location from headers'); err.name = 'FollowRedirectError'; } else if (args._followRedirectCount > args.maxRedirects) { err = new Error('Exceeded maxRedirects. Probably stuck in a redirect loop ' + url); err.name = 'MaxRedirectError'; } else { const newUrl = args.formatRedirectUrl ? args.formatRedirectUrl(url, location) : urlutil.resolve(url, location); debug('Request#%d %s: `redirected` from %s to %s', reqId, options.path, url, newUrl); // make sure timer stop cancelResponseTimer(); // should clean up headers.Host on `location: http://other-domain/url` if (args.headers && args.headers.Host && PROTO_RE.test(location)) { args.headers.Host = null; } // avoid done will be execute in the future change. const cb = callback; callback = null; exports.requestWithCallback(newUrl, args, cb); return { redirect: true, error: null }; } } return { redirect: false, error: err }; } if (args.gzip) { if (!options.headers['Accept-Encoding'] && !options.headers['accept-encoding']) { options.headers['Accept-Encoding'] = 'gzip'; } } function decodeContent(res, body, cb) { const encoding = res.headers['content-encoding']; // if (body.length === 0) { // return cb(null, body, encoding); // } // if (!encoding || encoding.toLowerCase() !== 'gzip') { return cb(null, body, encoding); // } // debug('gunzip %d length body', body.length); // zlib.gunzip(body, cb); } const writeStream = args.writeStream; debug('Request#%d %s %s with headers %j, options.path: %s', reqId, method, url, options.headers, options.path); args.requestUrls.push(url); function onResponse(res) { if (timing) { timing.waiting = Date.now() - requestStartTime; } debug('Request#%d %s `req response` event emit: status %d, headers: %j', reqId, url, res.statusCode, res.headers); if (args.streaming) { const result = handleRedirect(res); if (result.redirect) { res.resume(); return; } if (result.error) { res.resume(); return done(result.error, null, res); } return done(null, null, res); } res.on('close', function () { debug('Request#%d %s: `res close` event emit, total size %d', reqId, url, responseSize); }); res.on('error', function () { debug('Request#%d %s: `res error` event emit, total size %d', reqId, url, responseSize); }); res.on('aborted', function () { responseAborted = true; debug('Request#%d %s: `res aborted` event emit, total size %d', reqId, url, responseSize); }); if (writeStream) { // If there's a writable stream to recieve the response data, just pipe the // response stream to that writable stream and call the callback when it has // finished writing. // // NOTE that when the response stream `res` emits an 'end' event it just // means that it has finished piping data to another stream. In the // meanwhile that writable stream may still writing data to the disk until // it emits a 'close' event. // // That means that we should not apply callback until the 'close' of the // writable stream is emited. // // See also: // - https://github.com/TBEDP/urllib/commit/959ac3365821e0e028c231a5e8efca6af410eabb // - http://nodejs.org/api/stream.html#stream_event_end // - http://nodejs.org/api/stream.html#stream_event_close_1 const result = handleRedirect(res); if (result.redirect) { res.resume(); return; } if (result.error) { res.resume(); // end ths stream first writeStream.end(); return done(result.error, null, res); } // you can set consumeWriteStream false that only wait response end if (args.consumeWriteStream === false) { res.on('end', done.bind(null, null, null, res)); } else { // node 0.10, 0.12: only emit res aborted, writeStream close not fired // if (isNode010 || isNode012) { // first([ // [ writeStream, 'close' ], // [ res, 'aborted' ], // ], function(_, stream, event) { // debug('Request#%d %s: writeStream or res %s event emitted', reqId, url, event); // done(__err || null, null, res); // }); if (false) { } else { writeStream.on('close', function () { debug('Request#%d %s: writeStream close event emitted', reqId, url); done(__err || null, null, res); }); } } return res.pipe(writeStream); } // Otherwise, just concat those buffers. // // NOTE that the `chunk` is not a String but a Buffer. It means that if // you simply concat two chunk with `+` you're actually converting both // Buffers into Strings before concating them. It'll cause problems when // dealing with multi-byte characters. // // The solution is to store each chunk in an array and concat them with // 'buffer-concat' when all chunks is recieved. // // See also: // http://cnodejs.org/topic/4faf65852e8fb5bc65113403 const chunks = []; res.on('data', function (chunk) { debug('Request#%d %s: `res data` event emit, size %d', reqId, url, chunk.length); responseSize += chunk.length; chunks.push(chunk); }); res.on('end', function () { const body = Buffer.concat(chunks, responseSize); debug('Request#%d %s: `res end` event emit, total size %d, _dumped: %s', reqId, url, responseSize, res._dumped); if (__err) { // req.abort() after `res data` event emit. return done(__err, body, res); } const result = handleRedirect(res); if (result.error) { return done(result.error, body, res); } if (result.redirect) { return; } decodeContent(res, body, function (err, data, encoding) { if (err) { return done(err, body, res); } // if body not decode, dont touch it if (!encoding && TEXT_DATA_TYPES.indexOf(args.dataType) >= 0) { // try to decode charset try { data = decodeBodyByCharset(data, res); } catch (e) { debug('decodeBodyByCharset error: %s', e); // if error, dont touch it return done(null, data, res); } if (args.dataType === 'json') { if (responseSize === 0) { data = null; } else { const r = parseJSON(data, fixJSONCtlChars); if (r.error) { err = r.error; } else { data = r.data; } } } } if (responseAborted) { // err = new Error('Remote socket was terminated before `response.end()` was called'); // err.name = 'RemoteSocketClosedError'; debug('Request#%d %s: Remote socket was terminated before `response.end()` was called', reqId, url); } done(err, data, res); }); }); } let connectTimeout, responseTimeout; if (Array.isArray(args.timeout)) { connectTimeout = ms(args.timeout[0]); responseTimeout = ms(args.timeout[1]); } else { // set both timeout equal connectTimeout = responseTimeout = ms(args.timeout); } debug('ConnectTimeout: %d, ResponseTimeout: %d', connectTimeout, responseTimeout); function startConnectTimer() { debug('Connect timer ticking, timeout: %d', connectTimeout); connectTimer = setTimeout(function () { connectTimer = null; if (statusCode === -1) { statusCode = -2; } let msg = 'Connect timeout for ' + connectTimeout + 'ms'; let errorName = 'ConnectionTimeoutError'; if (!req.socket) { errorName = 'SocketAssignTimeoutError'; msg += ', working sockets is full'; } __err = new Error(msg); __err.name = errorName; __err.requestId = reqId; debug('ConnectTimeout: Request#%d %s %s: %s, connected: %s', reqId, url, __err.name, msg, connected); abortRequest(); }, connectTimeout); } function startResposneTimer() { debug('Response timer ticking, timeout: %d', responseTimeout); responseTimer = setTimeout(function () { responseTimer = null; const msg = 'Response timeout for ' + responseTimeout + 'ms'; const errorName = 'ResponseTimeoutError'; __err = new Error(msg); __err.name = errorName; __err.requestId = reqId; debug('ResponseTimeout: Request#%d %s %s: %s, connected: %s', reqId, url, __err.name, msg, connected); abortRequest(); }, responseTimeout); } let req; // request headers checker will throw error options.mode = args.mode ? args.mode : ''; try { req = httplib.request(options, onResponse); } catch (err) { return done(err); } // environment detection: browser or nodejs if (typeof window === 'undefined') { // start connect timer just after `request` return, and just in nodejs environment startConnectTimer(); } else { req.on('requestTimeout', function () { if (statusCode === -1) { statusCode = -2; } const msg = 'Connect timeout for ' + connectTimeout + 'ms'; const errorName = 'ConnectionTimeoutError'; __err = new Error(msg); __err.name = errorName; __err.requestId = reqId; abortRequest(); }); } function abortRequest() { debug('Request#%d %s abort, connected: %s', reqId, url, connected); // it wont case error event when req haven't been assigned a socket yet. if (!req.socket) { __err.noSocket = true; done(__err); } req.abort(); } if (timing) { // request sent req.on('finish', function () { timing.requestSent = Date.now() - requestStartTime; }); } req.once('socket', function (socket) { if (timing) { // socket queuing time timing.queuing = Date.now() - requestStartTime; } // https://github.com/nodejs/node/blob/master/lib/net.js#L377 // https://github.com/nodejs/node/blob/v0.10.40-release/lib/net.js#L352 // should use socket.socket on 0.10.x // if (isNode010 && socket.socket) { // socket = socket.socket; // } const readyState = socket.readyState; if (readyState === 'opening') { socket.once('lookup', function (err, ip, addressType) { debug('Request#%d %s lookup: %s, %s, %s', reqId, url, err, ip, addressType); if (timing) { timing.dnslookup = Date.now() - requestStartTime; } if (ip) { remoteAddress = ip; } }); socket.once('connect', function () { if (timing) { // socket connected timing.connected = Date.now() - requestStartTime; } // cancel socket timer at first and start tick for TTFB cancelConnectTimer(); startResposneTimer(); debug('Request#%d %s new socket connected', reqId, url); connected = true; if (!remoteAddress) { remoteAddress = socket.remoteAddress; } remotePort = socket.remotePort; }); return; } debug('Request#%d %s reuse socket connected, readyState: %s', reqId, url, readyState); connected = true; keepAliveSocket = true; if (!remoteAddress) { remoteAddress = socket.remoteAddress; } remotePort = socket.remotePort; // reuse socket, timer should be canceled. cancelConnectTimer(); startResposneTimer(); }); req.on('error', function (err) { //TypeError for browser fetch api, Error for browser xmlhttprequest api if (err.name === 'Error' || err.name === 'TypeError') { err.name = connected ? 'ResponseError' : 'RequestError'; } err.message += ' (req "error")'; debug('Request#%d %s `req error` event emit, %s: %s', reqId, url, err.name, err.message); done(__err || err); }); if (writeStream) { writeStream.once('error', function (err) { err.message += ' (writeStream "error")'; __err = err; debug('Request#%d %s `writeStream error` event emit, %s: %s', reqId, url, err.name, err.message); abortRequest(); }); } if (args.stream) { args.stream.pipe(req); args.stream.once('error', function (err) { err.message += ' (stream "error")'; __err = err; debug('Request#%d %s `readStream error` event emit, %s: %s', reqId, url, err.name, err.message); abortRequest(); }); } else { req.end(body); } req.requestId = reqId; return req; };