-
Notifications
You must be signed in to change notification settings - Fork 107
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #11913 from vkuznet/fix-issue-11899
New DBSConcurrency module for concurrent execution of HTTP queries to DBS via pycurl manager
- Loading branch information
Showing
3 changed files
with
112 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
#!/usr/bin/env python | ||
""" | ||
File : DBSConcurrency.py | ||
Author : Valentin Kuznetsov <vkuznet AT gmail dot com> | ||
Description: dedicated module holding DBS-related functions which execute | ||
concurrent calls to DBS APIs. | ||
""" | ||
|
||
import json | ||
import urllib | ||
from WMCore.Services.pycurl_manager import getdata as multi_getdata | ||
|
||
|
||
def getBlockInfo4PU(blockNames, ckey, cert,
                    dbsUrl="https://cmsweb-prod.cern.ch/dbs/prod/global/DBSReader"):
    """
    Fetch block information details, file list and number of events, from DBS
    server. Here we use concrete set of parameters for DBS to use in this case, i.e.
    we must look-up only valid files and get full details from the DBS API (in order
    to get number of events).
    :param blockNames: list of block names
    :param ckey: user key handed over to the pycurl manager (presumably a file
        path such as the value of X509_USER_KEY -- confirm against pycurl_manager)
    :param cert: user certificate handed over to the pycurl manager (presumably a
        file path such as the value of X509_USER_CERT -- confirm against pycurl_manager)
    :param dbsUrl: base URL of the DBSReader instance to query; defaults to the
        production global instance, which was previously hard-coded
    :return: dictionary of {block: {"FileList": list of strings, "NumberOfEvents": integer}, ...}
    """
    # a bare `import urllib` does not guarantee that the `urllib.parse` submodule
    # is loaded, hence import the required functions explicitly
    from urllib.parse import quote_plus, unquote_plus

    baseUrl = dbsUrl.rstrip('/')
    urls = []
    for blk in blockNames:
        # need to encode block name properly
        block = quote_plus(blk)
        urls.append(f"{baseUrl}/files?detail=true&validFileOnly=1&block_name={block}")
    if not urls:
        # nothing to look-up, avoid contacting DBS at all
        return {}
    # place concurrent calls to DBS, please note that multi_getdata is generator, therefore
    # it does not put DBS results into the memory until this generator is iterated
    results = multi_getdata(urls, ckey, cert)
    # parse output of getdata: every row carries the requested url and the raw JSON payload
    blockInfo = {}
    for row in results:
        # recover the block name from the url, it is the last query parameter
        encodedName = row['url'].split('block_name=')[-1]
        block = unquote_plus(encodedName)
        data = json.loads(row['data'])
        files = [r['logical_file_name'] for r in data]
        nevents = sum(r['event_count'] for r in data)
        blockInfo[block] = {'FileList': files, 'NumberOfEvents': nevents}
    return blockInfo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
#!/usr/bin/env python | ||
""" | ||
_DBSConcurrency_t_ | ||
Unit test for the DBSConcurrency module | ||
""" | ||
|
||
import os | ||
import json | ||
import time | ||
import logging | ||
import unittest | ||
|
||
# WMCore modules | ||
from WMCore.Services.DBS.DBSConcurrency import getBlockInfo4PU | ||
from WMCore.Services.pycurl_manager import getdata as multi_getdata | ||
from WMQuality.Emulators.EmulatedUnitTestCase import EmulatedUnitTestCase | ||
|
||
|
||
class DBSConcurrencyTest(EmulatedUnitTestCase):
    """
    DBSConcurrencyTest class defines unit tests for DBS concurrent codebase
    """

    def setUp(self):
        """
        Initialization function: define the DBS instance to query, read user
        credentials from the environment and set up a DEBUG level logger
        """
        self.dbs = 'https://cmsweb-testbed.cern.ch/dbs/int/global/DBSReader'
        self.ckey = os.getenv('X509_USER_KEY')
        self.cert = os.getenv('X509_USER_CERT')
        logging.basicConfig()
        self.logger = logging.getLogger()
        self.logger.setLevel(logging.DEBUG)

    def testGetBlockInfoList(self):
        """
        Unit test for getBlockInfo4PU function
        """
        # first, look-up the list of blocks for the given dataset
        time0 = time.time()
        dataset = '/ZMM_13TeV_TuneCP5-pythia8/RunIIAutumn18DR-SNB_102X_upgrade2018_realistic_v17-v2/AODSIM'
        url = f"{self.dbs}/blocks?dataset={dataset}"
        self.logger.info(url)
        results = multi_getdata([url], self.ckey, self.cert)
        blocks = []
        for row in results:
            data = json.loads(row['data'])
            blocks.extend(r['block_name'] for r in data)
        elapsedTime = time.time() - time0
        self.logger.debug("for %s get %d in %s seconds", dataset, len(blocks), elapsedTime)
        # NOTE: the former assertTrue(len(blocks), 2) treated 2 as the failure message,
        # i.e. it only verified that at least one block was found; keep that check explicit
        self.assertGreater(len(blocks), 0, "no blocks found for dataset %s" % dataset)
        # call to DBS should be resolved within 3 seconds
        self.assertLess(elapsedTime, 3)

        # second, fetch details of all blocks via concurrent DBS calls
        time0 = time.time()
        blockInfoList = getBlockInfo4PU(blocks, self.ckey, self.cert)
        for blk, row in blockInfoList.items():
            self.logger.debug("block %s, nfiles=%d, nevents=%d", blk, len(row['FileList']), row['NumberOfEvents'])
        elapsedTime = time.time() - time0
        self.logger.debug("Elapsed time: %d seconds", elapsedTime)
        # NOTE: if every DBS call spent 1 second, avg time for 10 calls will be around 1 second
        # therefore, we will test quite low number here
        self.assertLess(elapsedTime, 3)
|
||
|
||
# Run the unit tests of this module when the file is executed as a script | ||
if __name__ == '__main__': | ||
unittest.main() |