diff --git a/beacon/connections/mongo/analyses.py b/beacon/connections/mongo/analyses.py index a78cf02..4dd8d79 100644 --- a/beacon/connections/mongo/analyses.py +++ b/beacon/connections/mongo/analyses.py @@ -1,3 +1,4 @@ + from beacon.request.parameters import RequestParams from beacon.response.schemas import DefaultSchemas import yaml @@ -23,7 +24,7 @@ def get_analyses(self, entry_id: Optional[str], qparams: RequestParams, dataset: elif query_parameters == {'$and': []}:# pragma: no cover query_parameters = {} query={} - query = apply_filters(self, query, qparams.query.filters, collection, query_parameters) + query = apply_filters(self, query, qparams.query.filters, collection, query_parameters, dataset) schema = DefaultSchemas.ANALYSES include = qparams.query.include_resultset_responses limit = qparams.query.pagination.limit @@ -39,7 +40,7 @@ def get_analysis_with_id(self, entry_id: Optional[str], qparams: RequestParams, collection = 'analyses' idq="biosampleId" mongo_collection = client.beacon.analyses - query = apply_filters(self, {}, qparams.query.filters, collection, {}) + query = apply_filters(self, {}, qparams.query.filters, collection, {}, dataset) query = query_id(self, query, entry_id) schema = DefaultSchemas.ANALYSES include = qparams.query.include_resultset_responses @@ -55,11 +56,38 @@ def get_variants_of_analysis(self, entry_id: Optional[str], qparams: RequestPara collection = 'analyses' mongo_collection = client.beacon.genomicVariations query = {"$and": [{"id": entry_id}]} - query = apply_filters(self, query, qparams.query.filters, collection, {}) + query = apply_filters(self, query, qparams.query.filters, collection, {}, dataset) analysis_ids = client.beacon.analyses \ .find_one(query, {"biosampleId": 1, "_id": 0}) - query = {"caseLevelData.biosampleId": analysis_ids["biosampleId"]} - query = apply_filters(self, query, qparams.query.filters, collection, {}) + targets = client.beacon.targets \ + .find({"datasetId": dataset}, {"biosampleIds": 1, "_id": 0}) + position=0 + bioids=targets[0]["biosampleIds"] + for bioid in bioids: + if bioid == analysis_ids["biosampleId"]: + break + position+=1 + position=str(position) + position1="^"+position+"," + position2=","+position+"," + position3=","+position+"$" + query_cl={ "$or": [ + {"biosampleIds": {"$regex": position1}}, + {"biosampleIds": {"$regex": position2}}, + {"biosampleIds": {"$regex": position3}} + ]} + string_of_ids = client.beacon.caseLevelData \ + .find(query_cl, {"id": 1, "_id": 0}) + HGVSIds=list(string_of_ids) + query={} + queryHGVS={} + listHGVS=[] + for HGVSId in HGVSIds: + justid=HGVSId["id"] + listHGVS.append(justid) + queryHGVS["$in"]=listHGVS + query["identifiers.genomicHGVSId"]=queryHGVS + query = apply_filters(self, query, qparams.query.filters, collection, {}, dataset) schema = DefaultSchemas.GENOMICVARIATIONS include = qparams.query.include_resultset_responses limit = qparams.query.pagination.limit @@ -67,5 +95,4 @@ def get_variants_of_analysis(self, entry_id: Optional[str], qparams: RequestPara if limit > 100 or limit == 0: limit = 100# pragma: no cover idq="caseLevelData.biosampleId" - count, dataset_count, docs = get_docs_by_response_type(self, include, query, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs, dataset \ No newline at end of file + count, dataset_count, docs = get_docs_by_response_type(self, include, query, dataset, limit, skip, mongo_collection, idq) \ No newline at end of file diff --git a/beacon/connections/mongo/biosamples.py b/beacon/connections/mongo/biosamples.py index 18fe0ff..0d5b0e1 100644 --- a/beacon/connections/mongo/biosamples.py +++ b/beacon/connections/mongo/biosamples.py @@ -23,7 +23,7 @@ def get_biosamples(self, entry_id: Optional[str], qparams: RequestParams, datase elif query_parameters == {'$and': []}:# pragma: no cover query_parameters = {} query={} - query = apply_filters(self, query, qparams.query.filters, collection, query_parameters) + query = apply_filters(self, query, qparams.query.filters, collection, query_parameters, dataset) schema = DefaultSchemas.BIOSAMPLES include = qparams.query.include_resultset_responses limit = qparams.query.pagination.limit @@ -38,7 +38,7 @@ def get_biosamples(self, entry_id: Optional[str], qparams: RequestParams, datase def get_biosample_with_id(self, entry_id: Optional[str], qparams: RequestParams, dataset: str): collection = 'biosamples' mongo_collection = client.beacon.biosamples - query = apply_filters(self, {}, qparams.query.filters, collection, {}) + query = apply_filters(self, {}, qparams.query.filters, collection, {}, dataset) query = query_id(self, query, entry_id) schema = DefaultSchemas.BIOSAMPLES include = qparams.query.include_resultset_responses @@ -54,8 +54,35 @@ def get_biosample_with_id(self, entry_id: Optional[str], qparams: RequestParams, def get_variants_of_biosample(self, entry_id: Optional[str], qparams: RequestParams, dataset: str): collection = 'g_variants' mongo_collection = client.beacon.genomicVariations - query = {"caseLevelData.biosampleId": entry_id} - query = apply_filters(self, query, qparams.query.filters, collection, {}) + targets = client.beacon.targets \ + .find({"datasetId": dataset}, {"biosampleIds": 1, "_id": 0}) + position=0 + bioids=targets[0]["biosampleIds"] + for bioid in bioids: + if bioid == entry_id: + break + position+=1 + position=str(position) + position1="^"+position+"," + position2=","+position+"," + position3=","+position+"$" + query_cl={ "$or": [ + {"biosampleIds": {"$regex": position1}}, + {"biosampleIds": {"$regex": position2}}, + {"biosampleIds": {"$regex": position3}} + ]} + string_of_ids = client.beacon.caseLevelData \ + .find(query_cl, {"id": 1, "_id": 0}) + HGVSIds=list(string_of_ids) + query={} + queryHGVS={} + listHGVS=[] + for HGVSId in HGVSIds: + justid=HGVSId["id"] + listHGVS.append(justid) + queryHGVS["$in"]=listHGVS + query["identifiers.genomicHGVSId"]=queryHGVS + query = apply_filters(self, query, qparams.query.filters, collection, {}, dataset) schema = DefaultSchemas.GENOMICVARIATIONS include = qparams.query.include_resultset_responses limit = qparams.query.pagination.limit @@ -71,7 +98,7 @@ def get_analyses_of_biosample(self, entry_id: Optional[str], qparams: RequestPar collection = 'biosamples' mongo_collection = client.beacon.analyses query = {"biosampleId": entry_id} - query = apply_filters(self, query, qparams.query.filters, collection, {}) + query = apply_filters(self, query, qparams.query.filters, collection, {}, dataset) schema = DefaultSchemas.ANALYSES include = qparams.query.include_resultset_responses limit = qparams.query.pagination.limit @@ -87,7 +114,7 @@ def get_runs_of_biosample(self, entry_id: Optional[str], qparams: RequestParams, collection = 'biosamples' mongo_collection = client.beacon.runs query = {"individualId": entry_id} - query = apply_filters(self, query, qparams.query.filters, collection, {}) + query = apply_filters(self, query, qparams.query.filters, collection, {}, dataset) schema = DefaultSchemas.RUNS include = qparams.query.include_resultset_responses limit = qparams.query.pagination.limit @@ -95,5 +122,4 @@ def get_runs_of_biosample(self, entry_id: Optional[str], qparams: RequestParams, if limit > 100 or limit == 0: limit = 100# pragma: no cover idq="biosampleId" - count, dataset_count, docs = get_docs_by_response_type(self, include, query, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs, dataset \ No newline at end of file + count, dataset_count, docs = get_docs_by_response_type(self, include, query, dataset, limit, skip, mongo_collection, idq) \ No newline at end of file diff --git a/beacon/connections/mongo/cohorts.py b/beacon/connections/mongo/cohorts.py index a0697ae..c75cedd 100644 --- a/beacon/connections/mongo/cohorts.py +++ b/beacon/connections/mongo/cohorts.py @@ -13,7 +13,7 @@ def get_cohorts(self, entry_id: Optional[str], qparams: RequestParams): collection = 'cohorts' limit = qparams.query.pagination.limit - query = apply_filters(self, {}, qparams.query.filters, collection, {}) + query = apply_filters(self, {}, qparams.query.filters, collection, {}, "a") schema = DefaultSchemas.COHORTS count = get_count(self, client.beacon.cohorts, query) docs = get_documents(self, @@ -31,7 +31,7 @@ def get_cohorts(self, entry_id: Optional[str], qparams: RequestParams): def get_cohort_with_id(self, entry_id: Optional[str], qparams: RequestParams): collection = 'cohorts' limit = qparams.query.pagination.limit - query = apply_filters(self, {}, qparams.query.filters, collection, {}) + query = apply_filters(self, {}, qparams.query.filters, collection, {}, "a") query = query_id(self, query, entry_id) schema = DefaultSchemas.COHORTS count = get_count(self, client.beacon.cohorts, query) @@ -53,12 +53,12 @@ def get_individuals_of_cohort(self, entry_id: Optional[str], qparams: RequestPar dataset_count=0 limit = qparams.query.pagination.limit include = qparams.query.include_resultset_responses - query = apply_filters(self, {}, qparams.query.filters, collection, {}) + query = apply_filters(self, {}, qparams.query.filters, collection, {}, dataset) query = query_id(self, query, entry_id) count = get_count(self, client.beacon.cohorts, query) dict_in={} dict_in['datasetId']=dataset - query = apply_filters(self, dict_in, qparams.query.filters, collection, {}) + query = apply_filters(self, dict_in, qparams.query.filters, collection, {}, dataset) schema = DefaultSchemas.INDIVIDUALS skip = qparams.query.pagination.skip @@ -75,12 +75,12 @@ def get_analyses_of_cohort(self, entry_id: Optional[str], qparams: RequestParams dataset_count=0 limit = qparams.query.pagination.limit include = qparams.query.include_resultset_responses - query = apply_filters(self, {}, qparams.query.filters, collection, {}) + query = apply_filters(self, {}, qparams.query.filters, collection, {}, dataset) query = query_id(self, query, entry_id) count = get_count(self, client.beacon.cohorts, query) dict_in={} dict_in['datasetId']=dataset - query = apply_filters(self, dict_in, qparams.query.filters, collection, {}) + query = apply_filters(self, dict_in, qparams.query.filters, collection, {}, dataset) schema = DefaultSchemas.ANALYSES skip = qparams.query.pagination.skip if limit > 100 or limit == 0: @@ -96,7 +96,7 @@ def get_variants_of_cohort(self,entry_id: Optional[str], qparams: RequestParams, dataset_count=0 limit = qparams.query.pagination.limit include = qparams.query.include_resultset_responses - query = apply_filters(self, {}, qparams.query.filters, collection, {}) + query = apply_filters(self, {}, qparams.query.filters, collection, {}, dataset) query = query_id(self, query, entry_id) count = get_count(self, client.beacon.cohorts, query) query_count={} @@ -117,7 +117,7 @@ def get_variants_of_cohort(self,entry_id: Optional[str], qparams: RequestParams, else: schema = DefaultSchemas.GENOMICVARIATIONS# pragma: no cover return schema, 0, 0, None, dataset# pragma: no cover - query = apply_filters(self, query_count, qparams.query.filters, collection, {}) + query = apply_filters(self, query_count, qparams.query.filters, collection, {}, dataset) schema = DefaultSchemas.GENOMICVARIATIONS skip = qparams.query.pagination.skip if limit > 100 or limit == 0: @@ -133,12 +133,12 @@ def get_runs_of_cohort(self, entry_id: Optional[str], qparams: RequestParams, da dataset_count=0 limit = qparams.query.pagination.limit include = qparams.query.include_resultset_responses - query = apply_filters(self, {}, qparams.query.filters, collection, {}) + query = apply_filters(self, {}, qparams.query.filters, collection, {}, dataset) query = query_id(self, query, entry_id) count = get_count(self, client.beacon.cohorts, query) dict_in={} dict_in['datasetId']=dataset - query = apply_filters(self, dict_in, qparams.query.filters, collection, {}) + query = apply_filters(self, dict_in, qparams.query.filters, collection, {}, dataset) schema = DefaultSchemas.RUNS skip = qparams.query.pagination.skip if limit > 100 or limit == 0: @@ -154,12 +154,12 @@ def get_biosamples_of_cohort(self, entry_id: Optional[str], qparams: RequestPara dataset_count=0 limit = qparams.query.pagination.limit include = qparams.query.include_resultset_responses - query = apply_filters(self, {}, qparams.query.filters, collection, {}) + query = apply_filters(self, {}, qparams.query.filters, collection, {}, dataset) query = query_id(self, query, entry_id) count = get_count(self, client.beacon.cohorts, query) dict_in={} dict_in['datasetId']=dataset - query = apply_filters(self, dict_in, qparams.query.filters, collection, {}) + query = apply_filters(self, dict_in, qparams.query.filters, collection, {}, dataset) schema = DefaultSchemas.BIOSAMPLES skip = qparams.query.pagination.skip if limit > 100 or limit == 0: diff --git a/beacon/connections/mongo/datasets.py b/beacon/connections/mongo/datasets.py index 2f0d5d1..0a88bb4 100644 --- a/beacon/connections/mongo/datasets.py +++ b/beacon/connections/mongo/datasets.py @@ -91,7 +91,7 @@ def get_variants_of_dataset(self, entry_id: Optional[str], qparams: RequestParam else: schema = DefaultSchemas.GENOMICVARIATIONS# pragma: no cover return schema, 0, 0, None, dataset# pragma: no cover - query = apply_filters(self, query_count, qparams.query.filters, collection, {}) + query = apply_filters(self, query_count, qparams.query.filters, collection, {}, dataset) schema = DefaultSchemas.GENOMICVARIATIONS include = qparams.query.include_resultset_responses limit = qparams.query.pagination.limit @@ -107,12 +107,12 @@ def get_biosamples_of_dataset(self, entry_id: Optional[str], qparams: RequestPar mongo_collection = client.beacon.biosamples dataset_count=0 limit = qparams.query.pagination.limit - query = apply_filters(self, {}, qparams.query.filters, collection, {}) + query = apply_filters(self, {}, qparams.query.filters, collection, {}, dataset) query = query_id(self, query, entry_id) count = get_count(self, client.beacon.datasets, query) dict_in={} dict_in['datasetId']=dataset - query = apply_filters(self, dict_in, qparams.query.filters, collection, {}) + query = apply_filters(self, dict_in, qparams.query.filters, collection, {}, dataset) schema = DefaultSchemas.BIOSAMPLES include = qparams.query.include_resultset_responses limit = qparams.query.pagination.limit @@ -129,12 +129,12 @@ def get_individuals_of_dataset(self, entry_id: Optional[str], qparams: RequestPa mongo_collection = client.beacon.individuals dataset_count=0 limit = qparams.query.pagination.limit - query = apply_filters(self, {}, qparams.query.filters, collection, {}) + query = apply_filters(self, {}, qparams.query.filters, collection, {}, dataset) query = query_id(self, query, entry_id) count = get_count(self, client.beacon.datasets, query) dict_in={} dict_in['datasetId']=dataset - query = apply_filters(self, dict_in, qparams.query.filters, collection, {}) + query = apply_filters(self, dict_in, qparams.query.filters, collection, {}, dataset) schema = DefaultSchemas.INDIVIDUALS include = qparams.query.include_resultset_responses limit = qparams.query.pagination.limit @@ -151,12 +151,12 @@ def get_runs_of_dataset(self, entry_id: Optional[str], qparams: RequestParams, d mongo_collection = client.beacon.runs dataset_count=0 limit = qparams.query.pagination.limit - query = apply_filters(self, {}, qparams.query.filters, collection, {}) + query = apply_filters(self, {}, qparams.query.filters, collection, {}, dataset) query = query_id(self, query, entry_id) count = get_count(self, client.beacon.datasets, query) dict_in={} dict_in['datasetId']=dataset - query = apply_filters(self, dict_in, qparams.query.filters, collection, {}) + query = apply_filters(self, dict_in, qparams.query.filters, collection, {}, dataset) schema = DefaultSchemas.RUNS include = qparams.query.include_resultset_responses limit = qparams.query.pagination.limit @@ -177,12 +177,12 @@ def get_analyses_of_dataset(self, entry_id: Optional[str], qparams: RequestParam mongo_collection = client.beacon.analyses dataset_count=0 limit = qparams.query.pagination.limit - query = apply_filters(self, {}, qparams.query.filters, collection, {}) + query = apply_filters(self, {}, qparams.query.filters, collection, {}, dataset) query = query_id(self, query, entry_id) count = get_count(self, client.beacon.datasets, query) dict_in={} dict_in['datasetId']=dataset - query = apply_filters(self, dict_in, qparams.query.filters, collection, {}) + query = apply_filters(self, dict_in, qparams.query.filters, collection, {}, dataset) schema = DefaultSchemas.ANALYSES include = qparams.query.include_resultset_responses limit = qparams.query.pagination.limit diff --git a/beacon/connections/mongo/extract_filtering_terms.py b/beacon/connections/mongo/extract_filtering_terms.py index 30ef598..0bed779 100644 --- a/beacon/connections/mongo/extract_filtering_terms.py +++ b/beacon/connections/mongo/extract_filtering_terms.py @@ -232,7 +232,7 @@ def insert_all_ontology_terms_used(): collections.remove('filtering_terms') print("Collections:", collections) for c_name in collections: - if c_name not in ['counts', 'similarities', 'synonyms']: + if c_name not in ['counts', 'similarities', 'synonyms', 'caseLevelData', 'targets']: terms_ids = find_ontology_terms_used(c_name) terms = get_filtering_object(terms_ids, c_name) if len(terms) > 0: diff --git a/beacon/connections/mongo/filters.py b/beacon/connections/mongo/filters.py index 8fb0367..4d7f9a4 100644 --- a/beacon/connections/mongo/filters.py +++ b/beacon/connections/mongo/filters.py @@ -11,31 +11,18 @@ @log_with_args(level) -def cross_query(self, query: dict, scope: str, collection: str, request_parameters: dict): +def cross_query(self, query: dict, scope: str, collection: str, request_parameters: dict, dataset: str): if scope == 'genomicVariation' and collection == 'g_variants' or scope == collection[0:-1]: subquery={} subquery["$or"]=[] if request_parameters != {}: - biosample_ids = client.beacon.genomicVariations.find(request_parameters, {"caseLevelData.biosampleId": 1, "_id": 0}) - final_id='caseLevelData.biosampleId' - original_id="biosampleId" - def_list=[] - for iditem in biosample_ids: - if isinstance(iditem, dict): - if iditem != {}: - for id_item in iditem['caseLevelData']: - if id_item != {}: - new_id={} - new_id[final_id] = id_item[original_id] - try: - subquery['$or'].append(new_id) - except Exception:# pragma: no cover - def_list.append(new_id) - subquery={} - subquery['$or']=def_list + HGVSIds = client.beacon.genomicVariations.find(request_parameters, {"identifiers.genomicHGVSId": 1, "_id": 0}) + HGVSIds=list(HGVSIds) + HGVSId=HGVSIds[0]["identifiers"]["genomicHGVSId"] + queryHGVSId={"datasetId": dataset, "id": HGVSId} try: query["$and"] = [] - query["$and"].append(subquery) + query["$and"].append(queryHGVSId) except Exception:# pragma: no cover pass else: @@ -44,24 +31,52 @@ def cross_query(self, query: dict, scope: str, collection: str, request_paramete mongo_collection=client.beacon.individuals original_id="id" join_ids=list(join_query(self, mongo_collection, query, original_id)) + LOG.debug(join_ids) + ''' final_id="individualId" for id_item in join_ids: new_id={} new_id[final_id] = id_item.pop(original_id) def_list.append(new_id) + query={} query['$or']=def_list mongo_collection=client.beacon.biosamples original_id="id" join_ids2=list(join_query(self, mongo_collection, query, original_id)) - def_list=[] - final_id="caseLevelData.biosampleId" - for id_item in join_ids2: + LOG.debug(join_ids2) + ''' + targets = client.beacon.targets \ + .find({"datasetId": dataset}, {"biosampleIds": 1, "_id": 0}) + bioids=targets[0]["biosampleIds"] + positions_list=[] + for id_item in join_ids: new_id={} - new_id[final_id] = id_item.pop(original_id) - def_list.append(new_id) + biosampleId=id_item.pop(original_id) + position=bioids.index(biosampleId) + positions_list.append(position) + LOG.debug(positions_list) + query_cl={} + query_cl["$or"]=[] + for position in positions_list: + position=str(position) + position1="^"+position+"," + position2=","+position+"," + position3=","+position+"$" + query_cl["$or"].append({"biosampleIds": {"$regex": position1}}) + query_cl["$or"].append({"biosampleIds": {"$regex": position2}}) + query_cl["$or"].append({"biosampleIds": {"$regex": position3}}) + string_of_ids = client.beacon.caseLevelData \ + .find(query_cl, {"id": 1, "_id": 0}) + HGVSIds=list(string_of_ids) query={} - query['$or']=def_list + queryHGVS={} + listHGVS=[] + for HGVSId in HGVSIds: + justid=HGVSId["id"] + listHGVS.append(justid) + queryHGVS["$in"]=listHGVS + query["identifiers.genomicHGVSId"]=queryHGVS elif scope == 'individual' and collection in ['runs','biosamples', 'analyses']: mongo_collection=client.beacon.individuals original_id="id" @@ -74,149 +89,296 @@ def cross_query(self, query: dict, scope: str, collection: str, request_paramete query={} query['$or']=def_list elif scope == 'genomicVariation' and collection == 'individuals': - biosample_ids = client.beacon.genomicVariations.find(query, {"caseLevelData.biosampleId": 1, "_id": 0}) - final_id='id' - original_id="biosampleId" - def_list=[] - for iditem in biosample_ids: - if isinstance(iditem, dict): - if iditem != {}: - for id_item in iditem['caseLevelData']: - if id_item != {}: - new_id={} - new_id[final_id] = id_item[original_id] - try: - query['$or'].append(new_id) - except Exception:# pragma: no cover - def_list.append(new_id) - if def_list != []: - try:# pragma: no cover - query['$or'].def_list - except Exception:# pragma: no cover - query={} - query['$or']=def_list - mongo_collection=client.beacon.biosamples - original_id="individualId" - join_ids2=list(join_query(self, mongo_collection, query, original_id)) - def_list=[] - final_id="id" - for id_item in join_ids2: - new_id={} - new_id[final_id] = id_item.pop(original_id) - def_list.append(new_id) - query={} - query['$or']=def_list - if def_list != []: + HGVSIds = client.beacon.genomicVariations \ + .find(query, {"identifiers.genomicHGVSId": 1, "_id": 0}) + HGVSIds=list(HGVSIds) + HGVSId=HGVSIds[0]["identifiers"]["genomicHGVSId"] + queryHGVSId={"datasetId": dataset, "id": HGVSId} + string_of_ids = client.beacon.caseLevelData \ + .find(queryHGVSId, {"biosampleIds": 1, "_id": 0}) + targets = client.beacon.targets \ + .find({"datasetId": dataset}, {"biosampleIds": 1, "_id": 0}) + targets=list(targets) + list_of_targets=targets[0]["biosampleIds"] + list_of_positions_strings= string_of_ids[0]['biosampleIds'].split(',') + biosampleIds=[] + for position in list_of_positions_strings: + if position != '': + biosampleIds.append(list_of_targets[int(position)]) + try: + finalquery={} + finalquery["$or"]=[] + for finalid in biosampleIds: + query = {"id": finalid} + finalquery["$or"].append(query) + individual_id = client.beacon.biosamples \ + .find(finalquery, {"individualId": 1, "_id": 0}) try: - query['$or'].def_list - except Exception: - query={} - query['$or']=def_list - elif scope == 'genomicVariation' and collection in ['analyses', 'biosamples', 'runs']: - biosample_ids = client.beacon.genomicVariations.find(query, {"caseLevelData.biosampleId": 1, "_id": 0}) - if collection == 'biosamples': - final_id='id' - else: - final_id='biosampleId'# pragma: no cover - original_id="biosampleId" - def_list=[] - for iditem in biosample_ids: - if isinstance(iditem, dict): - if iditem != {}: - for id_item in iditem['caseLevelData']: - if id_item != {}: - new_id={} - new_id[final_id] = id_item[original_id] - try: - query['$or'].append(new_id) - except Exception:# pragma: no cover - def_list.append(new_id) - if def_list != []: - try:# pragma: no cover - query['$or'].def_list + finalids=[] + for indid in individual_id: + finalids.append(indid["individualId"]) except Exception:# pragma: no cover - query={} - query['$or']=def_list + finalids=[] + if finalids==[]: + finalids=biosampleIds + except Exception: + finalids=biosampleIds + query={} + query["$or"]=[] + for finalid in finalids: + finalquery = {"id": finalid} + query["$or"].append(finalquery) + elif scope == 'genomicVariation' and collection == 'biosamples': + HGVSIds = client.beacon.genomicVariations \ + .find(query, {"identifiers.genomicHGVSId": 1, "_id": 0}) + HGVSIds=list(HGVSIds) + HGVSId=HGVSIds[0]["identifiers"]["genomicHGVSId"] + queryHGVSId={"datasetId": dataset, "id": HGVSId} + string_of_ids = client.beacon.caseLevelData \ + .find(queryHGVSId, {"biosampleIds": 1, "_id": 0}) + targets = client.beacon.targets \ + .find({"datasetId": dataset}, {"biosampleIds": 1, "_id": 0}) + targets=list(targets) + list_of_targets=targets[0]["biosampleIds"] + list_of_positions_strings= string_of_ids[0]['biosampleIds'].split(',') + biosampleIds=[] + for position in list_of_positions_strings: + if position != '': + biosampleIds.append(list_of_targets[int(position)]) + finalids=biosampleIds + try: + finalids=[] + for bioid in biosampleIds: + finalids.append({"biosampleId": bioid}) + except Exception:# pragma: no cover + finalids=[] + query = {"$and": [{"$or": finalids}]} + elif scope == 'genomicVariation' and collection in ['analyses','runs']: + HGVSIds = client.beacon.genomicVariations \ + .find(query, {"identifiers.genomicHGVSId": 1, "_id": 0}) + HGVSIds=list(HGVSIds) + HGVSId=HGVSIds[0]["identifiers"]["genomicHGVSId"] + queryHGVSId={"datasetId": dataset, "id": HGVSId} + string_of_ids = client.beacon.caseLevelData \ + .find(queryHGVSId, {"biosampleIds": 1, "_id": 0}) + targets = client.beacon.targets \ + .find({"datasetId": dataset}, {"biosampleIds": 1, "_id": 0}) + targets=list(targets) + list_of_targets=targets[0]["biosampleIds"] + list_of_positions_strings= string_of_ids[0]['biosampleIds'].split(',') + biosampleIds=[] + for position in list_of_positions_strings: + if position != '': + biosampleIds.append(list_of_targets[int(position)]) + finalids=biosampleIds + try: + finalids=[] + for bioid in biosampleIds: + finalids.append({"id": bioid}) + except Exception:# pragma: no cover + finalids=[] + query = {"$and": [{"$or": finalids}]} elif scope == 'run' and collection != 'runs': mongo_collection=client.beacon.runs if collection == 'g_variants': original_id="biosampleId" join_ids=list(join_query(self, mongo_collection, query, original_id)) - final_id="caseLevelData.biosampleId" + targets = client.beacon.targets \ + .find({"datasetId": dataset}, {"biosampleIds": 1, "_id": 0}) + bioids=targets[0]["biosampleIds"] + positions_list=[] + for id_item in join_ids: + new_id={} + biosampleId=id_item.pop(original_id) + position=bioids.index(biosampleId) + positions_list.append(position) + query_cl={} + query_cl["$or"]=[] + for position in positions_list: + position=str(position) + position1="^"+position+"," + position2=","+position+"," + position3=","+position+"$" + query_cl["$or"].append({"biosampleIds": {"$regex": position1}}) + query_cl["$or"].append({"biosampleIds": {"$regex": position2}}) + query_cl["$or"].append({"biosampleIds": {"$regex": position3}}) + string_of_ids = client.beacon.caseLevelData \ + .find(query_cl, {"id": 1, "_id": 0}) + HGVSIds=list(string_of_ids) + query={} + queryHGVS={} + listHGVS=[] + for HGVSId in HGVSIds: + justid=HGVSId["id"] + listHGVS.append(justid) + queryHGVS["$in"]=listHGVS + query["identifiers.genomicHGVSId"]=queryHGVS elif collection == 'individuals': original_id="individualId" join_ids=list(join_query(self, mongo_collection, query, original_id)) final_id="id" + for id_item in join_ids: + new_id={} + new_id[final_id] = id_item.pop(original_id) + def_list.append(new_id) + query={} + query['$or']=def_list elif collection == 'analyses': original_id="biosampleId" join_ids=list(join_query(self, mongo_collection, query, original_id)) final_id="biosampleId" + for id_item in join_ids: + new_id={} + new_id[final_id] = id_item.pop(original_id) + def_list.append(new_id) + query={} + query['$or']=def_list elif collection == 'biosamples': original_id="biosampleId" join_ids=list(join_query(self, mongo_collection, query, original_id)) final_id="id" - for id_item in join_ids: - new_id={} - new_id[final_id] = id_item.pop(original_id) - def_list.append(new_id) - query={} - query['$or']=def_list + for id_item in join_ids: + new_id={} + new_id[final_id] = id_item.pop(original_id) + def_list.append(new_id) + query={} + query['$or']=def_list elif scope == 'analyse' and collection != 'analyses':# pragma: no cover mongo_collection=client.beacon.analyses if collection == 'g_variants': original_id="biosampleId" join_ids=list(join_query(self, mongo_collection, query, original_id)) - final_id="caseLevelData.biosampleId" + targets = client.beacon.targets \ + .find({"datasetId": dataset}, {"biosampleIds": 1, "_id": 0}) + bioids=targets[0]["biosampleIds"] + positions_list=[] + for id_item in join_ids: + new_id={} + biosampleId=id_item.pop(original_id) + position=bioids.index(biosampleId) + positions_list.append(position) + query_cl={} + query_cl["$or"]=[] + for position in positions_list: + position=str(position) + position1="^"+position+"," + position2=","+position+"," + position3=","+position+"$" + query_cl["$or"].append({"biosampleIds": {"$regex": position1}}) + query_cl["$or"].append({"biosampleIds": {"$regex": position2}}) + query_cl["$or"].append({"biosampleIds": {"$regex": position3}}) + string_of_ids = client.beacon.caseLevelData \ + .find(query_cl, {"id": 1, "_id": 0}) + HGVSIds=list(string_of_ids) + query={} + queryHGVS={} + listHGVS=[] + for HGVSId in HGVSIds: + justid=HGVSId["id"] + listHGVS.append(justid) + queryHGVS["$in"]=listHGVS + query["identifiers.genomicHGVSId"]=queryHGVS elif collection == 'individuals': original_id="individualId" join_ids=list(join_query(self, mongo_collection, query, original_id)) final_id="id" + for id_item in join_ids: + new_id={} + new_id[final_id] = id_item.pop(original_id) + def_list.append(new_id) + query={} + query['$or']=def_list elif collection == 'runs': original_id="biosampleId" join_ids=list(join_query(self, mongo_collection, query, original_id)) final_id="biosampleId" + for id_item in join_ids: + new_id={} + new_id[final_id] = id_item.pop(original_id) + def_list.append(new_id) + query={} + query['$or']=def_list elif collection == 'biosamples': original_id="biosampleId" join_ids=list(join_query(self, mongo_collection, query, original_id)) final_id="id" - for id_item in join_ids: - new_id={} - new_id[final_id] = id_item.pop(original_id) - def_list.append(new_id) - query={} - query['$or']=def_list + for id_item in join_ids: + new_id={} + new_id[final_id] = id_item.pop(original_id) + def_list.append(new_id) + query={} + query['$or']=def_list elif scope == 'biosample' and collection != 'biosamples': mongo_collection=client.beacon.biosamples if collection == 'g_variants': original_id="id" join_ids=list(join_query(self, mongo_collection, query, original_id)) - final_id="caseLevelData.biosampleId" + targets = client.beacon.targets \ + .find({"datasetId": dataset}, {"biosampleIds": 1, "_id": 0}) + bioids=targets[0]["biosampleIds"] + positions_list=[] + for id_item in join_ids: + new_id={} + biosampleId=id_item.pop(original_id) + position=bioids.index(biosampleId) + positions_list.append(position) + query_cl={} + query_cl["$or"]=[] + for position in positions_list: + position=str(position) + position1="^"+position+"," + position2=","+position+"," + position3=","+position+"$" + query_cl["$or"].append({"biosampleIds": {"$regex": position1}}) + query_cl["$or"].append({"biosampleIds": {"$regex": position2}}) + query_cl["$or"].append({"biosampleIds": {"$regex": position3}}) + string_of_ids = client.beacon.caseLevelData \ + .find(query_cl, {"id": 1, "_id": 0}) + HGVSIds=list(string_of_ids) + query={} + queryHGVS={} + listHGVS=[] + for HGVSId in HGVSIds: + justid=HGVSId["id"] + listHGVS.append(justid) + queryHGVS["$in"]=listHGVS + query["identifiers.genomicHGVSId"]=queryHGVS elif collection == 'individuals': original_id="individualId" join_ids=list(join_query(self, mongo_collection, query, original_id)) final_id="id" + for id_item in join_ids: + new_id={} + new_id[final_id] = id_item.pop(original_id) + def_list.append(new_id) + query={} + query['$or']=def_list elif collection == 'analyses': original_id="id" join_ids=list(join_query(self, mongo_collection, query, original_id)) final_id="biosampleId" + for id_item in join_ids: + new_id={} + new_id[final_id] = id_item.pop(original_id) + def_list.append(new_id) + query={} + query['$or']=def_list elif collection == 'runs': original_id="id" join_ids=list(join_query(self, mongo_collection, query, original_id)) final_id="biosampleId" - query={} - query['$or']=def_list - for id_item in join_ids: - new_id={} - new_id[final_id] = id_item.pop(original_id) - def_list.append(new_id) - query={} - query['$or']=def_list + for id_item in join_ids: + new_id={} + new_id[final_id] = id_item.pop(original_id) + def_list.append(new_id) + query={} + query['$or']=def_list return query @log_with_args(level) -def apply_filters(self, query: dict, filters: List[dict], collection: str, query_parameters: dict) -> dict: +def apply_filters(self, query: dict, filters: List[dict], collection: str, query_parameters: dict, dataset: str) -> dict: request_parameters = query_parameters total_query={} if len(filters) >= 1: @@ -227,155 +389,125 @@ def apply_filters(self, query: dict, filters: List[dict], collection: str, query partial_query = {} if "value" in filter: filter = AlphanumericFilter(**filter) - partial_query = apply_alphanumeric_filter(self, partial_query, filter, collection) + partial_query = apply_alphanumeric_filter(self, partial_query, filter, collection, dataset) elif "includeDescendantTerms" not in filter and '.' not in filter["id"] and filter["id"].isupper(): filter=OntologyFilter(**filter) filter.include_descendant_terms=True - partial_query = apply_ontology_filter(self, partial_query, filter, collection, request_parameters) + partial_query = apply_ontology_filter(self, partial_query, filter, collection, request_parameters, dataset) elif "similarity" in filter or "includeDescendantTerms" in filter or re.match(CURIE_REGEX, filter["id"]) and filter["id"].isupper(): filter = OntologyFilter(**filter)# pragma: no cover partial_query = apply_ontology_filter(self, partial_query, filter, collection, request_parameters)# pragma: no cover else: filter = CustomFilter(**filter) - partial_query = apply_custom_filter(self, partial_query, filter, collection) + partial_query = apply_custom_filter(self, partial_query, filter, collection, dataset) total_query["$and"].append(partial_query) if total_query["$and"] == [{'$or': []}] or total_query['$and'] == []: total_query = {}# pragma: no cover if request_parameters != {}: try: - if len(request_parameters["$or"]) >= 1:# pragma: no cover - array_of_biosamples2=[] - array_of_biosamples=[] - for reqpam in request_parameters["$or"]: - biosample_ids = client.beacon.genomicVariations.find(reqpam, {"caseLevelData.biosampleId": 1, "_id": 0}) - for biosample in biosample_ids: - for bioitem in biosample['caseLevelData']: - if bioitem not in array_of_biosamples2: - array_of_biosamples2.append(bioitem["biosampleId"]) - array_of_biosamples.append(array_of_biosamples2) - array_of_biosamples2=[] - - dict_counts={} - for list_bio in array_of_biosamples: - for item in list_bio: - if item not in array_of_biosamples2: - array_of_biosamples2.append(item) - try: - dict_counts[item]+=1 - except Exception: - dict_counts[item]=1 - partial_query={} - partial_query['$or']=[] - for item in array_of_biosamples2: - if dict_counts[item] == len(request_parameters["$or"]): - partial_query['$or'].append({"id": item}) - - mongo_collection=client.beacon.biosamples - original_id="individualId" - join_ids2=list(join_query(self, mongo_collection, partial_query, original_id)) - def_list=[] - final_id="id" - for id_item in join_ids2: - new_id={} - new_id[final_id] = id_item.pop(original_id) - def_list.append(new_id) - partial_query={} - partial_query['$or']=def_list - - try: - total_query["$and"].append(partial_query) - except Exception: - total_query["$and"]=[] - total_query["$and"].append(partial_query) - except Exception:# pragma: no cover if collection == 'individuals': - partial_query = {} - biosample_ids = client.beacon.genomicVariations.find(request_parameters, {"caseLevelData.biosampleId": 1, "_id": 0}) - final_id='id' - original_id="biosampleId" - def_list=[] - partial_query['$or']=[] - for iditem in biosample_ids: - if isinstance(iditem, dict): - if iditem != {}: - for id_item in iditem['caseLevelData']: - if id_item != {}: - new_id={} - new_id[final_id] = id_item[original_id] - try: - partial_query['$or'].append(new_id) - except Exception: - def_list.append(new_id) - - mongo_collection=client.beacon.biosamples - original_id="individualId" - join_ids2=list(join_query(self, mongo_collection, partial_query, original_id)) - def_list=[] - final_id="id" - for id_item in join_ids2: - new_id={} - new_id[final_id] = id_item.pop(original_id) - def_list.append(new_id) - partial_query={} - partial_query['$or']=def_list - if def_list != []: + HGVSIds = client.beacon.genomicVariations \ + .find(query, {"identifiers.genomicHGVSId": 1, "_id": 0}) + HGVSIds=list(HGVSIds) + HGVSId=HGVSIds[0]["identifiers"]["genomicHGVSId"] + queryHGVSId={"datasetId": dataset, "id": HGVSId} + string_of_ids = client.beacon.caseLevelData \ + .find(queryHGVSId, {"biosampleIds": 1, "_id": 0}) + targets = client.beacon.targets \ + .find({"datasetId": dataset}, {"biosampleIds": 1, "_id": 0}) + targets=list(targets) + list_of_targets=targets[0]["biosampleIds"] + list_of_positions_strings= string_of_ids[0]['biosampleIds'].split(',') + biosampleIds=[] + for position in list_of_positions_strings: + if position != '': + biosampleIds.append(list_of_targets[int(position)]) + try: + finalquery={} + finalquery["$or"]=[] + for finalid in biosampleIds: + query = {"id": finalid} + finalquery["$or"].append(query) + individual_id = client.beacon.biosamples \ + .find(finalquery, {"individualId": 1, "_id": 0}) try: - partial_query['$or']=def_list - except Exception: - partial_query={} - partial_query['$or']=def_list + finalids=[] + for indid in individual_id: + finalids.append(indid["individualId"]) + except Exception:# pragma: no cover + finalids=[] + if finalids==[]: + finalids=biosampleIds + except Exception: + finalids=biosampleIds + finalquery={} + finalquery["$or"]=[] + for finalid in finalids: + query = {"id": finalid} + finalquery["$or"].append(query) try: - total_query["$and"].append(partial_query) + total_query["$and"].append(finalquery) except Exception: total_query["$and"]=[] - total_query["$and"].append(partial_query) + total_query["$and"].append(finalquery) elif collection == 'biosamples': - partial_query = {} - biosample_ids = client.beacon.genomicVariations.find(request_parameters, {"caseLevelData.biosampleId": 1, "_id": 0}) - final_id='id' - original_id="biosampleId" - def_list=[] - partial_query['$or']=[] - for iditem in biosample_ids: - if isinstance(iditem, dict): - if iditem != {}: - for id_item in iditem['caseLevelData']: - if id_item != {}: - new_id={} - new_id[final_id] = id_item[original_id] - try: - partial_query['$or'].append(new_id) - except Exception: - def_list.append(new_id) + HGVSIds = client.beacon.genomicVariations \ + .find(query, {"identifiers.genomicHGVSId": 1, "_id": 0}) + HGVSIds=list(HGVSIds) + HGVSId=HGVSIds[0]["identifiers"]["genomicHGVSId"] + queryHGVSId={"datasetId": dataset, "id": HGVSId} + string_of_ids = client.beacon.caseLevelData \ + .find(queryHGVSId, {"biosampleIds": 1, "_id": 0}) + targets = client.beacon.targets \ + .find({"datasetId": dataset}, {"biosampleIds": 1, "_id": 0}) + targets=list(targets) + list_of_targets=targets[0]["biosampleIds"] + list_of_positions_strings= string_of_ids[0]['biosampleIds'].split(',') + biosampleIds=[] + for position in list_of_positions_strings: + if position != '': + biosampleIds.append(list_of_targets[int(position)]) + finalids=biosampleIds try: - total_query["$and"].append(partial_query) + finalids=[] + for bioid in biosampleIds: + finalids.append({"id": bioid}) + except Exception:# pragma: no cover + finalids=[] + try: + total_query["$and"].append({"$or": finalids}) except Exception: total_query["$and"]=[] - total_query["$and"].append(partial_query) + total_query["$and"].append({"$or": finalids}) elif collection == 'analyses' or collection == 'runs': - partial_query = {} - biosample_ids = client.beacon.genomicVariations.find(request_parameters, {"caseLevelData.biosampleId": 1, "_id": 0}) - final_id='biosampleId' - original_id="biosampleId" - def_list=[] - partial_query['$or']=[] - for iditem in biosample_ids: - if isinstance(iditem, dict): - if iditem != {}: - for id_item in iditem['caseLevelData']: - if id_item != {}: - new_id={} - new_id[final_id] = id_item[original_id] - try: - partial_query['$or'].append(new_id) - except Exception: - def_list.append(new_id) + HGVSIds = client.beacon.genomicVariations \ + .find(query, {"identifiers.genomicHGVSId": 1, "_id": 0}) + HGVSIds=list(HGVSIds) + HGVSId=HGVSIds[0]["identifiers"]["genomicHGVSId"] + queryHGVSId={"datasetId": dataset, "id": HGVSId} + string_of_ids = client.beacon.caseLevelData \ + .find(queryHGVSId, {"biosampleIds": 1, "_id": 0}) + targets = client.beacon.targets \ + .find({"datasetId": dataset}, {"biosampleIds": 1, "_id": 0}) + targets=list(targets) + list_of_targets=targets[0]["biosampleIds"] + list_of_positions_strings= string_of_ids[0]['biosampleIds'].split(',') + biosampleIds=[] + for position in list_of_positions_strings: + if position != '': + biosampleIds.append(list_of_targets[int(position)]) try: - total_query["$and"].append(partial_query) + finalids=[] + for bioid in biosampleIds: + finalids.append({"biosampleId": bioid}) + except Exception:# pragma: no cover + finalids=[] + try: + total_query["$and"].append({"$or": finalids}) except Exception: total_query["$and"]=[] - total_query["$and"].append(partial_query) + total_query["$and"].append({"$or": finalids}) else: try: total_query["$and"].append(request_parameters) @@ -384,13 +516,15 @@ def apply_filters(self, query: dict, filters: List[dict], collection: str, query total_query["$and"].append(request_parameters) if total_query["$and"] == [{'$or': []}] or total_query['$and'] == []: total_query = {} + except Exception: + pass if total_query == {} and query != {}: total_query=query return total_query @log_with_args(level) -def apply_ontology_filter(self, query: dict, filter: OntologyFilter, collection: str, request_parameters: dict) -> dict: +def apply_ontology_filter(self, query: dict, filter: OntologyFilter, collection: str, request_parameters: dict, dataset: str) -> dict: final_term_list=[] query_synonyms={} query_synonyms['id']=filter.id @@ -565,7 +699,7 @@ def apply_ontology_filter(self, query: dict, filter: OntologyFilter, collection: new_query['$or'].append(query_id) query = new_query - query=cross_query(self, query, scope, collection, request_parameters) + query=cross_query(self, query, scope, collection, request_parameters, dataset) if is_filter_id_required:# pragma: no cover @@ -615,7 +749,7 @@ def apply_ontology_filter(self, query: dict, filter: OntologyFilter, collection: new_query['$or'].append(query_id) new_query['$or'].append(query) query = new_query - query=cross_query(self, query, scope, collection, request_parameters) + query=cross_query(self, query, scope, collection, request_parameters, dataset) return query @@ -653,7 +787,7 @@ def format_operator(self, operator: Operator) -> str: return "$lte" @log_with_args(level) -def apply_alphanumeric_filter(self, query: dict, filter: AlphanumericFilter, collection: str) -> dict: +def apply_alphanumeric_filter(self, query: dict, filter: AlphanumericFilter, collection: str, dataset: str) -> dict: scope = filter.scope if scope is None and collection != 'g_variants': scope = collection[0:-1] @@ -753,7 +887,7 @@ def apply_alphanumeric_filter(self, query: dict, filter: AlphanumericFilter, col query_id={} query_id[query_term]=regex_dict query['$or'].append(query_id) - query=cross_query(self, query, scope, collection, {}) + query=cross_query(self, query, scope, collection, {}, dataset) else: try: @@ -766,7 +900,7 @@ def apply_alphanumeric_filter(self, query: dict, filter: AlphanumericFilter, col query_id={} query_id[query_term]=filter.value query['$or'].append(query_id) - query=cross_query(self, query, scope, collection, {}) + query=cross_query(self, query, scope, collection, {}, dataset) elif formatted_operator == "$ne": @@ -820,7 +954,7 @@ def apply_alphanumeric_filter(self, query: dict, filter: AlphanumericFilter, col dict_in={} dict_in["$in"]=new_age_list query[filter.id] = dict_in - query=cross_query(self, query, scope, collection, {}) + query=cross_query(self, query, scope, collection, {}, dataset) elif '<' in filter.operator: age_in_number="" for char in filter.value: @@ -841,7 +975,7 @@ def apply_alphanumeric_filter(self, query: dict, filter: AlphanumericFilter, col dict_in={} dict_in["$in"]=new_age_list query[filter.id] = dict_in - query=cross_query(self, query, scope, collection, {}) + query=cross_query(self, query, scope, collection, {}, dataset) else: query_filtering={} query_filtering['$and']=[] @@ -877,12 +1011,12 @@ def apply_alphanumeric_filter(self, query: dict, filter: AlphanumericFilter, col dict_measures={} dict_measures[measuresfield]=dict_elemmatch query = dict_measures - query=cross_query(self, query, scope, collection, {}) + query=cross_query(self, query, scope, collection, {}, dataset) return query @log_with_args(level) -def apply_custom_filter(self, query: dict, filter: CustomFilter, collection:str) -> dict: +def apply_custom_filter(self, query: dict, filter: CustomFilter, collection:str, dataset: str) -> dict: scope = filter.scope if scope is None and collection != 'g_variants': scope = collection[0:-1] @@ -894,6 +1028,6 @@ def apply_custom_filter(self, query: dict, filter: CustomFilter, collection:str) else: query_term = value_splitted[0] + '.label' query[query_term]=value_splitted[1] - query=cross_query(self, query, scope, collection, {}) + query=cross_query(self, query, scope, collection, {}, dataset) return query diff --git a/beacon/connections/mongo/g_variants.py b/beacon/connections/mongo/g_variants.py index 92ac82d..396ba7e 100644 --- a/beacon/connections/mongo/g_variants.py +++ b/beacon/connections/mongo/g_variants.py @@ -23,7 +23,7 @@ def get_variants(self, entry_id: Optional[str], qparams: RequestParams, dataset: elif query_parameters == {'$and': []}: query_parameters = {} query={} - query = apply_filters(self, query, qparams.query.filters, collection,query_parameters) + query = apply_filters(self, query, qparams.query.filters, collection,query_parameters, dataset) include = qparams.query.include_resultset_responses limit = qparams.query.pagination.limit skip = qparams.query.pagination.skip @@ -47,7 +47,7 @@ def get_variant_with_id(self, entry_id: Optional[str], qparams: RequestParams, d query_parameters={}# pragma: no cover else: query=query_parameters - query = apply_filters(self, query, qparams.query.filters, collection, {}) + query = apply_filters(self, query, qparams.query.filters, collection, {}, dataset) schema = DefaultSchemas.GENOMICVARIATIONS include = qparams.query.include_resultset_responses limit = qparams.query.pagination.limit @@ -69,19 +69,32 @@ def get_biosamples_of_variant(self, entry_id: Optional[str], qparams: RequestPar query_parameters={}# pragma: no cover else: query=query_parameters - query = apply_filters(self, query, qparams.query.filters, collection,query_parameters) - biosample_ids = client.beacon.genomicVariations \ - .find(query, {"caseLevelData.biosampleId": 1, "_id": 0}) - biosample_ids=list(biosample_ids) - biosample_id=biosample_ids[0]["caseLevelData"] + query = apply_filters(self, query, qparams.query.filters, collection,query_parameters, dataset) + HGVSIds = client.beacon.genomicVariations \ + .find(query, {"identifiers.genomicHGVSId": 1, "_id": 0}) + HGVSIds=list(HGVSIds) + HGVSId=HGVSIds[0]["identifiers"]["genomicHGVSId"] + queryHGVSId={"datasetId": dataset, "id": HGVSId} + string_of_ids = client.beacon.caseLevelData \ + .find(queryHGVSId, {"biosampleIds": 1, "_id": 0}) + targets = client.beacon.targets \ + .find({"datasetId": dataset}, {"biosampleIds": 1, "_id": 0}) + targets=list(targets) + list_of_targets=targets[0]["biosampleIds"] + list_of_positions_strings= string_of_ids[0]['biosampleIds'].split(',') + biosampleIds=[] + for position in list_of_positions_strings: + if position != '': + biosampleIds.append(list_of_targets[int(position)]) + finalids=biosampleIds try: finalids=[] - for bioid in biosample_id: - finalids.append({"id": bioid["biosampleId"]}) + for bioid in biosampleIds: + finalids.append({"id": bioid}) except Exception:# pragma: no cover finalids=[] query = {"$and": [{"$or": finalids}]} - query = apply_filters(self, query, qparams.query.filters, collection, {}) + query = apply_filters(self, query, qparams.query.filters, collection, {}, dataset) schema = DefaultSchemas.BIOSAMPLES include = qparams.query.include_resultset_responses limit = qparams.query.pagination.limit @@ -103,19 +116,31 @@ def get_runs_of_variant(self, entry_id: Optional[str], qparams: RequestParams, d query_parameters={}# pragma: no cover else: query=query_parameters - query = apply_filters(self, query, qparams.query.filters, collection,query_parameters) - biosample_ids = client.beacon.genomicVariations \ - .find(query, {"caseLevelData.biosampleId": 1, "_id": 0}) - biosample_ids=list(biosample_ids) - biosample_id=biosample_ids[0]["caseLevelData"] + query = apply_filters(self, query, qparams.query.filters, collection,query_parameters, dataset) + HGVSIds = client.beacon.genomicVariations \ + .find(query, {"identifiers.genomicHGVSId": 1, "_id": 0}) + HGVSIds=list(HGVSIds) + HGVSId=HGVSIds[0]["identifiers"]["genomicHGVSId"] + queryHGVSId={"datasetId": dataset, "id": HGVSId} + string_of_ids = client.beacon.caseLevelData \ + .find(queryHGVSId, {"biosampleIds": 1, "_id": 0}) + targets = client.beacon.targets \ + .find({"datasetId": dataset}, {"biosampleIds": 1, "_id": 0}) + targets=list(targets) + list_of_targets=targets[0]["biosampleIds"] + list_of_positions_strings= string_of_ids[0]['biosampleIds'].split(',') + biosampleIds=[] + for position in list_of_positions_strings: + if position != '': + biosampleIds.append(list_of_targets[int(position)]) try: finalids=[] - for bioid in biosample_id: - finalids.append(bioid) + for bioid in biosampleIds: + finalids.append({"biosampleId": bioid}) except Exception:# pragma: no cover finalids=[] query = {"$and": [{"$or": finalids}]} - query = apply_filters(self, query, qparams.query.filters, collection, {}) + query = apply_filters(self, query, qparams.query.filters, collection, {}, dataset) schema = DefaultSchemas.RUNS include = qparams.query.include_resultset_responses limit = qparams.query.pagination.limit @@ -137,19 +162,31 @@ def get_analyses_of_variant(self, entry_id: Optional[str], qparams: RequestParam query_parameters={}# pragma: no cover else: query=query_parameters - query = apply_filters(self, query, qparams.query.filters, collection,query_parameters) - biosample_ids = client.beacon.genomicVariations \ - .find(query, {"caseLevelData.biosampleId": 1, "_id": 0}) - biosample_ids=list(biosample_ids) - biosample_id=biosample_ids[0]["caseLevelData"] + query = apply_filters(self, query, qparams.query.filters, collection,query_parameters, dataset) + HGVSIds = client.beacon.genomicVariations \ + .find(query, {"identifiers.genomicHGVSId": 1, "_id": 0}) + HGVSIds=list(HGVSIds) + HGVSId=HGVSIds[0]["identifiers"]["genomicHGVSId"] + queryHGVSId={"datasetId": dataset, "id": HGVSId} + string_of_ids = client.beacon.caseLevelData \ + .find(queryHGVSId, {"biosampleIds": 1, "_id": 0}) + targets = client.beacon.targets \ + .find({"datasetId": dataset}, {"biosampleIds": 1, "_id": 0}) + targets=list(targets) + list_of_targets=targets[0]["biosampleIds"] + list_of_positions_strings= string_of_ids[0]['biosampleIds'].split(',') + biosampleIds=[] + for position in list_of_positions_strings: + if position != '': + biosampleIds.append(list_of_targets[int(position)]) try: finalids=[] - for bioid in biosample_id: - finalids.append(bioid) + for bioid in biosampleIds: + finalids.append({"biosampleId": bioid}) except Exception:# pragma: no cover finalids=[] query = {"$and": [{"$or": finalids}]} - query = apply_filters(self, query, qparams.query.filters, collection, {}) + query = apply_filters(self, query, qparams.query.filters, collection, {}, dataset) schema = DefaultSchemas.ANALYSES include = qparams.query.include_resultset_responses limit = qparams.query.pagination.limit @@ -171,30 +208,43 @@ def get_individuals_of_variant(self, entry_id: Optional[str], qparams: RequestPa query_parameters={}# pragma: no cover else: query=query_parameters - query = apply_filters(self, query, qparams.query.filters, collection,query_parameters) - biosample_ids = client.beacon.genomicVariations \ - .find(query, {"caseLevelData.biosampleId": 1, "_id": 0}) - biosample_ids=list(biosample_ids) - biosample_id=biosample_ids[0]["caseLevelData"] + query = apply_filters(self, query, qparams.query.filters, collection,query_parameters, dataset) + HGVSIds = client.beacon.genomicVariations \ + .find(query, {"identifiers.genomicHGVSId": 1, "_id": 0}) + HGVSIds=list(HGVSIds) + HGVSId=HGVSIds[0]["identifiers"]["genomicHGVSId"] + LOG.debug(HGVSId) + queryHGVSId={"datasetId": dataset, "id": HGVSId} + string_of_ids = client.beacon.caseLevelData \ + .find(queryHGVSId, {"biosampleIds": 1, "_id": 0}) + targets = client.beacon.targets \ + .find({"datasetId": dataset}, {"biosampleIds": 1, "_id": 0}) + targets=list(targets) + list_of_targets=targets[0]["biosampleIds"] + LOG.debug(string_of_ids[0]) + list_of_positions_strings= string_of_ids[0]['biosampleIds'].split(',') + biosampleIds=[] + for position in list_of_positions_strings: + if position != '': + biosampleIds.append(list_of_targets[int(position)]) try: - finalids=[] - for bioid in biosample_id: - finalids.append(bioid["biosampleId"]) - except Exception:# pragma: no cover - finalids=[] - finalquery={} - finalquery["$or"]=[] - for finalid in finalids: - query = {"id": finalid} - finalquery["$or"].append(query) - individual_id = client.beacon.biosamples \ - .find(finalquery, {"individualId": 1, "_id": 0}) - try: - finalids=[] - for indid in individual_id: - finalids.append(indid["individualId"]) - except Exception:# pragma: no cover - finalids=[] + finalquery={} + finalquery["$or"]=[] + for finalid in biosampleIds: + query = {"id": finalid} + finalquery["$or"].append(query) + individual_id = client.beacon.biosamples \ + .find(finalquery, {"individualId": 1, "_id": 0}) + try: + finalids=[] + for indid in individual_id: + finalids.append(indid["individualId"]) + except Exception:# pragma: no cover + finalids=[] + if finalids==[]: + finalids=biosampleIds + except Exception: + finalids=biosampleIds finalquery={} finalquery["$or"]=[] for finalid in finalids: @@ -202,7 +252,7 @@ def get_individuals_of_variant(self, entry_id: Optional[str], qparams: RequestPa finalquery["$or"].append(query) superfinalquery={} superfinalquery["$and"]=[finalquery] - query = apply_filters(self, superfinalquery, qparams.query.filters, collection, {}) + query = apply_filters(self, superfinalquery, qparams.query.filters, collection, {}, dataset) schema = DefaultSchemas.INDIVIDUALS include = qparams.query.include_resultset_responses limit = qparams.query.pagination.limit @@ -211,4 +261,4 @@ def get_individuals_of_variant(self, entry_id: Optional[str], qparams: RequestPa limit = 100# pragma: no cover idq="id" count, dataset_count, docs = get_docs_by_response_type(self, include, query, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs, dataset + return schema, count, dataset_count, docs, dataset \ No newline at end of file diff --git a/beacon/connections/mongo/individuals.py b/beacon/connections/mongo/individuals.py index 228f60f..e39fc0a 100644 --- a/beacon/connections/mongo/individuals.py +++ b/beacon/connections/mongo/individuals.py @@ -3,7 +3,7 @@ import yaml from beacon.connections.mongo.__init__ import client from beacon.connections.mongo.utils import get_docs_by_response_type, query_id -from beacon.logs.logs import log_with_args +from beacon.logs.logs import log_with_args, LOG from beacon.conf.conf import level from beacon.connections.mongo.filters import apply_filters from beacon.connections.mongo.request_parameters import apply_request_parameters @@ -23,7 +23,7 @@ def get_individuals(self, entry_id: Optional[str], qparams: RequestParams, datas elif query_parameters == {'$and': []}: query_parameters = {} query={} - query = apply_filters(self, query, qparams.query.filters, collection, query_parameters) + query = apply_filters(self, query, qparams.query.filters, collection, query_parameters, dataset) schema = DefaultSchemas.INDIVIDUALS include = qparams.query.include_resultset_responses limit = qparams.query.pagination.limit @@ -40,7 +40,7 @@ def get_individual_with_id(self, entry_id: Optional[str], qparams: RequestParams idq="id" mongo_collection = client.beacon.individuals query, parameters_as_filters = apply_request_parameters(self, {}, qparams) - query = apply_filters(self, query, qparams.query.filters, collection, {}) + query = apply_filters(self, query, qparams.query.filters, collection, {}, dataset) query = query_id(self, query, entry_id) schema = DefaultSchemas.INDIVIDUALS include = qparams.query.include_resultset_responses @@ -54,14 +54,37 @@ def get_individual_with_id(self, entry_id: Optional[str], qparams: RequestParams @log_with_args(level) def get_variants_of_individual(self, entry_id: Optional[str], qparams: RequestParams, dataset: str): collection = 'g_variants' - query = {"individualId": entry_id} - mongo_collection = client.beacon.biosamples - excluding_fields={"_id": 0, "id": 1} - biosampleId=mongo_collection.find(query, excluding_fields) - query = {"caseLevelData.biosampleId": biosampleId[0]["id"]} + targets = client.beacon.targets \ + .find({"datasetId": dataset}, {"biosampleIds": 1, "_id": 0}) + position=0 + bioids=targets[0]["biosampleIds"] + for bioid in bioids: + if bioid == entry_id: + break + position+=1 + position=str(position) + position1="^"+position+"," + position2=","+position+"," + position3=","+position+"$" + query_cl={ "$or": [ + {"biosampleIds": {"$regex": position1}}, + {"biosampleIds": {"$regex": position2}}, + {"biosampleIds": {"$regex": position3}} + ]} + string_of_ids = client.beacon.caseLevelData \ + .find(query_cl, {"id": 1, "_id": 0}) + HGVSIds=list(string_of_ids) + query={} + queryHGVS={} + listHGVS=[] + for HGVSId in HGVSIds: + justid=HGVSId["id"] + listHGVS.append(justid) + queryHGVS["$in"]=listHGVS + query["identifiers.genomicHGVSId"]=queryHGVS mongo_collection = client.beacon.genomicVariations query, parameters_as_filters = apply_request_parameters(self, query, qparams) - query = apply_filters(self, query, qparams.query.filters, collection, {}) + query = apply_filters(self, query, qparams.query.filters, collection, {}, dataset) schema = DefaultSchemas.GENOMICVARIATIONS include = qparams.query.include_resultset_responses limit = qparams.query.pagination.limit @@ -78,7 +101,7 @@ def get_biosamples_of_individual(self, entry_id: Optional[str], qparams: Request mongo_collection = client.beacon.biosamples query = {"individualId": entry_id} query, parameters_as_filters = apply_request_parameters(self, query, qparams) - query = apply_filters(self, query, qparams.query.filters, collection, {}) + query = apply_filters(self, query, qparams.query.filters, collection, {}, dataset) schema = DefaultSchemas.BIOSAMPLES include = qparams.query.include_resultset_responses limit = qparams.query.pagination.limit @@ -87,4 +110,4 @@ def get_biosamples_of_individual(self, entry_id: Optional[str], qparams: Request limit = 100# pragma: no cover idq="id" count, dataset_count, docs = get_docs_by_response_type(self, include, query, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs, dataset + return schema, count, dataset_count, docs, dataset \ No newline at end of file diff --git a/beacon/connections/mongo/reindex.py b/beacon/connections/mongo/reindex.py index a40c041..c63cf9a 100644 --- a/beacon/connections/mongo/reindex.py +++ b/beacon/connections/mongo/reindex.py @@ -57,7 +57,7 @@ #client.beacon.genomicVariations.create_index([("caseLevelData.biosampleId", 1)]) #client.beacon.genomicVariations.create_index([("variation.location.interval.end.value", -1), ("variation.location.interval.start.value", 1)]) client.beacon.genomicVariations.create_index([("datasetId", 1)]) -client.beacon.genomicVariations.create_index([("variantInternalId", 1), ("caseLevelData.biosampleId", 1)]) +client.beacon.genomicVariations.create_index([("variantInternalId", 1)]) #client.beacon.genomicVariations.create_index([("identifiers.genomicHGVSId", 1), ("variation.location.interval.start.value", 1), ("caseLevelData.biosampleId", 1), ("variation.referenceBases", 1), ("variation.alternateBases", 1)]) client.beacon.genomicVariations.create_index([("variation.location.interval.end.value", -1), ("variation.location.interval.start.value", 1), ("variation.referenceBases", 1), ("variation.alternateBases", 1)]) client.beacon.genomicVariations.create_index([("datasetId", 1), ("variation.location.interval.start.value", 1), ("variation.referenceBases", 1), ("variation.alternateBases", 1)]) diff --git a/beacon/connections/mongo/runs.py b/beacon/connections/mongo/runs.py index 71e2e2e..ff754d6 100644 --- a/beacon/connections/mongo/runs.py +++ b/beacon/connections/mongo/runs.py @@ -20,7 +20,7 @@ def get_runs(self, entry_id: Optional[str], qparams: RequestParams, dataset: str query_parameters={}# pragma: no cover else: query={} - query = apply_filters(self, query, qparams.query.filters, collection, query_parameters) + query = apply_filters(self, query, qparams.query.filters, collection, query_parameters, dataset) schema = DefaultSchemas.RUNS include = qparams.query.include_resultset_responses limit = qparams.query.pagination.limit @@ -35,7 +35,7 @@ def get_runs(self, entry_id: Optional[str], qparams: RequestParams, dataset: str def get_run_with_id(self, entry_id: Optional[str], qparams: RequestParams, dataset: str): collection = 'runs' mongo_collection = client.beacon.runs - query = apply_filters(self, {}, qparams.query.filters, collection, {}) + query = apply_filters(self, {}, qparams.query.filters, collection, {}, dataset) query = query_id(self, query, entry_id) schema = DefaultSchemas.RUNS include = qparams.query.include_resultset_responses @@ -52,15 +52,38 @@ def get_variants_of_run(self, entry_id: Optional[str], qparams: RequestParams, d collection = 'runs' mongo_collection = client.beacon.genomicVariations query = {"$and": [{"id": entry_id}]} - query = apply_filters(self, query, qparams.query.filters, collection, {}) + query = apply_filters(self, query, qparams.query.filters, collection, {}, dataset) run_ids = client.beacon.runs \ .find_one(query, {"biosampleId": 1, "_id": 0}) - query = {"caseLevelData.biosampleId": run_ids["biosampleId"]} - queryid={} - queryid["datasetId"]=dataset - query["$or"]=[] - query["$or"].append(queryid) - query = apply_filters(self, query, qparams.query.filters, collection, {}) + targets = client.beacon.targets \ + .find({"datasetId": dataset}, {"biosampleIds": 1, "_id": 0}) + position=0 + bioids=targets[0]["biosampleIds"] + for bioid in bioids: + if bioid == run_ids["biosampleId"]: + break + position+=1 + position=str(position) + position1="^"+position+"," + position2=","+position+"," + position3=","+position+"$" + query_cl={ "$or": [ + {"biosampleIds": {"$regex": position1}}, + {"biosampleIds": {"$regex": position2}}, + {"biosampleIds": {"$regex": position3}} + ]} + string_of_ids = client.beacon.caseLevelData \ + .find(query_cl, {"id": 1, "_id": 0}) + HGVSIds=list(string_of_ids) + query={} + queryHGVS={} + listHGVS=[] + for HGVSId in HGVSIds: + justid=HGVSId["id"] + listHGVS.append(justid) + queryHGVS["$in"]=listHGVS + query["identifiers.genomicHGVSId"]=queryHGVS + query = apply_filters(self, query, qparams.query.filters, collection, {}, dataset) schema = DefaultSchemas.GENOMICVARIATIONS include = qparams.query.include_resultset_responses limit = qparams.query.pagination.limit @@ -76,7 +99,7 @@ def get_analyses_of_run(self, entry_id: Optional[str], qparams: RequestParams, d collection = 'runs' mongo_collection = client.beacon.analyses query = {"runId": entry_id} - query = apply_filters(self, query, qparams.query.filters, collection, {}) + query = apply_filters(self, query, qparams.query.filters, collection, {}, dataset) schema = DefaultSchemas.RUNS include = qparams.query.include_resultset_responses limit = qparams.query.pagination.limit @@ -84,5 +107,4 @@ def get_analyses_of_run(self, entry_id: Optional[str], qparams: RequestParams, d if limit > 100 or limit == 0: limit = 100# pragma: no cover idq="biosampleId" - count, dataset_count, docs = get_docs_by_response_type(self, include, query, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs, dataset \ No newline at end of file + count, dataset_count, docs = get_docs_by_response_type(self, include, query, dataset, limit, skip, mongo_collection, idq) \ No newline at end of file diff --git a/ri-tools/conf/conf.py b/ri-tools/conf/conf.py index 9590156..ea005f5 100644 --- a/ri-tools/conf/conf.py +++ b/ri-tools/conf/conf.py @@ -6,7 +6,7 @@ allele_frequency=1 # introduce float number, leave 1 if you want to convert all the variants reference_genome='GRCh37' # Choose one between NCBI36, GRCh37, GRCh38 datasetId='COVID_pop11_fin_2' -case_level_data=False +case_level_data=True num_rows=7000000 ### MongoDB parameters ###