Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add find-population-molecular-consequence API #94

Merged
merged 3 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 65 additions & 19 deletions FHIRGenomicsOperations.postman_collection.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"method": "GET",
"header": [],
"url": {
"raw": "https://fhir-gen-ops.herokuapp.com/subject-operations/genotype-operations/$find-subject-variants?subject=NB6TK329&ranges=NC_000002.12:178525988-178807423&includeVariants=true&includePhasing=true",
"raw": "https://fhir-gen-ops.herokuapp.com/subject-operations/genotype-operations/$find-subject-variants?subject=NB6TK329&testIdentifiers=TL000016660-ASM&includeVariants=true&ranges=NC_000002.12:178525988-178807423,NC_000002.12:178525988-178807423",
"protocol": "https",
"host": [
"fhir-gen-ops",
Expand All @@ -33,14 +33,9 @@
"key": "subject",
"value": "NB6TK329"
},
{
"key": "ranges",
"value": "NC_000002.12:178525988-178807423"
},
{
"key": "testIdentifiers",
"value": "TL000016660-ASM",
"disabled": true
"value": "TL000016660-ASM"
},
{
"key": "specimenIdentifiers",
Expand Down Expand Up @@ -68,7 +63,12 @@
},
{
"key": "includePhasing",
"value": "true"
"value": "true",
"disabled": true
},
{
"key": "ranges",
"value": "NC_000002.12:178525988-178807423,NC_000002.12:178525988-178807423"
}
]
}
Expand Down Expand Up @@ -1763,7 +1763,7 @@
"method": "GET",
"header": [],
"url": {
"raw": "https://fhir-gen-ops.herokuapp.com/subject-operations/phenotype-operations/$find-subject-molecular-consequences?subject=HG00403&ranges=NC_000001.10:86852500-86852800&featureConsequences=http://sequenceontology.org|SO:0001575",
"raw": "https://fhir-gen-ops.herokuapp.com/subject-operations/phenotype-operations/$find-subject-molecular-consequences?subject=NB6TK329&variants=NC_000006.12:7576293:G:A",
"protocol": "https",
"host": [
"fhir-gen-ops",
Expand All @@ -1778,20 +1778,21 @@
"query": [
{
"key": "subject",
"value": "HG00403"
"value": "NB6TK329"
},
{
"key": "variants",
"value": "NM_031475.3:c.2217C>T",
"disabled": true
"value": "NC_000006.12:7576293:G:A"
},
{
"key": "ranges",
"value": "NC_000001.10:86852500-86852800"
"value": "NC_000001.10:86852500-86852800",
"disabled": true
},
{
"key": "featureConsequences",
"value": "http://sequenceontology.org|SO:0001575"
"value": "http://sequenceontology.org|SO:0001575",
"disabled": true
},
{
"key": "variants",
Expand Down Expand Up @@ -2735,7 +2736,7 @@
"method": "GET",
"header": [],
"url": {
"raw": "https://fhir-gen-ops.herokuapp.com/population-operations/phenotype-operations/$find-population-dx-implications?conditions=https://www.ncbi.nlm.nih.gov/medgen|C1708353",
"raw": "https://fhir-gen-ops.herokuapp.com/population-operations/phenotype-operations/$find-population-dx-implications?variants=NC_000019.10:38499669:C:T&includePatientList=true",
"protocol": "https",
"host": [
"fhir-gen-ops",
Expand All @@ -2749,12 +2750,16 @@
],
"query": [
{
"key": "conditions",
"value": "https://www.ncbi.nlm.nih.gov/medgen|C1708353"
"key": "variants",
"value": "NC_000019.10:38499669:C:T"
},
{
"key": "variants",
"value": "NC_000019.10:38499669:C:T",
"key": "includePatientList",
"value": "true"
},
{
"key": "conditions",
"value": "https://www.ncbi.nlm.nih.gov/medgen|C1708353",
"disabled": true
}
]
Expand Down Expand Up @@ -2828,6 +2833,47 @@
}
]
},
{
"name": "find-population-molecular-consequences",
"item": [
{
"name": "find-population-molecular-consequences",
"request": {
"method": "GET",
"header": [],
"url": {
"raw": "https://fhir-gen-ops.herokuapp.com/population-operations/phenotype-operations/$find-population-molecular-consequences?variants=NC_000001.10:152785039:G:T&featureConsequences=http://sequenceontology.org|SO:0001583&includePatientList=true",
"protocol": "https",
"host": [
"fhir-gen-ops",
"herokuapp",
"com"
],
"path": [
"population-operations",
"phenotype-operations",
"$find-population-molecular-consequences"
],
"query": [
{
"key": "variants",
"value": "NC_000001.10:152785039:G:T"
},
{
"key": "featureConsequences",
"value": "http://sequenceontology.org|SO:0001583"
},
{
"key": "includePatientList",
"value": "true"
}
]
}
},
"response": []
}
]
},
{
"name": "find-study-metadata",
"item": [
Expand Down
57 changes: 57 additions & 0 deletions app/api_spec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,8 @@ paths:
type: array
items:
type: string
example:
- "http://sequenceontology.org|SO:0001629"
style: "form"
explode: false
- name: testIdentifiers
Expand Down Expand Up @@ -1181,6 +1183,61 @@ paths:
default: false
description: Include list of matching patients if set to true.

# Population-level molecular-consequence query. The operationId below names the
# Python handler (app/endpoints.py) that serves this path.
/population-operations/phenotype-operations/$find-population-molecular-consequences:
get:
description: |-
Retrieve count or list of patients having molecular consequences. More specifically, this operation retrieves the count +/- list of patients that have molecular consequences involving specific featureConsequences, derived from specific variants.

A patient meets numerator criteria if they have at least one molecular consequence matching the query parameters.
summary: "Find Population Molecular Consequences"
operationId: "app.endpoints.find_population_molecular_consequences"
tags:
- "Population Phenotype Operations"
responses:
'200':
description: "Returns a FHIR Parameters resource containing a count +/- list of patients having at least one matching molecular consequence."
content:
application/json:
schema:
type: object
# Neither 'variants' nor 'featureConsequences' is marked required here, but the
# handler rejects a request that supplies neither of them with HTTP 400.
parameters:
# Array parameters use style=form with explode=false, i.e. a single
# comma-separated query value (variants=a,b) rather than repeated keys.
- name: variants
in: query
description: List of variants from which consequences are derived. Must be in HGVS or SPDI format.
schema:
type: array
items:
type: string
example:
- "NC_000001.10:152785039:G:T"
style: "form"
explode: false
- name: featureConsequences
in: query
description: List of consequences sought. Must be in token or codesystem|code format.
schema:
type: array
items:
type: string
example:
- "http://sequenceontology.org|SO:0001583"
style: "form"
explode: false
- name: genomicSourceClass
in: query
schema:
type: string
enum:
- "germline"
- "somatic"
description: Enables an App to limit results to those that are 'germline' or 'somatic'. Default is to include variants irrespective of genomic source class.
- name: includePatientList
in: query
schema:
type: boolean
default: false
description: Include list of matching patients if set to true.

/utilities/get-feature-coordinates:
get:
description: 'This utility returns genomic feature coordinates and other annotations. All data are from <a href="https://www.ncbi.nlm.nih.gov/genome/guide/human/">NCBI Human Genome Resources</a>. For chromosomes, build 37 and build 38 reference sequences are returned. For genes, genomic coordinates are returned, along with a list of transcripts. MANE transcript is flagged. For transcripts, genomic coordinates are returned, along with the gene name and composite exons, along with exon coordinates. For proteins, the corresponding transcript is returned.'
Expand Down
9 changes: 7 additions & 2 deletions app/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1859,7 +1859,7 @@ def query_transcript(transcript):
return results


def query_molecular_consequences_by_variants(normalized_variant_list, feature_consequence_list, query):
def query_molecular_consequences_by_variants(normalized_variant_list, feature_consequence_list, query, population=False):
variant_list = []
for item in normalized_variant_list:
if "GRCh37" in item:
Expand All @@ -1883,12 +1883,17 @@ def query_molecular_consequences_by_variants(normalized_variant_list, feature_co
]})
pipeline_part[-1]['$match']['$or'] = or_query

query['SPDI'] = {'$in': variant_list}
if normalized_variant_list != []:
query['SPDI'] = {'$in': variant_list}

query_string = [{'$match': query},
{'$lookup': {'from': 'MolecConseq', 'let': {'myvariant_id': '$_id'}, 'pipeline': pipeline_part,
'as': 'molecularConsequenceMatches'}},
{'$match': {'molecularConsequenceMatches': {'$exists': True, '$not': {'$size': 0}}}}]

if population:
query_string.append({'$group': {'_id': '$patientID'}})

try:
results = variants_db.aggregate(query_string)
results = list(results)
Expand Down
72 changes: 72 additions & 0 deletions app/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -2322,3 +2322,75 @@ def find_population_dx_implications(
result.pop("parameter")

return jsonify(result)


def find_population_molecular_consequences(
        variants=None, featureConsequences=None, genomicSourceClass=None,
        includePatientList=None):
    """Count (and optionally list) patients with matching molecular consequences.

    Args:
        variants: Optional list of variant strings; each one is normalized
            with ``common.get_variant``.
        featureConsequences: Optional list of consequence strings; each one is
            normalized with ``common.get_feature_consequence``.
        genomicSourceClass: Optional string; stripped, lower-cased and matched
            exactly against the ``genomicSourceClass`` field of the query.
        includePatientList: When truthy, one ``subject`` part per matching
            patient ID (sorted) is added after the counts.

    Returns:
        The ``jsonify``-ed Parameters resource holding a ``consequences``
        parameter with ``numerator`` and ``denominator`` parts.

    Aborts:
        400 when neither ``variants`` nor ``featureConsequences`` is supplied.
        422 when a variant lacks a coordinate for a genome build that
        ``common.get_genomics_build_presence`` reports as present.
    """
    # NOTE(review): indentation was reconstructed; the lookup and response
    # assembly are assumed to run even when only featureConsequences is given.
    if not (variants or featureConsequences):
        abort(400, "You must supply either 'variants' or 'featureConsequences'.")

    consequence_filters = (
        [common.get_feature_consequence(entry) for entry in featureConsequences]
        if featureConsequences else []
    )

    # Base Mongo filter shared by the build-presence check and the lookup.
    query = {}
    if genomicSourceClass:
        query["genomicSourceClass"] = {"$eq": genomicSourceClass.strip().lower()}

    variant_records = (
        [common.get_variant(entry) for entry in variants] if variants else []
    )

    result = OrderedDict()
    result["resourceType"] = "Parameters"
    result["parameter"] = []

    if variant_records:
        build_presence = common.get_genomics_build_presence(query)

        # A variant must carry a coordinate for every build present in the data.
        for record in variant_records:
            if ((not record["GRCh37"] and build_presence["GRCh37"])
                    or (not record["GRCh38"] and build_presence["GRCh38"])):
                abort(422, f'Failed LiftOver. Variant: {record["variant"]}')

    # population=True: the helper groups matches by patient, one row per patient.
    matches = common.query_molecular_consequences_by_variants(
        variant_records, consequence_filters, query, True)

    parts = [
        {"name": "numerator", "valueQuantity": {'value': len(matches)}},
        {
            "name": "denominator",
            "valueQuantity": {"value": common.patients_db.count_documents({})},
        },
    ]

    if includePatientList:
        patient_ids = sorted(f'{row["_id"]}' for row in matches)
        parts.extend(
            {"name": "subject", "valueString": patient_id}
            for patient_id in patient_ids
        )

    parameter = OrderedDict()
    parameter["name"] = "consequences"
    parameter["part"] = parts
    result["parameter"].append(parameter)

    if not result["parameter"]:
        result.pop("parameter")
    return jsonify(result)
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"resourceType": "Parameters",
"parameter": [
{
"name": "consequences",
"part": [
{
"name": "numerator",
"valueQuantity": {
"value": 1
}
},
{
"name": "denominator",
"valueQuantity": {
"value": 1116
}
},
{
"name": "subject",
"valueString": "TCGA-DD-A1EH"
}
]
}
]
}
13 changes: 13 additions & 0 deletions tests/integration_tests/test_population_phenotype_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,16 @@ def test_find_population_dx_implications_4(client):
response = client.get(url)

tu.compare_actual_and_expected_output(f'{tu.FIND_POPULATION_DX_IMPLICATIONS_OUTPUT_DIR}4.json', response.json)


"""
Find Population Molecular Consequences Tests
-------------------------------------
"""


def test_find_population_molecular_consequences_1(client):
    """Variant + feature consequence + patient list should match fixture 1.json."""
    query = (
        'variants=NC_000001.10:152785039:G:T'
        '&featureConsequences=http://sequenceontology.org|SO:0001583'
        '&includePatientList=true'
    )
    response = client.get(tu.find_population_molecular_consequences_query(query))

    expected_path = f'{tu.FIND_POPULATION_MOLECULAR_CONSEQUENCES_OUTPUT_DIR}1.json'
    tu.compare_actual_and_expected_output(expected_path, response.json)
7 changes: 7 additions & 0 deletions tests/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@
# $find-population-dx-implications: endpoint path and expected-output fixture directory.
FIND_POPULATION_DX_IMPLICATIONS_URL = "/population-operations/phenotype-operations/$find-population-dx-implications"
FIND_POPULATION_DX_IMPLICATIONS_OUTPUT_DIR = "tests/expected_outputs/find_population_dx_implications/"

# $find-population-molecular-consequences: endpoint path and expected-output fixture directory.
FIND_POPULATION_MOLECULAR_CONSEQUENCES_URL = "/population-operations/phenotype-operations/$find-population-molecular-consequences"
FIND_POPULATION_MOLECULAR_CONSEQUENCES_OUTPUT_DIR = "tests/expected_outputs/find_population_molecular_consequences/"

# get-feature-coordinates utility: endpoint path and expected-output fixture directory.
GET_FEATURE_COORDINATES_URL = "/utilities/get-feature-coordinates"
GET_FEATURE_COORDINATES_OUTPUT_DIR = "tests/expected_outputs/get_feature_coordinates/"

Expand Down Expand Up @@ -122,6 +125,10 @@ def find_population_dx_implications_query(query):
return f"{FIND_POPULATION_DX_IMPLICATIONS_URL}?{query}"


def find_population_molecular_consequences_query(query):
    """Return the $find-population-molecular-consequences URL with *query* appended after '?'."""
    return "{}?{}".format(FIND_POPULATION_MOLECULAR_CONSEQUENCES_URL, query)


def get_feature_coordinates_query(query):
    """Return the get-feature-coordinates URL with *query* appended after '?'."""
    return "{}?{}".format(GET_FEATURE_COORDINATES_URL, query)

Expand Down
Loading