From e3aeee61de2e510173a4c4f190fe2339a0910260 Mon Sep 17 00:00:00 2001 From: Paul-Christian Volkmer Date: Tue, 12 Mar 2024 13:13:31 +0100 Subject: feat: salted re-hash IDs within MTB file except patient ID --- .../etl/processor/pseudonym/PseudonymizeService.kt | 4 + .../dev/dnpm/etl/processor/pseudonym/extensions.kt | 172 ++++++++++++++++++++- .../etl/processor/services/RequestProcessor.kt | 2 + 3 files changed, 177 insertions(+), 1 deletion(-) (limited to 'src/main') diff --git a/src/main/kotlin/dev/dnpm/etl/processor/pseudonym/PseudonymizeService.kt b/src/main/kotlin/dev/dnpm/etl/processor/pseudonym/PseudonymizeService.kt index ab8ce2f..d18cd2c 100644 --- a/src/main/kotlin/dev/dnpm/etl/processor/pseudonym/PseudonymizeService.kt +++ b/src/main/kotlin/dev/dnpm/etl/processor/pseudonym/PseudonymizeService.kt @@ -33,4 +33,8 @@ class PseudonymizeService( } } + fun prefix(): String { + return configProperties.prefix + } + } \ No newline at end of file diff --git a/src/main/kotlin/dev/dnpm/etl/processor/pseudonym/extensions.kt b/src/main/kotlin/dev/dnpm/etl/processor/pseudonym/extensions.kt index 0bf4913..014570a 100644 --- a/src/main/kotlin/dev/dnpm/etl/processor/pseudonym/extensions.kt +++ b/src/main/kotlin/dev/dnpm/etl/processor/pseudonym/extensions.kt @@ -20,7 +20,14 @@ package dev.dnpm.etl.processor.pseudonym import de.ukw.ccc.bwhc.dto.MtbFile +import org.apache.commons.codec.digest.DigestUtils +/** Replaces patient ID with generated patient pseudonym + * + * @param pseudonymizeService The pseudonymizeService to be used + * + * @return Nothing; this MTB file is modified in place so that it contains patient pseudonyms + */ infix fun MtbFile.pseudonymizeWith(pseudonymizeService: PseudonymizeService) { val patientPseudonym = pseudonymizeService.patientPseudonym(this.patient.id) @@ -46,8 +53,171 @@ infix fun MtbFile.pseudonymizeWith(pseudonymizeService: PseudonymizeService) { this.previousGuidelineTherapies.forEach { it.patient = patientPseudonym } this.rebiopsyRequests.forEach { it.patient = 
patientPseudonym } this.recommendations.forEach { it.patient = patientPseudonym } - this.recommendations.forEach { it.patient = patientPseudonym } this.responses.forEach { it.patient = patientPseudonym } this.studyInclusionRequests.forEach { it.patient = patientPseudonym } this.specimens.forEach { it.patient = patientPseudonym } +} + +/** + * Creates new hashes of content IDs with the given prefix, except for patient IDs + * + * @param pseudonymizeService The pseudonymizeService to be used + * + * @return Nothing; this MTB file is modified in place so that it contains the rehashed content IDs + */ +infix fun MtbFile.anonymizeContentWith(pseudonymizeService: PseudonymizeService) { + val prefix = pseudonymizeService.prefix() + + fun anonymize(id: String): String { + val hash = DigestUtils.sha256Hex("$prefix-$id").substring(0, 41).lowercase() + return "$prefix$hash" + } + + this.episode.apply { + id = anonymize(id) + } + this.carePlans.onEach { carePlan -> + carePlan.apply { + id = anonymize(id) + diagnosis = anonymize(diagnosis) + geneticCounsellingRequest = anonymize(geneticCounsellingRequest) + rebiopsyRequests = rebiopsyRequests.map { anonymize(it) } + recommendations = recommendations.map { anonymize(it) } + studyInclusionRequests = studyInclusionRequests.map { anonymize(it) } + } + } + this.claims.onEach { claim -> + claim.apply { + id = anonymize(id) + therapy = anonymize(therapy) + } + } + this.claimResponses.onEach { claimResponse -> + claimResponse.apply { + id = anonymize(id) + claim = anonymize(claim) + } + } + this.consent.apply { + id = anonymize(id) + } + this.diagnoses.onEach { diagnosis -> + diagnosis.apply { + id = anonymize(id) + histologyResults = histologyResults.map { anonymize(it) } + } + } + this.ecogStatus.onEach { ecogStatus -> + ecogStatus.apply { + id = anonymize(id) + } + } + this.familyMemberDiagnoses.onEach { familyMemberDiagnosis -> + familyMemberDiagnosis.apply { + id = anonymize(id) + } + } + this.geneticCounsellingRequests.onEach { geneticCounsellingRequest -> + 
geneticCounsellingRequest.apply { + id = anonymize(id) + } + } + this.histologyReevaluationRequests.onEach { histologyReevaluationRequest -> + histologyReevaluationRequest.apply { + id = anonymize(id) + specimen = anonymize(specimen) + } + } + this.histologyReports.onEach { histologyReport -> + histologyReport.apply { + id = anonymize(id) + specimen = anonymize(specimen) + tumorMorphology.apply { + id = anonymize(id) + specimen = anonymize(specimen) + } + tumorCellContent.apply { + id = anonymize(id) + specimen = anonymize(specimen) + } + } + } + this.lastGuidelineTherapies.onEach { lastGuidelineTherapy -> + lastGuidelineTherapy.apply { + id = anonymize(id) + diagnosis = anonymize(diagnosis) + } + } + this.molecularPathologyFindings.onEach { molecularPathologyFinding -> + molecularPathologyFinding.apply { + id = anonymize(id) + specimen = anonymize(specimen) + } + } + this.molecularTherapies.onEach { molecularTherapy -> + molecularTherapy.apply { + history.onEach { history -> + history.apply { + id = anonymize(id) + basedOn = anonymize(basedOn) + } + } + } + } + this.ngsReports.onEach { ngsReport -> + ngsReport.apply { + id = anonymize(id) + specimen = anonymize(specimen) + tumorCellContent.apply { + id = anonymize(id) + specimen = anonymize(specimen) + } + simpleVariants.onEach { simpleVariant -> + simpleVariant.apply { + id = anonymize(id) + } + } + } + } + this.previousGuidelineTherapies.onEach { previousGuidelineTherapy -> + previousGuidelineTherapy.apply { + id = anonymize(id) + diagnosis = anonymize(diagnosis) + this.medication.forEach { medication -> + medication.apply { + id = anonymize(id) + } + } + } + } + this.rebiopsyRequests.onEach { rebiopsyRequest -> + rebiopsyRequest.apply { + id = anonymize(id) + specimen = anonymize(specimen) + } + } + this.recommendations.onEach { recommendation -> + recommendation.apply { + id = anonymize(id) + diagnosis = anonymize(diagnosis) + ngsReport = anonymize(ngsReport) + } + } + this.responses.onEach { response -> + 
response.apply { + id = anonymize(id) + therapy = anonymize(therapy) + } + } + this.studyInclusionRequests.onEach { studyInclusionRequest -> + studyInclusionRequest.apply { + id = anonymize(id) + reason = anonymize(reason) + } + } + this.specimens.onEach { specimen -> + specimen.apply { + id = anonymize(id) + } + } } \ No newline at end of file diff --git a/src/main/kotlin/dev/dnpm/etl/processor/services/RequestProcessor.kt b/src/main/kotlin/dev/dnpm/etl/processor/services/RequestProcessor.kt index 66ff291..bdf07cb 100644 --- a/src/main/kotlin/dev/dnpm/etl/processor/services/RequestProcessor.kt +++ b/src/main/kotlin/dev/dnpm/etl/processor/services/RequestProcessor.kt @@ -28,6 +28,7 @@ import dev.dnpm.etl.processor.monitoring.RequestStatus import dev.dnpm.etl.processor.monitoring.RequestType import dev.dnpm.etl.processor.output.MtbFileSender import dev.dnpm.etl.processor.pseudonym.PseudonymizeService +import dev.dnpm.etl.processor.pseudonym.anonymizeContentWith import dev.dnpm.etl.processor.pseudonym.pseudonymizeWith import org.apache.commons.codec.binary.Base32 import org.apache.commons.codec.digest.DigestUtils @@ -55,6 +56,7 @@ class RequestProcessor( val pid = mtbFile.patient.id mtbFile pseudonymizeWith pseudonymizeService + mtbFile anonymizeContentWith pseudonymizeService val request = MtbFileSender.MtbFileRequest(requestId, transformationService.transform(mtbFile)) -- cgit v1.2.3