From e2b1763da105dd913bdf12945cdd7d05a7ca9f47 Mon Sep 17 00:00:00 2001 From: Paul-Christian Volkmer Date: Thu, 20 Nov 2025 12:42:58 +0100 Subject: fix: possible NPE for MSI anonymization This might occur if a null value is present in the list of MSI findings. With this change, the usage of "it" has been replaced with proper element names. --- .../dev/dnpm/etl/processor/pseudonym/extensions.kt | 469 ++++++++++----------- .../dnpm/etl/processor/pseudonym/ExtensionsTest.kt | 400 +++++++++--------- 2 files changed, 427 insertions(+), 442 deletions(-) (limited to 'src') diff --git a/src/main/kotlin/dev/dnpm/etl/processor/pseudonym/extensions.kt b/src/main/kotlin/dev/dnpm/etl/processor/pseudonym/extensions.kt index 179c69d..387119f 100644 --- a/src/main/kotlin/dev/dnpm/etl/processor/pseudonym/extensions.kt +++ b/src/main/kotlin/dev/dnpm/etl/processor/pseudonym/extensions.kt @@ -25,282 +25,243 @@ import dev.pcvolkmer.mv64e.mtb.Mtb import dev.pcvolkmer.mv64e.mtb.MvhMetadata import org.apache.commons.codec.digest.DigestUtils -/** Replaces patient ID with generated patient pseudonym - * - * @since 0.11.0 +/** + * Replaces patient ID with generated patient pseudonym * * @param pseudonymizeService The pseudonymizeService to be used * @return The MTB file containing patient pseudonymes + * @since 0.11.0 */ infix fun Mtb.pseudonymizeWith(pseudonymizeService: PseudonymizeService) { - val patientPseudonym = pseudonymizeService.patientPseudonym(PatientId(this.patient.id)).value - - this.episodesOfCare?.forEach { it.patient?.id = patientPseudonym } - this.carePlans?.forEach { - it.patient.id = patientPseudonym - it.rebiopsyRequests?.forEach { it.patient?.id = patientPseudonym } - it.histologyReevaluationRequests?.forEach { it.patient?.id = patientPseudonym } - it.medicationRecommendations?.forEach { it.patient?.id = patientPseudonym } - it.studyEnrollmentRecommendations?.forEach { it.patient?.id = patientPseudonym } - it.procedureRecommendations?.forEach { it.patient?.id = 
patientPseudonym } - it.geneticCounselingRecommendation?.patient?.id = patientPseudonym - } - this.diagnoses?.forEach { it.patient?.id = patientPseudonym } - this.guidelineTherapies?.forEach { it.patient?.id = patientPseudonym } - this.guidelineProcedures?.forEach { it.patient?.id = patientPseudonym } - this.patient.id = patientPseudonym - this.claims?.forEach { it.patient?.id = patientPseudonym } - this.claimResponses?.forEach { it.patient?.id = patientPseudonym } - this.diagnoses?.forEach { it.patient?.id = patientPseudonym } - this.familyMemberHistories?.forEach { it.patient?.id = patientPseudonym } - this.histologyReports?.forEach { - it.patient.id = patientPseudonym - it.results.tumorMorphology?.patient?.id = patientPseudonym - it.results.tumorCellContent?.patient?.id = patientPseudonym - } - this.ngsReports?.forEach { - it.patient?.id = patientPseudonym - it.results?.simpleVariants?.forEach { it.patient?.id = patientPseudonym } - it.results?.copyNumberVariants?.forEach { it.patient?.id = patientPseudonym } - it.results?.dnaFusions?.forEach { it.patient?.id = patientPseudonym } - it.results?.rnaFusions?.forEach { it.patient?.id = patientPseudonym } - it.results?.tumorCellContent?.patient?.id = patientPseudonym - it.results?.brcaness?.patient?.id = patientPseudonym - it.results?.tmb?.patient?.id = patientPseudonym - it.results?.hrdScore?.patient?.id = patientPseudonym - } - this.ihcReports?.forEach { - it.patient?.id = patientPseudonym - it.results?.msiMmr?.forEach { it.patient?.id = patientPseudonym } - it.results?.proteinExpression?.forEach { it.patient?.id = patientPseudonym } - } - this.responses?.forEach { it.patient?.id = patientPseudonym } - this.specimens?.forEach { it.patient?.id = patientPseudonym } - this.priorDiagnosticReports?.forEach { it.patient?.id = patientPseudonym } - this.performanceStatus?.forEach { it.patient?.id = patientPseudonym } - this.systemicTherapies?.forEach { - it.history?.forEach { - it.patient?.id = patientPseudonym - } - } - 
this.followUps?.forEach { - it.patient?.id = patientPseudonym - } - - this.msiFindings?.forEach { it -> it.patient?.id = patientPseudonym } - - this.metadata?.researchConsents?.forEach { it -> - val entry = it ?: return@forEach - if (entry.contains("patient")) { - // here we expect only a patient reference any other data like display - // need to be removed, since may contain unsecure data - entry.remove("patient") - entry["patient"] = mapOf("reference" to "Patient/$patientPseudonym") - } + val patientPseudonym = pseudonymizeService.patientPseudonym(PatientId(this.patient.id)).value + + this.episodesOfCare?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + this.carePlans?.filterNotNull()?.forEach { carePlan -> + carePlan.patient.id = patientPseudonym + carePlan.rebiopsyRequests?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + carePlan.histologyReevaluationRequests?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + carePlan.medicationRecommendations?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + carePlan.studyEnrollmentRecommendations?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + carePlan.procedureRecommendations?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + carePlan.geneticCounselingRecommendation?.patient?.id = patientPseudonym + } + this.diagnoses?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + this.guidelineTherapies?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + this.guidelineProcedures?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + this.patient.id = patientPseudonym + this.claims?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + this.claimResponses?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + this.familyMemberHistories?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + this.histologyReports?.filterNotNull()?.forEach { + it.patient.id = 
patientPseudonym + it.results.tumorMorphology?.patient?.id = patientPseudonym + it.results.tumorCellContent?.patient?.id = patientPseudonym + } + this.ngsReports?.filterNotNull()?.forEach { ngsReport -> + ngsReport.patient?.id = patientPseudonym + ngsReport.results?.simpleVariants?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + ngsReport.results?.copyNumberVariants?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + ngsReport.results?.dnaFusions?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + ngsReport.results?.rnaFusions?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + ngsReport.results?.tumorCellContent?.patient?.id = patientPseudonym + ngsReport.results?.brcaness?.patient?.id = patientPseudonym + ngsReport.results?.tmb?.patient?.id = patientPseudonym + ngsReport.results?.hrdScore?.patient?.id = patientPseudonym + } + this.ihcReports?.filterNotNull()?.forEach { ihcReports -> + ihcReports.patient?.id = patientPseudonym + ihcReports.results?.msiMmr?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + ihcReports.results?.proteinExpression?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + } + this.responses?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + this.specimens?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + this.priorDiagnosticReports?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + this.performanceStatus?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + this.systemicTherapies?.filterNotNull()?.forEach { systemicTherapy -> systemicTherapy.history?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } } + this.followUps?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + + this.msiFindings?.filterNotNull()?.forEach { it.patient?.id = patientPseudonym } + + this.metadata?.researchConsents?.filterNotNull()?.forEach { researchConsent -> + if 
(researchConsent.contains("patient")) { + // here we expect only a patient reference any other data like display + // need to be removed, since may contain unsecure data + researchConsent.remove("patient") + researchConsent["patient"] = mapOf("reference" to "Patient/$patientPseudonym") } + } } /** * Creates new hash of content IDs with given prefix except for patient IDs * - * @since 0.11.0 - * * @param pseudonymizeService The pseudonymizeService to be used * @return The MTB file containing rehashed content IDs + * @since 0.11.0 */ infix fun Mtb.anonymizeContentWith(pseudonymizeService: PseudonymizeService) { - val prefix = pseudonymizeService.prefix() - - fun anonymize(id: String): String { - val hash = DigestUtils.sha256Hex("$prefix-$id").substring(0, 41).lowercase() - return "$prefix$hash" + val prefix = pseudonymizeService.prefix() + + fun anonymize(id: String): String { + val hash = DigestUtils.sha256Hex("$prefix-$id").substring(0, 41).lowercase() + return "$prefix$hash" + } + + this.episodesOfCare?.filterNotNull()?.forEach { episodeOfCare -> + episodeOfCare.apply { id = id?.let(::anonymize) } + episodeOfCare.diagnoses?.filterNotNull()?.forEach { it.id = it.id?.let(::anonymize) } + } + + this.carePlans?.onEach { carePlan -> + carePlan?.apply { + this.id = id?.let { anonymize(it) } + + this.geneticCounselingRecommendation?.apply { this.id = this.id?.let(::anonymize) } + this.rebiopsyRequests?.filterNotNull()?.forEach { rebiopsyRequest -> + rebiopsyRequest.id = rebiopsyRequest.id?.let(::anonymize) + rebiopsyRequest.tumorEntity?.id = rebiopsyRequest.tumorEntity?.id?.let(::anonymize) + } + this.histologyReevaluationRequests?.filterNotNull()?.forEach { histologyReevaluationRequest -> + histologyReevaluationRequest.id = histologyReevaluationRequest.id?.let(::anonymize) + histologyReevaluationRequest.specimen?.id = histologyReevaluationRequest.specimen?.id?.let(::anonymize) + } + + this.medicationRecommendations?.filterNotNull()?.forEach { medicationRecommendations 
-> + medicationRecommendations.id = medicationRecommendations.id?.let(::anonymize) + medicationRecommendations.supportingVariants?.filterNotNull()?.forEach { it.variant?.id = it.variant?.id?.let(::anonymize) } + medicationRecommendations.reason?.id = medicationRecommendations.reason?.id?.let(::anonymize) + } + this.reason?.id = this.reason?.id?.let(::anonymize) + this.studyEnrollmentRecommendations?.filterNotNull()?.forEach { studyEnrollmentRecommendation -> + studyEnrollmentRecommendation.reason?.id = studyEnrollmentRecommendation.reason?.id?.let(::anonymize) + } + this.procedureRecommendations?.filterNotNull()?.forEach { procedureRecommendation -> + procedureRecommendation.id = procedureRecommendation.id?.let(::anonymize) + procedureRecommendation.supportingVariants?.filterNotNull()?.forEach { it.variant?.id = it.variant?.id?.let(::anonymize) } + procedureRecommendation.reason?.id = procedureRecommendation.reason?.id?.let(::anonymize) + } + this.studyEnrollmentRecommendations?.filterNotNull()?.forEach { studyEnrollmentRecommendation -> + studyEnrollmentRecommendation.id = studyEnrollmentRecommendation.id?.let(::anonymize) + studyEnrollmentRecommendation.supportingVariants.forEach { it.variant?.id = it?.variant?.id?.let(::anonymize) } + } } - - this.episodesOfCare?.forEach { - it?.apply { id = id?.let(::anonymize) } - it.diagnoses?.forEach { it -> - it?.id = it.id?.let(::anonymize) - } + } + + this.responses?.filterNotNull()?.forEach { response -> + response.id = response.id?.let(::anonymize) + response.therapy?.id = response.therapy?.id?.let(::anonymize) + } + + this.diagnoses?.filterNotNull()?.forEach { diagnose -> + diagnose.id = diagnose.id?.let(::anonymize) + diagnose.histology?.filterNotNull()?.forEach { it.id = it.id?.let(::anonymize) } + } + + this.ngsReports?.filterNotNull()?.forEach { ngsReport -> + ngsReport.id = ngsReport.id?.let(::anonymize) + ngsReport.results?.tumorCellContent?.id = ngsReport.results.tumorCellContent?.id?.let(::anonymize) + 
ngsReport.results?.tumorCellContent?.specimen?.id = + ngsReport.results?.tumorCellContent?.specimen?.id?.let(::anonymize) + ngsReport.results?.rnaFusions?.filterNotNull()?.forEach { it.id = it.id?.let(::anonymize) } + ngsReport.results?.simpleVariants?.filterNotNull()?.forEach { + it.id = it.id?.let(::anonymize) + it.transcriptId?.value = it.transcriptId?.value?.let(::anonymize) } - - this.carePlans?.onEach { carePlan -> - carePlan?.apply { - this.id = id?.let { anonymize(it) } - - this.geneticCounselingRecommendation?.apply { - this.id = this.id?.let(::anonymize) - } - this.rebiopsyRequests?.forEach { it -> - it.id = it.id?.let(::anonymize) - it.tumorEntity?.id = it.tumorEntity?.id?.let(::anonymize) - } - this.histologyReevaluationRequests?.forEach { it -> - it.id = it?.id?.let(::anonymize) - it.specimen?.id = it.specimen?.id?.let(::anonymize) - } - - this.medicationRecommendations?.forEach { it -> - it.id = it?.id?.let(::anonymize) - it.supportingVariants?.forEach { it -> - it.variant?.id = it.variant?.id?.let(::anonymize) - } - it.reason?.id = it.reason?.id?.let(::anonymize) - } - this.reason?.id = this.reason?.id?.let(::anonymize) - this.studyEnrollmentRecommendations?.forEach { it -> - it?.reason?.id = it.reason?.id?.let(::anonymize) - } - this.procedureRecommendations?.forEach { it -> - it.id = it?.id?.let(::anonymize) - it.supportingVariants?.forEach { it -> - it.variant?.id = it.variant?.id?.let(::anonymize) - } - - it.reason?.id = it.reason?.id?.let(::anonymize) - - } - this.studyEnrollmentRecommendations?.forEach { it -> - it.id = it?.id?.let(::anonymize) - it.supportingVariants.forEach { it -> - it.variant?.id = it?.variant?.id?.let(::anonymize) - } - } - } - } - - - this.responses?.forEach { it -> - - it?.id = it.id?.let(::anonymize) - it?.therapy?.id = it.therapy?.id?.let(::anonymize) - - } - - this.diagnoses?.forEach { it -> - - it.id = it?.id?.let(::anonymize) - it.histology?.forEach { it -> it.id = it?.id?.let(::anonymize) } - } - - 
this.ngsReports?.forEach { it -> - it.id = it?.id?.let(::anonymize) - it.results?.tumorCellContent?.id = it.results.tumorCellContent?.id?.let(::anonymize) - it.results?.tumorCellContent?.specimen?.id = - it.results?.tumorCellContent?.specimen?.id?.let(::anonymize) - it.results?.rnaFusions?.forEach { it -> - it?.id = it.id?.let(::anonymize) - } - it.results?.simpleVariants?.forEach { it -> - it?.id = it.id?.let(::anonymize) - it?.transcriptId?.value = it.transcriptId?.value?.let(::anonymize) - } - it.results?.tmb?.id = it.results?.tmb?.id?.let(::anonymize) - it.results?.tmb?.specimen?.id = it.results?.tmb?.specimen?.id?.let(::anonymize) - - it.results?.brcaness?.id = it.results?.brcaness?.id?.let(::anonymize) - it.results?.brcaness?.specimen?.id = it.results?.brcaness?.specimen?.id?.let(::anonymize) - it.results?.copyNumberVariants?.forEach { it -> it?.id = it.id?.let(::anonymize) } - it.results?.hrdScore?.id = it.results?.hrdScore?.id?.let(::anonymize) - it.results?.hrdScore?.specimen?.id = it.results?.hrdScore?.specimen?.id?.let(::anonymize) - it.results?.rnaSeqs?.forEach { it -> it?.id = it.id?.let(::anonymize) } - it.results?.dnaFusions?.forEach { it -> it?.id = it.id?.let(::anonymize) } - it.specimen?.id = it?.specimen?.id?.let(::anonymize) - - } - - this.histologyReports?.forEach { it -> - it.id = it?.id?.let(::anonymize) - it.results?.tumorCellContent?.id = it.results?.tumorCellContent?.id?.let(::anonymize) - it.results?.tumorCellContent?.specimen?.id = - it.results?.tumorCellContent?.specimen?.id?.let(::anonymize) - - it.results?.tumorMorphology?.id = it.results?.tumorMorphology?.id?.let(::anonymize) - it.results?.tumorMorphology?.specimen?.id = - it.results?.tumorMorphology?.specimen?.id?.let(::anonymize) - it.specimen?.id = it.specimen?.id?.let(::anonymize) - - } - this.claimResponses?.forEach { it -> - it.id = it?.id?.let(::anonymize) - it.claim?.id = it.claim?.id?.let(::anonymize) - } - this.claims?.forEach { it -> - - it.id = it?.id?.let(::anonymize) - 
it.recommendation?.id = it.recommendation?.id?.let(::anonymize) - - } - this.familyMemberHistories?.forEach { it -> it.id = it?.id?.let(::anonymize) } - this.guidelineProcedures?.forEach { it -> - it.id = it?.id?.let(::anonymize) - it.reason?.id = it.reason?.id?.let(::anonymize) - it.basedOn?.id = it.basedOn?.id?.let(::anonymize) - - } - - this.guidelineTherapies?.forEach { it -> - it.id = it?.id?.let(::anonymize) - it.reason?.id = it.reason?.id?.let(::anonymize) - it.basedOn?.id = it.basedOn?.id?.let(::anonymize) - } - this.ihcReports?.forEach { it -> - it.id = it?.id?.let(::anonymize) - it.specimen?.id = it.specimen?.id?.let(::anonymize) - it.results?.proteinExpression?.forEach { it -> it?.id = it.id.let(::anonymize) } - } - - this.msiFindings?.forEach { it -> - - it.id = it?.id?.let(::anonymize) - it.specimen?.id = it.specimen?.id?.let(::anonymize) - } - - this.performanceStatus?.forEach { it -> it.id = it?.id?.let(::anonymize) } - - this.priorDiagnosticReports?.forEach { it -> - - it.id = it?.id?.let(::anonymize) - it.specimen?.id = it.specimen?.id?.let(::anonymize) - } - - this.specimens?.forEach { it -> - - it.id = it?.id?.let(::anonymize) - it.diagnosis?.id = it.diagnosis?.id?.let(::anonymize) - - } - - this.systemicTherapies?.forEach { it -> - - it.history?.forEach { it -> - - it.id = it?.id?.let(::anonymize) - it.reason?.id = it.reason?.id?.let(::anonymize) - it.basedOn?.id = it.basedOn?.id?.let(::anonymize) - } - + ngsReport.results?.tmb?.id = ngsReport.results?.tmb?.id?.let(::anonymize) + ngsReport.results?.tmb?.specimen?.id = ngsReport.results?.tmb?.specimen?.id?.let(::anonymize) + + ngsReport.results?.brcaness?.id = ngsReport.results?.brcaness?.id?.let(::anonymize) + ngsReport.results?.brcaness?.specimen?.id = ngsReport.results?.brcaness?.specimen?.id?.let(::anonymize) + ngsReport.results?.copyNumberVariants?.filterNotNull()?.forEach { it.id = it.id?.let(::anonymize) } + ngsReport.results?.hrdScore?.id = 
ngsReport.results?.hrdScore?.id?.let(::anonymize) + ngsReport.results?.hrdScore?.specimen?.id = ngsReport.results?.hrdScore?.specimen?.id?.let(::anonymize) + ngsReport.results?.rnaSeqs?.filterNotNull()?.forEach { it.id = it.id?.let(::anonymize) } + ngsReport.results?.dnaFusions?.filterNotNull()?.forEach { it.id = it.id?.let(::anonymize) } + ngsReport.specimen?.id = ngsReport.specimen?.id?.let(::anonymize) + } + + this.histologyReports?.filterNotNull()?.forEach { histologyReport -> + histologyReport.id = histologyReport.id?.let(::anonymize) + histologyReport.results?.tumorCellContent?.id = histologyReport.results?.tumorCellContent?.id?.let(::anonymize) + histologyReport.results?.tumorCellContent?.specimen?.id = + histologyReport.results?.tumorCellContent?.specimen?.id?.let(::anonymize) + + histologyReport.results?.tumorMorphology?.id = histologyReport.results?.tumorMorphology?.id?.let(::anonymize) + histologyReport.results?.tumorMorphology?.specimen?.id = + histologyReport.results?.tumorMorphology?.specimen?.id?.let(::anonymize) + histologyReport.specimen?.id = histologyReport.specimen?.id?.let(::anonymize) + } + this.claimResponses?.filterNotNull()?.forEach { claimResponse -> + claimResponse.id = claimResponse.id?.let(::anonymize) + claimResponse.claim?.id = claimResponse.claim?.id?.let(::anonymize) + } + this.claims?.filterNotNull()?.forEach { claim -> + claim.id = claim.id?.let(::anonymize) + claim.recommendation?.id = claim.recommendation?.id?.let(::anonymize) + } + this.familyMemberHistories?.filterNotNull()?.forEach { it.id = it.id?.let(::anonymize) } + this.guidelineProcedures?.filterNotNull()?.forEach { guidelineProcedure -> + guidelineProcedure.id = guidelineProcedure.id?.let(::anonymize) + guidelineProcedure.reason?.id = guidelineProcedure.reason?.id?.let(::anonymize) + guidelineProcedure.basedOn?.id = guidelineProcedure.basedOn?.id?.let(::anonymize) + } + + this.guidelineTherapies?.filterNotNull()?.forEach { guidelineTherapy -> + guidelineTherapy.id = 
guidelineTherapy.id?.let(::anonymize) + guidelineTherapy.reason?.id = guidelineTherapy.reason?.id?.let(::anonymize) + guidelineTherapy.basedOn?.id = guidelineTherapy.basedOn?.id?.let(::anonymize) + } + this.ihcReports?.filterNotNull()?.forEach { ihcReport -> + ihcReport.id = ihcReport.id?.let(::anonymize) + ihcReport.specimen?.id = ihcReport.specimen?.id?.let(::anonymize) + ihcReport.results?.proteinExpression?.filterNotNull()?.forEach { it.id = it.id.let(::anonymize) } + } + + this.msiFindings?.filterNotNull()?.forEach { msiFinding -> + msiFinding.id = msiFinding.id?.let(::anonymize) + msiFinding.specimen?.id = msiFinding.specimen?.id?.let(::anonymize) + } + + this.performanceStatus?.filterNotNull()?.forEach { it.id = it.id?.let(::anonymize) } + + this.priorDiagnosticReports?.filterNotNull()?.forEach { priorDiagnosticReport -> + priorDiagnosticReport.id = priorDiagnosticReport.id?.let(::anonymize) + priorDiagnosticReport.specimen?.id = priorDiagnosticReport.specimen?.id?.let(::anonymize) + } + + this.specimens?.filterNotNull()?.forEach { specimen -> + specimen.id = specimen.id?.let(::anonymize) + specimen.diagnosis?.id = specimen.diagnosis?.id?.let(::anonymize) + } + + this.systemicTherapies?.filterNotNull()?.forEach { systemicTherapy -> + systemicTherapy.history?.filterNotNull()?.forEach { history -> + history.id = history.id?.let(::anonymize) + history.reason?.id = history.reason?.id?.let(::anonymize) + history.basedOn?.id = history.basedOn?.id?.let(::anonymize) } + } } fun Mtb.ensureMetaDataIsInitialized() { - // init metadata if necessary - if (this.metadata == null) { - val mvhMetadata = MvhMetadata.builder().build() - this.metadata = mvhMetadata - } - if (this.metadata.researchConsents == null) { - this.metadata.researchConsents = mutableListOf() - } - if (this.metadata.modelProjectConsent == null) { - this.metadata.modelProjectConsent = ModelProjectConsent() - this.metadata.modelProjectConsent.provisions = mutableListOf() - } else if 
(this.metadata.modelProjectConsent.provisions != null) { - // make sure list can be changed - this.metadata.modelProjectConsent.provisions = - this.metadata.modelProjectConsent.provisions.toMutableList() - } + // init metadata if necessary + if (this.metadata == null) { + val mvhMetadata = MvhMetadata.builder().build() + this.metadata = mvhMetadata + } + if (this.metadata.researchConsents == null) { + this.metadata.researchConsents = mutableListOf() + } + if (this.metadata.modelProjectConsent == null) { + this.metadata.modelProjectConsent = ModelProjectConsent() + this.metadata.modelProjectConsent.provisions = mutableListOf() + } else if (this.metadata.modelProjectConsent.provisions != null) { + // make sure list can be changed + this.metadata.modelProjectConsent.provisions = + this.metadata.modelProjectConsent.provisions.toMutableList() + } } infix fun Mtb.addGenomDeTan(pseudonymizeService: PseudonymizeService) { - this.metadata?.transferTan = pseudonymizeService.genomDeTan(PatientId(this.patient.id)) + this.metadata?.transferTan = pseudonymizeService.genomDeTan(PatientId(this.patient.id)) } diff --git a/src/test/kotlin/dev/dnpm/etl/processor/pseudonym/ExtensionsTest.kt b/src/test/kotlin/dev/dnpm/etl/processor/pseudonym/ExtensionsTest.kt index c302362..2b4cd34 100644 --- a/src/test/kotlin/dev/dnpm/etl/processor/pseudonym/ExtensionsTest.kt +++ b/src/test/kotlin/dev/dnpm/etl/processor/pseudonym/ExtensionsTest.kt @@ -28,6 +28,8 @@ import dev.dnpm.etl.processor.consent.MtbFileConsentService import dev.dnpm.etl.processor.services.ConsentProcessor import dev.dnpm.etl.processor.services.ConsentProcessorTest import dev.pcvolkmer.mv64e.mtb.* +import java.time.Instant +import java.util.* import org.assertj.core.api.Assertions.assertThat import org.hl7.fhir.r4.model.Bundle import org.junit.jupiter.api.Nested @@ -40,231 +42,253 @@ import org.mockito.kotlin.anyValueClass import org.mockito.kotlin.doAnswer import org.mockito.kotlin.whenever import 
org.springframework.core.io.ClassPathResource -import java.time.Instant -import java.util.* @ExtendWith(MockitoExtension::class) class ExtensionsTest { - fun getObjectMapper(): ObjectMapper { - return JacksonConfig().objectMapper() - } + fun getObjectMapper(): ObjectMapper { + return JacksonConfig().objectMapper() + } - @Nested - inner class UsingDnpmV2Datamodel { + @Nested + inner class UsingDnpmV2Datamodel { - val FAKE_MTB_FILE_PATH = "mv64e-mtb-fake-patient.json" - val CLEAN_PATIENT_ID = "644bae7a-56f6-4ee8-b02f-c532e65af5b1" - - private fun fakeMtbFile(): Mtb { - val mtbFile = ClassPathResource(FAKE_MTB_FILE_PATH).inputStream - return getObjectMapper().readValue(mtbFile, Mtb::class.java) - } - - private fun Mtb.serialized(): String { - return getObjectMapper().writeValueAsString(this) - } + val FAKE_MTB_FILE_PATH = "mv64e-mtb-fake-patient.json" + val CLEAN_PATIENT_ID = "644bae7a-56f6-4ee8-b02f-c532e65af5b1" - @Test - fun shouldNotContainCleanPatientId(@Mock pseudonymizeService: PseudonymizeService) { - doAnswer { - it.arguments[0] - "PSEUDO-ID" - }.whenever(pseudonymizeService).patientPseudonym(anyValueClass()) - - val mtbFile = fakeMtbFile() - mtbFile.ensureMetaDataIsInitialized() - addConsentData(mtbFile) + private fun fakeMtbFile(): Mtb { + val mtbFile = ClassPathResource(FAKE_MTB_FILE_PATH).inputStream + return getObjectMapper().readValue(mtbFile, Mtb::class.java) + } - mtbFile.pseudonymizeWith(pseudonymizeService) + private fun Mtb.serialized(): String { + return getObjectMapper().writeValueAsString(this) + } - assertThat(mtbFile.patient.id).isEqualTo("PSEUDO-ID") - assertThat(mtbFile.serialized()).doesNotContain(CLEAN_PATIENT_ID) - } + @Test + fun shouldNotContainCleanPatientId(@Mock pseudonymizeService: PseudonymizeService) { + doAnswer { + it.arguments[0] + "PSEUDO-ID" + } + .whenever(pseudonymizeService) + .patientPseudonym(anyValueClass()) - private fun addConsentData(mtbFile: Mtb) { - val gIcsConfigProperties = GIcsConfigProperties("", "", "") - val 
appConfigProperties = AppConfigProperties(emptyList()) + val mtbFile = fakeMtbFile() + mtbFile.ensureMetaDataIsInitialized() + addConsentData(mtbFile) - val bundle = Bundle() - val dummyConsent = ConsentProcessorTest.getDummyGenomDeConsent() - dummyConsent.patient.reference = "Patient/$CLEAN_PATIENT_ID" - bundle.addEntry().resource = dummyConsent + mtbFile.pseudonymizeWith(pseudonymizeService) - ConsentProcessor( - appConfigProperties, - gIcsConfigProperties, - JacksonConfig().objectMapper(), - FhirContext.forR4(), - MtbFileConsentService() - ).embedBroadConsentResources(mtbFile, bundle) + assertThat(mtbFile.patient.id).isEqualTo("PSEUDO-ID") + assertThat(mtbFile.serialized()).doesNotContain(CLEAN_PATIENT_ID) + } - } + private fun addConsentData(mtbFile: Mtb) { + val gIcsConfigProperties = GIcsConfigProperties("", "", "") + val appConfigProperties = AppConfigProperties(emptyList()) + + val bundle = Bundle() + val dummyConsent = ConsentProcessorTest.getDummyGenomDeConsent() + dummyConsent.patient.reference = "Patient/$CLEAN_PATIENT_ID" + bundle.addEntry().resource = dummyConsent + + ConsentProcessor( + appConfigProperties, + gIcsConfigProperties, + JacksonConfig().objectMapper(), + FhirContext.forR4(), + MtbFileConsentService(), + ) + .embedBroadConsentResources(mtbFile, bundle) + } - @Test - fun shouldNotThrowExceptionOnNullValues(@Mock pseudonymizeService: PseudonymizeService) { - doAnswer { - it.arguments[0] - "PSEUDO-ID" - }.whenever(pseudonymizeService).patientPseudonym(anyValueClass()) - - doAnswer { - "TESTDOMAIN" - }.whenever(pseudonymizeService).prefix() - - val mtbFile = Mtb().apply { - this.patient = Patient().apply { - this.id = "PID" - this.birthDate = Date.from(Instant.now()) - this.gender = GenderCoding().apply { - this.code = GenderCodingCode.MALE - } + @Test + fun shouldNotThrowExceptionOnNullValues(@Mock pseudonymizeService: PseudonymizeService) { + doAnswer { + it.arguments[0] + "PSEUDO-ID" + } + .whenever(pseudonymizeService) + 
.patientPseudonym(anyValueClass()) + + doAnswer { "TESTDOMAIN" }.whenever(pseudonymizeService).prefix() + + val mtbFile = + Mtb().apply { + this.patient = + Patient().apply { + this.id = "PID" + this.birthDate = Date.from(Instant.now()) + this.gender = GenderCoding().apply { this.code = GenderCodingCode.MALE } } - this.episodesOfCare = listOf( + this.episodesOfCare = + listOf( MtbEpisodeOfCare().apply { - this.id = "1" - this.patient = Reference().apply { - this.id = "PID" - } - this.period = PeriodDate().apply { - this.start = Date.from(Instant.now()) - } + this.id = "1" + this.patient = Reference().apply { this.id = "PID" } + this.period = PeriodDate().apply { this.start = Date.from(Instant.now()) } } ) - } + } - mtbFile.pseudonymizeWith(pseudonymizeService) - mtbFile.anonymizeContentWith(pseudonymizeService) + mtbFile.pseudonymizeWith(pseudonymizeService) + mtbFile.anonymizeContentWith(pseudonymizeService) - assertThat(mtbFile.episodesOfCare).hasSize(1) - assertThat(mtbFile.episodesOfCare.map { it.id }).isNotNull - } + assertThat(mtbFile.episodesOfCare).hasSize(1) + assertThat(mtbFile.episodesOfCare.map { it.id }).isNotNull + } - @Test - fun shouldNotContainAnyUuidAfterRehashingOfIds(@Mock pseudonymizeService: PseudonymizeService) { - doAnswer { - it.arguments[0] - "PSEUDO-ID" - }.whenever(pseudonymizeService).patientPseudonym(anyValueClass()) - - doAnswer { - "TESTDOMAIN" - }.whenever(pseudonymizeService).prefix() - - val mtbFile = fakeMtbFile() - - /** - * replace hex values with random long, so our test does not match false positives - */ - mtbFile.ngsReports.forEach { report -> - report.results.simpleVariants.forEach { simpleVariant -> - simpleVariant.externalIds.forEach { extIdValue -> - extIdValue.value = - Math.random().toLong().toString() - } - } - } - mtbFile.ngsReports.forEach { report -> - report.results.rnaFusions.forEach { simpleVariant -> - simpleVariant.externalIds.forEach { extIdValue -> - extIdValue.value = - Math.random().toLong().toString() - 
} - simpleVariant.fusionPartner3Prime?.transcriptId?.value = - Math.random().toLong().toString() - simpleVariant.fusionPartner5Prime?.transcriptId?.value = - Math.random().toLong().toString() - simpleVariant.externalIds?.forEach { - it?.value = Math.random().toLong().toString() - } - } - } + @Test + fun shouldNotContainAnyUuidAfterRehashingOfIds(@Mock pseudonymizeService: PseudonymizeService) { + doAnswer { + it.arguments[0] + "PSEUDO-ID" + } + .whenever(pseudonymizeService) + .patientPseudonym(anyValueClass()) - mtbFile.pseudonymizeWith(pseudonymizeService) - mtbFile.anonymizeContentWith(pseudonymizeService) - - val pattern = - "\"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}\"".toRegex() - .toPattern() - val input = mtbFile.serialized() - val matcher = pattern.matcher(input) - - assertThrows { - matcher.find() - val posSt = "check at pos: " + matcher.start().toString() + ", " + matcher.end() - println(posSt + " with " + matcher.group()) - }.also { - assertThat(it.message).isEqualTo("No match found") - } + doAnswer { "TESTDOMAIN" }.whenever(pseudonymizeService).prefix() + + val mtbFile = fakeMtbFile() + + /** replace hex values with random long, so our test does not match false positives */ + mtbFile.ngsReports.forEach { report -> + report.results.simpleVariants.forEach { simpleVariant -> + simpleVariant.externalIds.forEach { extIdValue -> + extIdValue.value = Math.random().toLong().toString() + } } + } + mtbFile.ngsReports.forEach { report -> + report.results.rnaFusions.forEach { simpleVariant -> + simpleVariant.externalIds.forEach { extIdValue -> + extIdValue.value = Math.random().toLong().toString() + } + simpleVariant.fusionPartner3Prime?.transcriptId?.value = Math.random().toLong().toString() + simpleVariant.fusionPartner5Prime?.transcriptId?.value = Math.random().toLong().toString() + simpleVariant.externalIds?.forEach { it?.value = Math.random().toLong().toString() } + } + } + + mtbFile.pseudonymizeWith(pseudonymizeService) + 
mtbFile.anonymizeContentWith(pseudonymizeService) + + val pattern = + "\"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}\"".toRegex().toPattern() + val input = mtbFile.serialized() + val matcher = pattern.matcher(input) + + assertThrows { + matcher.find() + val posSt = "check at pos: " + matcher.start().toString() + ", " + matcher.end() + println(posSt + " with " + matcher.group()) + } + .also { assertThat(it.message).isEqualTo("No match found") } } + } - @Test - fun shouldUseSameAnonymIdForDiagnosisAndDiagnosisReferences(@Mock pseudonymizeService: PseudonymizeService) { + @Test + fun shouldUseSameAnonymIdForDiagnosisAndDiagnosisReferences( + @Mock pseudonymizeService: PseudonymizeService + ) { - doAnswer { - it.arguments[0] - "PSEUDO-ID" - }.whenever(pseudonymizeService).patientPseudonym(anyValueClass()) + doAnswer { + it.arguments[0] + "PSEUDO-ID" + } + .whenever(pseudonymizeService) + .patientPseudonym(anyValueClass()) - doAnswer { - "TESTDOMAIN" - }.whenever(pseudonymizeService).prefix() + doAnswer { "TESTDOMAIN" }.whenever(pseudonymizeService).prefix() - val mtbFile = Mtb().apply { - this.patient = Patient().apply { + val mtbFile = + Mtb().apply { + this.patient = + Patient().apply { this.id = "PID" this.birthDate = Date.from(Instant.now()) - this.gender = GenderCoding().apply { - this.code = GenderCodingCode.MALE - } - } - this.diagnoses = listOf( - MtbDiagnosis().apply { - this.id = "Diagnosis-1" - } - ) - this.episodesOfCare = listOf( - MtbEpisodeOfCare().apply { + this.gender = GenderCoding().apply { this.code = GenderCodingCode.MALE } + } + this.diagnoses = listOf(MtbDiagnosis().apply { this.id = "Diagnosis-1" }) + this.episodesOfCare = + listOf( + MtbEpisodeOfCare().apply { this.id = "Episode-1" - this.diagnoses = listOf( - Reference().apply { - this.id = "Diagnosis-1" - } - ) - } - ) - this.guidelineTherapies = listOf( - MtbSystemicTherapy().apply { + this.diagnoses = listOf(Reference().apply { this.id = "Diagnosis-1" }) + } + ) + 
this.guidelineTherapies = + listOf( + MtbSystemicTherapy().apply { this.id = "Systemic-Therapy-1" - this.reason = Reference().apply { - this.id = "Diagnosis-1" - } - } - ) - this.guidelineProcedures = listOf( - OncoProcedure().apply { + this.reason = Reference().apply { this.id = "Diagnosis-1" } + } + ) + this.guidelineProcedures = + listOf( + OncoProcedure().apply { this.id = "Onco-Procedure-1" - this.reason = Reference().apply { - this.id = "Diagnosis-1" - } - } - ) - this.specimens = listOf( - TumorSpecimen().apply { + this.reason = Reference().apply { this.id = "Diagnosis-1" } + } + ) + this.specimens = + listOf( + TumorSpecimen().apply { this.id = "Specimen-1" - this.diagnosis = Reference().apply { - this.id = "Diagnosis-1" - } - } - ) + this.diagnosis = Reference().apply { this.id = "Diagnosis-1" } + } + ) } + mtbFile.pseudonymizeWith(pseudonymizeService) + mtbFile.anonymizeContentWith(pseudonymizeService) + + assertThat(mtbFile.diagnoses.first().id) + .isEqualTo(mtbFile.episodesOfCare.first().diagnoses.first().id) + assertThat(mtbFile.diagnoses.first().id).isEqualTo(mtbFile.guidelineTherapies.first().reason.id) + assertThat(mtbFile.diagnoses.first().id) + .isEqualTo(mtbFile.guidelineProcedures.first().reason.id) + assertThat(mtbFile.diagnoses.first().id).isEqualTo(mtbFile.specimens.first().diagnosis.id) + } + + @Test + fun shouldNotThrowAnyExceptionOnMissingMsiId( + @Mock pseudonymizeService: PseudonymizeService + ) { + + doAnswer { + it.arguments[0] + "PSEUDO-ID" + } + .whenever(pseudonymizeService) + .patientPseudonym(anyValueClass()) + + doAnswer { "TESTDOMAIN" }.whenever(pseudonymizeService).prefix() + + val mtbFile = + Mtb().apply { + this.patient = + Patient().apply { + this.id = "PID" + this.birthDate = Date.from(Instant.now()) + this.gender = GenderCoding().apply { this.code = GenderCodingCode.MALE } + } + this.msiFindings = listOf( + null, + Msi.builder().id("1").build(), + Msi.builder(). 
build(), + Msi.builder().specimen(null).build(), + Msi.builder().specimen(Reference.builder().build()).build() + ) + } + mtbFile.pseudonymizeWith(pseudonymizeService) mtbFile.anonymizeContentWith(pseudonymizeService) - assertThat(mtbFile.diagnoses.first().id).isEqualTo(mtbFile.episodesOfCare.first().diagnoses.first().id) - assertThat(mtbFile.diagnoses.first().id).isEqualTo(mtbFile.guidelineTherapies.first().reason.id) - assertThat(mtbFile.diagnoses.first().id).isEqualTo(mtbFile.guidelineProcedures.first().reason.id) - assertThat(mtbFile.diagnoses.first().id).isEqualTo(mtbFile.specimens.first().diagnosis.id) + assertThat( mtbFile.msiFindings ).isNotNull + assertThat(mtbFile.msiFindings[1]).satisfiesAnyOf( + { assertThat(it.id).isNull() }, + { assertThat(it.id).isEqualTo("TESTDOMAIN44e20a53bbbf9f3ae39626d05df7014dcd77d6098")} + ) } } -- cgit v1.2.3