From e2b1763da105dd913bdf12945cdd7d05a7ca9f47 Mon Sep 17 00:00:00 2001 From: Paul-Christian Volkmer Date: Thu, 20 Nov 2025 12:42:58 +0100 Subject: fix: possible NPE for MSI anonymization This might occur if a null value is present in the list of MSI findings. With this change, the usage of "it" has been replaced with proper element names. --- .../dnpm/etl/processor/pseudonym/ExtensionsTest.kt | 400 +++++++++++---------- 1 file changed, 212 insertions(+), 188 deletions(-) (limited to 'src/test/kotlin') diff --git a/src/test/kotlin/dev/dnpm/etl/processor/pseudonym/ExtensionsTest.kt b/src/test/kotlin/dev/dnpm/etl/processor/pseudonym/ExtensionsTest.kt index c302362..2b4cd34 100644 --- a/src/test/kotlin/dev/dnpm/etl/processor/pseudonym/ExtensionsTest.kt +++ b/src/test/kotlin/dev/dnpm/etl/processor/pseudonym/ExtensionsTest.kt @@ -28,6 +28,8 @@ import dev.dnpm.etl.processor.consent.MtbFileConsentService import dev.dnpm.etl.processor.services.ConsentProcessor import dev.dnpm.etl.processor.services.ConsentProcessorTest import dev.pcvolkmer.mv64e.mtb.* +import java.time.Instant +import java.util.* import org.assertj.core.api.Assertions.assertThat import org.hl7.fhir.r4.model.Bundle import org.junit.jupiter.api.Nested @@ -40,231 +42,253 @@ import org.mockito.kotlin.anyValueClass import org.mockito.kotlin.doAnswer import org.mockito.kotlin.whenever import org.springframework.core.io.ClassPathResource -import java.time.Instant -import java.util.* @ExtendWith(MockitoExtension::class) class ExtensionsTest { - fun getObjectMapper(): ObjectMapper { - return JacksonConfig().objectMapper() - } + fun getObjectMapper(): ObjectMapper { + return JacksonConfig().objectMapper() + } - @Nested - inner class UsingDnpmV2Datamodel { + @Nested + inner class UsingDnpmV2Datamodel { - val FAKE_MTB_FILE_PATH = "mv64e-mtb-fake-patient.json" - val CLEAN_PATIENT_ID = "644bae7a-56f6-4ee8-b02f-c532e65af5b1" - - private fun fakeMtbFile(): Mtb { - val mtbFile = 
ClassPathResource(FAKE_MTB_FILE_PATH).inputStream - return getObjectMapper().readValue(mtbFile, Mtb::class.java) - } - - private fun Mtb.serialized(): String { - return getObjectMapper().writeValueAsString(this) - } + val FAKE_MTB_FILE_PATH = "mv64e-mtb-fake-patient.json" + val CLEAN_PATIENT_ID = "644bae7a-56f6-4ee8-b02f-c532e65af5b1" - @Test - fun shouldNotContainCleanPatientId(@Mock pseudonymizeService: PseudonymizeService) { - doAnswer { - it.arguments[0] - "PSEUDO-ID" - }.whenever(pseudonymizeService).patientPseudonym(anyValueClass()) - - val mtbFile = fakeMtbFile() - mtbFile.ensureMetaDataIsInitialized() - addConsentData(mtbFile) + private fun fakeMtbFile(): Mtb { + val mtbFile = ClassPathResource(FAKE_MTB_FILE_PATH).inputStream + return getObjectMapper().readValue(mtbFile, Mtb::class.java) + } - mtbFile.pseudonymizeWith(pseudonymizeService) + private fun Mtb.serialized(): String { + return getObjectMapper().writeValueAsString(this) + } - assertThat(mtbFile.patient.id).isEqualTo("PSEUDO-ID") - assertThat(mtbFile.serialized()).doesNotContain(CLEAN_PATIENT_ID) - } + @Test + fun shouldNotContainCleanPatientId(@Mock pseudonymizeService: PseudonymizeService) { + doAnswer { + it.arguments[0] + "PSEUDO-ID" + } + .whenever(pseudonymizeService) + .patientPseudonym(anyValueClass()) - private fun addConsentData(mtbFile: Mtb) { - val gIcsConfigProperties = GIcsConfigProperties("", "", "") - val appConfigProperties = AppConfigProperties(emptyList()) + val mtbFile = fakeMtbFile() + mtbFile.ensureMetaDataIsInitialized() + addConsentData(mtbFile) - val bundle = Bundle() - val dummyConsent = ConsentProcessorTest.getDummyGenomDeConsent() - dummyConsent.patient.reference = "Patient/$CLEAN_PATIENT_ID" - bundle.addEntry().resource = dummyConsent + mtbFile.pseudonymizeWith(pseudonymizeService) - ConsentProcessor( - appConfigProperties, - gIcsConfigProperties, - JacksonConfig().objectMapper(), - FhirContext.forR4(), - MtbFileConsentService() - ).embedBroadConsentResources(mtbFile, 
bundle) + assertThat(mtbFile.patient.id).isEqualTo("PSEUDO-ID") + assertThat(mtbFile.serialized()).doesNotContain(CLEAN_PATIENT_ID) + } - } + private fun addConsentData(mtbFile: Mtb) { + val gIcsConfigProperties = GIcsConfigProperties("", "", "") + val appConfigProperties = AppConfigProperties(emptyList()) + + val bundle = Bundle() + val dummyConsent = ConsentProcessorTest.getDummyGenomDeConsent() + dummyConsent.patient.reference = "Patient/$CLEAN_PATIENT_ID" + bundle.addEntry().resource = dummyConsent + + ConsentProcessor( + appConfigProperties, + gIcsConfigProperties, + JacksonConfig().objectMapper(), + FhirContext.forR4(), + MtbFileConsentService(), + ) + .embedBroadConsentResources(mtbFile, bundle) + } - @Test - fun shouldNotThrowExceptionOnNullValues(@Mock pseudonymizeService: PseudonymizeService) { - doAnswer { - it.arguments[0] - "PSEUDO-ID" - }.whenever(pseudonymizeService).patientPseudonym(anyValueClass()) - - doAnswer { - "TESTDOMAIN" - }.whenever(pseudonymizeService).prefix() - - val mtbFile = Mtb().apply { - this.patient = Patient().apply { - this.id = "PID" - this.birthDate = Date.from(Instant.now()) - this.gender = GenderCoding().apply { - this.code = GenderCodingCode.MALE - } + @Test + fun shouldNotThrowExceptionOnNullValues(@Mock pseudonymizeService: PseudonymizeService) { + doAnswer { + it.arguments[0] + "PSEUDO-ID" + } + .whenever(pseudonymizeService) + .patientPseudonym(anyValueClass()) + + doAnswer { "TESTDOMAIN" }.whenever(pseudonymizeService).prefix() + + val mtbFile = + Mtb().apply { + this.patient = + Patient().apply { + this.id = "PID" + this.birthDate = Date.from(Instant.now()) + this.gender = GenderCoding().apply { this.code = GenderCodingCode.MALE } } - this.episodesOfCare = listOf( + this.episodesOfCare = + listOf( MtbEpisodeOfCare().apply { - this.id = "1" - this.patient = Reference().apply { - this.id = "PID" - } - this.period = PeriodDate().apply { - this.start = Date.from(Instant.now()) - } + this.id = "1" + this.patient = 
Reference().apply { this.id = "PID" } + this.period = PeriodDate().apply { this.start = Date.from(Instant.now()) } } ) - } + } - mtbFile.pseudonymizeWith(pseudonymizeService) - mtbFile.anonymizeContentWith(pseudonymizeService) + mtbFile.pseudonymizeWith(pseudonymizeService) + mtbFile.anonymizeContentWith(pseudonymizeService) - assertThat(mtbFile.episodesOfCare).hasSize(1) - assertThat(mtbFile.episodesOfCare.map { it.id }).isNotNull - } + assertThat(mtbFile.episodesOfCare).hasSize(1) + assertThat(mtbFile.episodesOfCare.map { it.id }).isNotNull + } - @Test - fun shouldNotContainAnyUuidAfterRehashingOfIds(@Mock pseudonymizeService: PseudonymizeService) { - doAnswer { - it.arguments[0] - "PSEUDO-ID" - }.whenever(pseudonymizeService).patientPseudonym(anyValueClass()) - - doAnswer { - "TESTDOMAIN" - }.whenever(pseudonymizeService).prefix() - - val mtbFile = fakeMtbFile() - - /** - * replace hex values with random long, so our test does not match false positives - */ - mtbFile.ngsReports.forEach { report -> - report.results.simpleVariants.forEach { simpleVariant -> - simpleVariant.externalIds.forEach { extIdValue -> - extIdValue.value = - Math.random().toLong().toString() - } - } - } - mtbFile.ngsReports.forEach { report -> - report.results.rnaFusions.forEach { simpleVariant -> - simpleVariant.externalIds.forEach { extIdValue -> - extIdValue.value = - Math.random().toLong().toString() - } - simpleVariant.fusionPartner3Prime?.transcriptId?.value = - Math.random().toLong().toString() - simpleVariant.fusionPartner5Prime?.transcriptId?.value = - Math.random().toLong().toString() - simpleVariant.externalIds?.forEach { - it?.value = Math.random().toLong().toString() - } - } - } + @Test + fun shouldNotContainAnyUuidAfterRehashingOfIds(@Mock pseudonymizeService: PseudonymizeService) { + doAnswer { + it.arguments[0] + "PSEUDO-ID" + } + .whenever(pseudonymizeService) + .patientPseudonym(anyValueClass()) - mtbFile.pseudonymizeWith(pseudonymizeService) - 
mtbFile.anonymizeContentWith(pseudonymizeService) - - val pattern = - "\"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}\"".toRegex() - .toPattern() - val input = mtbFile.serialized() - val matcher = pattern.matcher(input) - - assertThrows { - matcher.find() - val posSt = "check at pos: " + matcher.start().toString() + ", " + matcher.end() - println(posSt + " with " + matcher.group()) - }.also { - assertThat(it.message).isEqualTo("No match found") - } + doAnswer { "TESTDOMAIN" }.whenever(pseudonymizeService).prefix() + + val mtbFile = fakeMtbFile() + + /** replace hex values with random long, so our test does not match false positives */ + mtbFile.ngsReports.forEach { report -> + report.results.simpleVariants.forEach { simpleVariant -> + simpleVariant.externalIds.forEach { extIdValue -> + extIdValue.value = Math.random().toLong().toString() + } } + } + mtbFile.ngsReports.forEach { report -> + report.results.rnaFusions.forEach { simpleVariant -> + simpleVariant.externalIds.forEach { extIdValue -> + extIdValue.value = Math.random().toLong().toString() + } + simpleVariant.fusionPartner3Prime?.transcriptId?.value = Math.random().toLong().toString() + simpleVariant.fusionPartner5Prime?.transcriptId?.value = Math.random().toLong().toString() + simpleVariant.externalIds?.forEach { it?.value = Math.random().toLong().toString() } + } + } + + mtbFile.pseudonymizeWith(pseudonymizeService) + mtbFile.anonymizeContentWith(pseudonymizeService) + + val pattern = + "\"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}\"".toRegex().toPattern() + val input = mtbFile.serialized() + val matcher = pattern.matcher(input) + + assertThrows { + matcher.find() + val posSt = "check at pos: " + matcher.start().toString() + ", " + matcher.end() + println(posSt + " with " + matcher.group()) + } + .also { assertThat(it.message).isEqualTo("No match found") } } + } - @Test - fun shouldUseSameAnonymIdForDiagnosisAndDiagnosisReferences(@Mock pseudonymizeService: 
PseudonymizeService) { + @Test + fun shouldUseSameAnonymIdForDiagnosisAndDiagnosisReferences( + @Mock pseudonymizeService: PseudonymizeService + ) { - doAnswer { - it.arguments[0] - "PSEUDO-ID" - }.whenever(pseudonymizeService).patientPseudonym(anyValueClass()) + doAnswer { + it.arguments[0] + "PSEUDO-ID" + } + .whenever(pseudonymizeService) + .patientPseudonym(anyValueClass()) - doAnswer { - "TESTDOMAIN" - }.whenever(pseudonymizeService).prefix() + doAnswer { "TESTDOMAIN" }.whenever(pseudonymizeService).prefix() - val mtbFile = Mtb().apply { - this.patient = Patient().apply { + val mtbFile = + Mtb().apply { + this.patient = + Patient().apply { this.id = "PID" this.birthDate = Date.from(Instant.now()) - this.gender = GenderCoding().apply { - this.code = GenderCodingCode.MALE - } - } - this.diagnoses = listOf( - MtbDiagnosis().apply { - this.id = "Diagnosis-1" - } - ) - this.episodesOfCare = listOf( - MtbEpisodeOfCare().apply { + this.gender = GenderCoding().apply { this.code = GenderCodingCode.MALE } + } + this.diagnoses = listOf(MtbDiagnosis().apply { this.id = "Diagnosis-1" }) + this.episodesOfCare = + listOf( + MtbEpisodeOfCare().apply { this.id = "Episode-1" - this.diagnoses = listOf( - Reference().apply { - this.id = "Diagnosis-1" - } - ) - } - ) - this.guidelineTherapies = listOf( - MtbSystemicTherapy().apply { + this.diagnoses = listOf(Reference().apply { this.id = "Diagnosis-1" }) + } + ) + this.guidelineTherapies = + listOf( + MtbSystemicTherapy().apply { this.id = "Systemic-Therapy-1" - this.reason = Reference().apply { - this.id = "Diagnosis-1" - } - } - ) - this.guidelineProcedures = listOf( - OncoProcedure().apply { + this.reason = Reference().apply { this.id = "Diagnosis-1" } + } + ) + this.guidelineProcedures = + listOf( + OncoProcedure().apply { this.id = "Onco-Procedure-1" - this.reason = Reference().apply { - this.id = "Diagnosis-1" - } - } - ) - this.specimens = listOf( - TumorSpecimen().apply { + this.reason = Reference().apply { this.id = 
"Diagnosis-1" } + } + ) + this.specimens = + listOf( + TumorSpecimen().apply { this.id = "Specimen-1" - this.diagnosis = Reference().apply { - this.id = "Diagnosis-1" - } - } - ) + this.diagnosis = Reference().apply { this.id = "Diagnosis-1" } + } + ) } + mtbFile.pseudonymizeWith(pseudonymizeService) + mtbFile.anonymizeContentWith(pseudonymizeService) + + assertThat(mtbFile.diagnoses.first().id) + .isEqualTo(mtbFile.episodesOfCare.first().diagnoses.first().id) + assertThat(mtbFile.diagnoses.first().id).isEqualTo(mtbFile.guidelineTherapies.first().reason.id) + assertThat(mtbFile.diagnoses.first().id) + .isEqualTo(mtbFile.guidelineProcedures.first().reason.id) + assertThat(mtbFile.diagnoses.first().id).isEqualTo(mtbFile.specimens.first().diagnosis.id) + } + + @Test + fun shouldNotThrowAnyExceptionOnMissingMsiId( + @Mock pseudonymizeService: PseudonymizeService + ) { + + doAnswer { + it.arguments[0] + "PSEUDO-ID" + } + .whenever(pseudonymizeService) + .patientPseudonym(anyValueClass()) + + doAnswer { "TESTDOMAIN" }.whenever(pseudonymizeService).prefix() + + val mtbFile = + Mtb().apply { + this.patient = + Patient().apply { + this.id = "PID" + this.birthDate = Date.from(Instant.now()) + this.gender = GenderCoding().apply { this.code = GenderCodingCode.MALE } + } + this.msiFindings = listOf( + null, + Msi.builder().id("1").build(), + Msi.builder(). 
build(), + Msi.builder().specimen(null).build(), + Msi.builder().specimen(Reference.builder().build()).build() + ) + } + mtbFile.pseudonymizeWith(pseudonymizeService) mtbFile.anonymizeContentWith(pseudonymizeService) - assertThat(mtbFile.diagnoses.first().id).isEqualTo(mtbFile.episodesOfCare.first().diagnoses.first().id) - assertThat(mtbFile.diagnoses.first().id).isEqualTo(mtbFile.guidelineTherapies.first().reason.id) - assertThat(mtbFile.diagnoses.first().id).isEqualTo(mtbFile.guidelineProcedures.first().reason.id) - assertThat(mtbFile.diagnoses.first().id).isEqualTo(mtbFile.specimens.first().diagnosis.id) + assertThat( mtbFile.msiFindings ).isNotNull + assertThat(mtbFile.msiFindings[1]).satisfiesAnyOf( + { assertThat(it.id).isNull() }, + { assertThat(it.id).isEqualTo("TESTDOMAIN44e20a53bbbf9f3ae39626d05df7014dcd77d6098")} + ) } } -- cgit v1.2.3