summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/main/kotlin/dev/dnpm/etl/processor/pseudonym/extensions.kt101
-rw-r--r--src/test/kotlin/dev/dnpm/etl/processor/pseudonym/ExtensionsTest.kt373
2 files changed, 323 insertions, 151 deletions
diff --git a/src/main/kotlin/dev/dnpm/etl/processor/pseudonym/extensions.kt b/src/main/kotlin/dev/dnpm/etl/processor/pseudonym/extensions.kt
index bf645f6..111494b 100644
--- a/src/main/kotlin/dev/dnpm/etl/processor/pseudonym/extensions.kt
+++ b/src/main/kotlin/dev/dnpm/etl/processor/pseudonym/extensions.kt
@@ -21,12 +21,12 @@ package dev.dnpm.etl.processor.pseudonym
import de.ukw.ccc.bwhc.dto.MtbFile
import dev.dnpm.etl.processor.PatientId
+import dev.pcvolkmer.mv64e.mtb.Mtb
import org.apache.commons.codec.digest.DigestUtils
/** Replaces patient ID with generated patient pseudonym
*
* @param pseudonymizeService The pseudonymizeService to be used
- *
* @return The MTB file containing patient pseudonymes
*/
infix fun MtbFile.pseudonymizeWith(pseudonymizeService: PseudonymizeService) {
@@ -49,7 +49,11 @@ infix fun MtbFile.pseudonymizeWith(pseudonymizeService: PseudonymizeService) {
}
this.lastGuidelineTherapies?.forEach { it.patient = patientPseudonym }
this.molecularPathologyFindings?.forEach { it.patient = patientPseudonym }
- this.molecularTherapies?.forEach { molecularTherapy -> molecularTherapy.history.forEach { it.patient = patientPseudonym } }
+ this.molecularTherapies?.forEach { molecularTherapy ->
+ molecularTherapy.history.forEach {
+ it.patient = patientPseudonym
+ }
+ }
this.ngsReports?.forEach { it.patient = patientPseudonym }
this.previousGuidelineTherapies?.forEach { it.patient = patientPseudonym }
this.rebiopsyRequests?.forEach { it.patient = patientPseudonym }
@@ -63,7 +67,6 @@ infix fun MtbFile.pseudonymizeWith(pseudonymizeService: PseudonymizeService) {
* Creates new hash of content IDs with given prefix except for patient IDs
*
* @param pseudonymizeService The pseudonymizeService to be used
- *
* @return The MTB file containing rehashed content IDs
*/
infix fun MtbFile.anonymizeContentWith(pseudonymizeService: PseudonymizeService) {
@@ -120,8 +123,8 @@ infix fun MtbFile.anonymizeContentWith(pseudonymizeService: PseudonymizeService)
id = id?.let { anonymize(it) }
}
}
- this.geneticCounsellingRequests?.onEach { geneticCounsellingRequest ->
- geneticCounsellingRequest?.apply {
+ this.geneticCounsellingRequests?.onEach { geneticCounsellingRequest ->
+ geneticCounsellingRequest?.apply {
id = id?.let { anonymize(it) }
}
}
@@ -223,4 +226,90 @@ infix fun MtbFile.anonymizeContentWith(pseudonymizeService: PseudonymizeService)
id = id?.let { anonymize(it) }
}
}
-} \ No newline at end of file
+}
+
+/**
+ * Replaces patient ID with generated patient pseudonym
+ *
+ * The MTB file is modified in place: the patient itself and each patient
+ * reference listed below are set to the pseudonym. Lists, results and
+ * references that are absent (null) are skipped instead of raising an error.
+ *
+ * @since 0.11.0
+ * @param pseudonymizeService The pseudonymizeService to be used
+ */
+infix fun Mtb.pseudonymizeWith(pseudonymizeService: PseudonymizeService) {
+    val patientPseudonym = pseudonymizeService.patientPseudonym(PatientId(this.patient.id)).value
+
+    this.patient.id = patientPseudonym
+    this.episodesOfCare?.forEach { it.patient?.id = patientPseudonym }
+    this.carePlans?.forEach { carePlan ->
+        carePlan.patient?.id = patientPseudonym
+        carePlan.rebiopsyRequests?.forEach { it.patient?.id = patientPseudonym }
+        carePlan.histologyReevaluationRequests?.forEach { it.patient?.id = patientPseudonym }
+        carePlan.medicationRecommendations?.forEach { it.patient?.id = patientPseudonym }
+        carePlan.studyEnrollmentRecommendations?.forEach { it.patient?.id = patientPseudonym }
+        carePlan.procedureRecommendations?.forEach { it.patient?.id = patientPseudonym }
+        carePlan.geneticCounselingRecommendation?.patient?.id = patientPseudonym
+    }
+    this.diagnoses?.forEach { it.patient?.id = patientPseudonym }
+    this.guidelineTherapies?.forEach { it.patient?.id = patientPseudonym }
+    this.guidelineProcedures?.forEach { it.patient?.id = patientPseudonym }
+    this.claims?.forEach { it.patient?.id = patientPseudonym }
+    this.claimResponses?.forEach { it.patient?.id = patientPseudonym }
+    this.histologyReports?.forEach { report ->
+        report.patient?.id = patientPseudonym
+        report.results?.tumorMorphology?.patient?.id = patientPseudonym
+        report.results?.tumorCellContent?.patient?.id = patientPseudonym
+    }
+    this.ngsReports?.forEach { report ->
+        report.patient?.id = patientPseudonym
+        report.results?.simpleVariants?.forEach { it.patient?.id = patientPseudonym }
+        report.results?.copyNumberVariants?.forEach { it.patient?.id = patientPseudonym }
+        report.results?.dnaFusions?.forEach { it.patient?.id = patientPseudonym }
+        report.results?.rnaFusions?.forEach { it.patient?.id = patientPseudonym }
+        report.results?.tumorCellContent?.patient?.id = patientPseudonym
+        report.results?.brcaness?.patient?.id = patientPseudonym
+        report.results?.tmb?.patient?.id = patientPseudonym
+        report.results?.hrdScore?.patient?.id = patientPseudonym
+    }
+    this.ihcReports?.forEach { report ->
+        report.patient?.id = patientPseudonym
+        report.results?.msiMmr?.forEach { it.patient?.id = patientPseudonym }
+        report.results?.proteinExpression?.forEach { it.patient?.id = patientPseudonym }
+    }
+    this.responses?.forEach { it.patient?.id = patientPseudonym }
+    this.specimens?.forEach { it.patient?.id = patientPseudonym }
+    this.priorDiagnosticReports?.forEach { it.patient?.id = patientPseudonym }
+    this.performanceStatus?.forEach { it.patient?.id = patientPseudonym }
+    this.systemicTherapies?.forEach { therapy ->
+        therapy.history?.forEach { it.patient?.id = patientPseudonym }
+    }
+}
+
+/**
+ * Rehashes content IDs of this MTB file using the prefix of the given service.
+ *
+ * The receiver is modified in place and nothing is returned. A rehashed ID is
+ * the prefix followed by the first 41 characters of the lowercase hex SHA-256
+ * digest of `<prefix>-<original ID>`. Patient IDs are not touched here and,
+ * for now, only the IDs of the episodes of care are rehashed.
+ *
+ * @since 0.11.0
+ *
+ * @param pseudonymizeService The pseudonymizeService providing the prefix
+ */
+infix fun Mtb.anonymizeContentWith(pseudonymizeService: PseudonymizeService) {
+    val prefix = pseudonymizeService.prefix()
+
+    // Local helper so that the prefix is requested from the service only once
+    fun anonymize(id: String): String =
+        DigestUtils.sha256Hex("$prefix-$id").substring(0, 41).lowercase().let { "$prefix$it" }
+
+    this.episodesOfCare?.filterNotNull()?.forEach { episodeOfCare ->
+        // A missing ID stays null, any other ID is replaced by its hash
+        episodeOfCare.id = episodeOfCare.id?.let { anonymize(it) }
+    }
+
+    // TODO all other properties
+}
diff --git a/src/test/kotlin/dev/dnpm/etl/processor/pseudonym/ExtensionsTest.kt b/src/test/kotlin/dev/dnpm/etl/processor/pseudonym/ExtensionsTest.kt
index 0acf7db..d0ccb2b 100644
--- a/src/test/kotlin/dev/dnpm/etl/processor/pseudonym/ExtensionsTest.kt
+++ b/src/test/kotlin/dev/dnpm/etl/processor/pseudonym/ExtensionsTest.kt
@@ -1,7 +1,7 @@
/*
* This file is part of ETL-Processor
*
- * Copyright (c) 2023 Comprehensive Cancer Center Mainfranken, Datenintegrationszentrum Philipps-Universität Marburg and Contributors
+ * Copyright (c) 2025 Comprehensive Cancer Center Mainfranken, Datenintegrationszentrum Philipps-Universität Marburg and Contributors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
@@ -21,7 +21,12 @@ package dev.dnpm.etl.processor.pseudonym
import com.fasterxml.jackson.databind.ObjectMapper
import de.ukw.ccc.bwhc.dto.*
+import dev.pcvolkmer.mv64e.mtb.MTBEpisodeOfCare
+import dev.pcvolkmer.mv64e.mtb.Mtb
+import dev.pcvolkmer.mv64e.mtb.PeriodDate
+import dev.pcvolkmer.mv64e.mtb.Reference
import org.assertj.core.api.Assertions.assertThat
+import org.junit.jupiter.api.Nested
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.assertThrows
import org.junit.jupiter.api.extension.ExtendWith
@@ -32,167 +37,245 @@ import org.mockito.kotlin.doAnswer
import org.mockito.kotlin.whenever
import org.springframework.core.io.ClassPathResource
-const val FAKE_MTB_FILE_PATH = "fake_MTBFile.json"
-const val CLEAN_PATIENT_ID = "5dad2f0b-49c6-47d8-a952-7b9e9e0f7549"
-
@ExtendWith(MockitoExtension::class)
class ExtensionsTest {
- private fun fakeMtbFile(): MtbFile {
- val mtbFile = ClassPathResource(FAKE_MTB_FILE_PATH).inputStream
- return ObjectMapper().readValue(mtbFile, MtbFile::class.java)
- }
+ @Nested
+ inner class UsingBwhcDatamodel {
- private fun MtbFile.serialized(): String {
- return ObjectMapper().writeValueAsString(this)
- }
+ val FAKE_MTB_FILE_PATH = "fake_MTBFile.json"
+ val CLEAN_PATIENT_ID = "5dad2f0b-49c6-47d8-a952-7b9e9e0f7549"
+
+ private fun fakeMtbFile(): MtbFile {
+ val mtbFile = ClassPathResource(FAKE_MTB_FILE_PATH).inputStream
+ return ObjectMapper().readValue(mtbFile, MtbFile::class.java)
+ }
- @Test
- fun shouldNotContainCleanPatientId(@Mock pseudonymizeService: PseudonymizeService) {
- doAnswer {
- it.arguments[0]
- "PSEUDO-ID"
- }.whenever(pseudonymizeService).patientPseudonym(anyValueClass())
+ private fun MtbFile.serialized(): String {
+ return ObjectMapper().writeValueAsString(this)
+ }
- val mtbFile = fakeMtbFile()
+ @Test
+ fun shouldNotContainCleanPatientId(@Mock pseudonymizeService: PseudonymizeService) {
+ doAnswer {
+ it.arguments[0]
+ "PSEUDO-ID"
+ }.whenever(pseudonymizeService).patientPseudonym(anyValueClass())
- mtbFile.pseudonymizeWith(pseudonymizeService)
+ val mtbFile = fakeMtbFile()
- assertThat(mtbFile.patient.id).isEqualTo("PSEUDO-ID")
- assertThat(mtbFile.serialized()).doesNotContain(CLEAN_PATIENT_ID)
- }
+ mtbFile.pseudonymizeWith(pseudonymizeService)
+
+ assertThat(mtbFile.patient.id).isEqualTo("PSEUDO-ID")
+ assertThat(mtbFile.serialized()).doesNotContain(CLEAN_PATIENT_ID)
+ }
+
+ @Test
+ fun shouldNotContainAnyUuidAfterRehashingOfIds(@Mock pseudonymizeService: PseudonymizeService) {
+ doAnswer {
+ it.arguments[0]
+ "PSEUDO-ID"
+ }.whenever(pseudonymizeService).patientPseudonym(anyValueClass())
- @Test
- fun shouldNotContainAnyUuidAfterRehashingOfIds(@Mock pseudonymizeService: PseudonymizeService) {
- doAnswer {
- it.arguments[0]
- "PSEUDO-ID"
- }.whenever(pseudonymizeService).patientPseudonym(anyValueClass())
+ doAnswer {
+ "TESTDOMAIN"
+ }.whenever(pseudonymizeService).prefix()
- doAnswer {
- "TESTDOMAIN"
- }.whenever(pseudonymizeService).prefix()
+ val mtbFile = fakeMtbFile()
- val mtbFile = fakeMtbFile()
+ mtbFile.pseudonymizeWith(pseudonymizeService)
+ mtbFile.anonymizeContentWith(pseudonymizeService)
- mtbFile.pseudonymizeWith(pseudonymizeService)
- mtbFile.anonymizeContentWith(pseudonymizeService)
+ val pattern = "\"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}\"".toRegex().toPattern()
+ val matcher = pattern.matcher(mtbFile.serialized())
- val pattern = "\"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}\"".toRegex().toPattern()
- val matcher = pattern.matcher(mtbFile.serialized())
+ assertThrows<IllegalStateException> {
+ matcher.find()
+ matcher.group()
+ }.also {
+ assertThat(it.message).isEqualTo("No match found")
+ }
- assertThrows<IllegalStateException> {
- matcher.find()
- matcher.group()
- }.also {
- assertThat(it.message).isEqualTo("No match found")
}
- }
+ @Test
+ fun shouldRehashIdsWithPrefix(@Mock pseudonymizeService: PseudonymizeService) {
+ doAnswer {
+ it.arguments[0]
+ "PSEUDO-ID"
+ }.whenever(pseudonymizeService).patientPseudonym(anyValueClass())
- @Test
- fun shouldRehashIdsWithPrefix(@Mock pseudonymizeService: PseudonymizeService) {
- doAnswer {
- it.arguments[0]
- "PSEUDO-ID"
- }.whenever(pseudonymizeService).patientPseudonym(anyValueClass())
-
- doAnswer {
- "TESTDOMAIN"
- }.whenever(pseudonymizeService).prefix()
-
- val mtbFile = MtbFile.builder()
- .withPatient(
- Patient.builder()
- .withId("1")
- .withBirthDate("2000-08-08")
- .withGender(Patient.Gender.MALE)
- .build()
- )
- .withConsent(
- Consent.builder()
- .withId("1")
- .withStatus(Consent.Status.ACTIVE)
- .withPatient("123")
- .build()
- )
- .withEpisode(
- Episode.builder()
- .withId("1")
- .withPatient("1")
- .withPeriod(PeriodStart("2023-08-08"))
- .build()
- )
- .build()
-
- mtbFile.pseudonymizeWith(pseudonymizeService)
- mtbFile.anonymizeContentWith(pseudonymizeService)
-
-
- assertThat(mtbFile.episode.id)
- // TESTDOMAIN<sha256(TESTDOMAIN-1)[0-41]>
- .isEqualTo("TESTDOMAIN44e20a53bbbf9f3ae39626d05df7014dcd77d6098")
- }
+ doAnswer {
+ "TESTDOMAIN"
+ }.whenever(pseudonymizeService).prefix()
+
+ val mtbFile = MtbFile.builder()
+ .withPatient(
+ Patient.builder()
+ .withId("1")
+ .withBirthDate("2000-08-08")
+ .withGender(Patient.Gender.MALE)
+ .build()
+ )
+ .withConsent(
+ Consent.builder()
+ .withId("1")
+ .withStatus(Consent.Status.ACTIVE)
+ .withPatient("123")
+ .build()
+ )
+ .withEpisode(
+ Episode.builder()
+ .withId("1")
+ .withPatient("1")
+ .withPeriod(PeriodStart("2023-08-08"))
+ .build()
+ )
+ .build()
+
+ mtbFile.pseudonymizeWith(pseudonymizeService)
+ mtbFile.anonymizeContentWith(pseudonymizeService)
+
+
+ assertThat(mtbFile.episode.id)
+ // TESTDOMAIN<sha256(TESTDOMAIN-1)[0-41]>
+ .isEqualTo("TESTDOMAIN44e20a53bbbf9f3ae39626d05df7014dcd77d6098")
+ }
+
+ @Test
+ fun shouldNotThrowExceptionOnNullValues(@Mock pseudonymizeService: PseudonymizeService) {
+ doAnswer {
+ it.arguments[0]
+ "PSEUDO-ID"
+ }.whenever(pseudonymizeService).patientPseudonym(anyValueClass())
+
+ doAnswer {
+ "TESTDOMAIN"
+ }.whenever(pseudonymizeService).prefix()
+
+ val mtbFile = MtbFile.builder()
+ .withPatient(
+ Patient.builder()
+ .withId("1")
+ .withBirthDate("2000-08-08")
+ .withGender(Patient.Gender.MALE)
+ .build()
+ )
+ .withConsent(
+ Consent.builder()
+ .withId("1")
+ .withStatus(Consent.Status.ACTIVE)
+ .withPatient("123")
+ .build()
+ )
+ .withEpisode(
+ Episode.builder()
+ .withId("1")
+ .withPatient("1")
+ .withPeriod(PeriodStart("2023-08-08"))
+ .build()
+ )
+ .withClaims(null)
+ .withDiagnoses(null)
+ .withCarePlans(null)
+ .withClaimResponses(null)
+ .withEcogStatus(null)
+ .withFamilyMemberDiagnoses(null)
+ .withGeneticCounsellingRequests(null)
+ .withHistologyReevaluationRequests(null)
+ .withHistologyReports(null)
+ .withLastGuidelineTherapies(null)
+ .withMolecularPathologyFindings(null)
+ .withMolecularTherapies(null)
+ .withNgsReports(null)
+ .withPreviousGuidelineTherapies(null)
+ .withRebiopsyRequests(null)
+ .withRecommendations(null)
+ .withResponses(null)
+ .withStudyInclusionRequests(null)
+ .withSpecimens(null)
+ .build()
- @Test
- fun shouldNotThrowExceptionOnNullValues(@Mock pseudonymizeService: PseudonymizeService) {
- doAnswer {
- it.arguments[0]
- "PSEUDO-ID"
- }.whenever(pseudonymizeService).patientPseudonym(anyValueClass())
-
- doAnswer {
- "TESTDOMAIN"
- }.whenever(pseudonymizeService).prefix()
-
- val mtbFile = MtbFile.builder()
- .withPatient(
- Patient.builder()
- .withId("1")
- .withBirthDate("2000-08-08")
- .withGender(Patient.Gender.MALE)
- .build()
- )
- .withConsent(
- Consent.builder()
- .withId("1")
- .withStatus(Consent.Status.ACTIVE)
- .withPatient("123")
- .build()
- )
- .withEpisode(
- Episode.builder()
- .withId("1")
- .withPatient("1")
- .withPeriod(PeriodStart("2023-08-08"))
- .build()
- )
- .withClaims(null)
- .withDiagnoses(null)
- .withCarePlans(null)
- .withClaimResponses(null)
- .withEcogStatus(null)
- .withFamilyMemberDiagnoses(null)
- .withGeneticCounsellingRequests(null)
- .withHistologyReevaluationRequests(null)
- .withHistologyReports(null)
- .withLastGuidelineTherapies(null)
- .withMolecularPathologyFindings(null)
- .withMolecularTherapies(null)
- .withNgsReports(null)
- .withPreviousGuidelineTherapies(null)
- .withRebiopsyRequests(null)
- .withRecommendations(null)
- .withResponses(null)
- .withStudyInclusionRequests(null)
- .withSpecimens(null)
- .build()
-
- mtbFile.pseudonymizeWith(pseudonymizeService)
- mtbFile.anonymizeContentWith(pseudonymizeService)
-
-
- assertThat(mtbFile.episode.id).isNotNull()
+ mtbFile.pseudonymizeWith(pseudonymizeService)
+ mtbFile.anonymizeContentWith(pseudonymizeService)
+
+ assertThat(mtbFile.episode.id).isNotNull()
+ }
}
-} \ No newline at end of file
+    // Tests for the pseudonymization extensions on the DNPM V2 data model (Mtb)
+    @Nested
+    inner class UsingDnpmV2Datamodel {
+
+        // Fake MTB file on the classpath and the patient ID that must vanish from it
+        val FAKE_MTB_FILE_PATH = "mv64e-mtb-fake-patient.json"
+        val CLEAN_PATIENT_ID = "63f8fd7b-8127-4f3c-8843-aa9199e21c29"
+
+        private fun fakeMtbFile(): Mtb =
+            ObjectMapper().readValue(ClassPathResource(FAKE_MTB_FILE_PATH).inputStream, Mtb::class.java)
+
+        private fun Mtb.serialized(): String = ObjectMapper().writeValueAsString(this)
+
+        @Test
+        fun shouldNotContainCleanPatientId(@Mock pseudonymizeService: PseudonymizeService) {
+            doAnswer {
+                it.arguments[0]
+                "PSEUDO-ID"
+            }.whenever(pseudonymizeService).patientPseudonym(anyValueClass())
+
+            val mtbFile = fakeMtbFile()
+
+            mtbFile.pseudonymizeWith(pseudonymizeService)
+
+            assertThat(mtbFile.patient.id).isEqualTo("PSEUDO-ID")
+            assertThat(mtbFile.serialized()).doesNotContain(CLEAN_PATIENT_ID)
+        }
+
+        @Test
+        fun shouldNotThrowExceptionOnNullValues(@Mock pseudonymizeService: PseudonymizeService) {
+            doAnswer {
+                it.arguments[0]
+                "PSEUDO-ID"
+            }.whenever(pseudonymizeService).patientPseudonym(anyValueClass())
+
+            doAnswer {
+                "TESTDOMAIN"
+            }.whenever(pseudonymizeService).prefix()
+
+            // Most collections are explicitly set to null to exercise the null handling
+            val mtbFile = Mtb.builder()
+                .withPatient(
+                    dev.pcvolkmer.mv64e.mtb.Patient.builder()
+                        .withId("1")
+                        .withBirthDate("2000-08-08")
+                        .withGender(null)
+                        .build()
+                )
+                .withEpisodesOfCare(
+                    listOf(
+                        MTBEpisodeOfCare.builder()
+                            .withId("1")
+                            .withPatient(Reference("1"))
+                            .withPeriod(PeriodDate.builder().withStart("2023-08-08").build())
+                            .build()
+                    )
+                )
+                .withClaims(null)
+                .withDiagnoses(null)
+                .withCarePlans(null)
+                .withClaimResponses(null)
+                .withHistologyReports(null)
+                .withNgsReports(null)
+                .withResponses(null)
+                .withSpecimens(null)
+                .build()
+
+            mtbFile.pseudonymizeWith(pseudonymizeService)
+            mtbFile.anonymizeContentWith(pseudonymizeService)
+
+            assertThat(mtbFile.episodesOfCare).hasSize(1)
+            // Check the IDs themselves: the list returned by map is never null
+            assertThat(mtbFile.episodesOfCare.map { it.id }).doesNotContainNull()
+        }
+    }
+}