summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: jlidke, 2025-08-14 10:33:55 +0200
committer: GitHub, 2025-08-14 10:33:55 +0200
commit: be513f305ae4c632aa567e42e9438f233590ab3f (patch)
tree: 8144d2baaaf7f00e079c0a92fd969cf7d9a76447
parent: 2e881578937ee39bab3cacff9ee09328658341c2 (diff)
108 anonym id mtb v2 (#131)
-rw-r--r-- src/main/kotlin/dev/dnpm/etl/processor/pseudonym/extensions.kt | 187
-rw-r--r-- src/test/kotlin/dev/dnpm/etl/processor/pseudonym/ExtensionsTest.kt | 58
2 files changed, 233 insertions, 12 deletions
diff --git a/src/main/kotlin/dev/dnpm/etl/processor/pseudonym/extensions.kt b/src/main/kotlin/dev/dnpm/etl/processor/pseudonym/extensions.kt
index 28a7d3c..01c781b 100644
--- a/src/main/kotlin/dev/dnpm/etl/processor/pseudonym/extensions.kt
+++ b/src/main/kotlin/dev/dnpm/etl/processor/pseudonym/extensions.kt
@@ -87,6 +87,8 @@ infix fun Mtb.pseudonymizeWith(pseudonymizeService: PseudonymizeService) {
it.patient.id = patientPseudonym
}
+ this.msiFindings?.forEach { it -> it.patient.id = patientPseudonym }
+
this.metadata?.researchConsents?.forEach { it ->
val entry = it ?: return@forEach
if (entry.contains("patient")) {
@@ -115,14 +117,177 @@ infix fun Mtb.anonymizeContentWith(pseudonymizeService: PseudonymizeService) {
}
this.episodesOfCare?.forEach {
- it?.apply {
- id = id?.let {
- anonymize(it)
+ it?.apply { id = id?.let(::anonymize) }
+ it.diagnoses?.forEach { it ->
+ it?.id = it.id?.let(::anonymize)
+ }
+ }
+
+ this.carePlans?.onEach { carePlan ->
+ carePlan?.apply {
+ id = id?.let { anonymize(it) }
+
+ diagnoses?.forEach { it -> it?.id = it.id?.let(::anonymize) }
+ geneticCounselingRecommendation?.apply {
+ id = geneticCounselingRecommendation.id?.let(::anonymize)
+ }
+ rebiopsyRequests?.forEach { it ->
+ it.id = it.id?.let(::anonymize)
+ it.tumorEntity?.id = it.tumorEntity?.id?.let(::anonymize)
+ }
+ histologyReevaluationRequests?.forEach { it ->
+ it.id = it?.id?.let(::anonymize)
+ it.specimen?.id = it.specimen?.id?.let(::anonymize)
+ }
+
+ medicationRecommendations?.forEach { it ->
+ it.id = it?.id?.let(::anonymize)
+ it.supportingVariants?.forEach { it ->
+ it.variant?.id = it.variant?.id?.let(::anonymize)
+ }
+ it.reason?.id = it.reason?.id?.let(::anonymize)
+ }
+ reason?.id = reason?.id?.let(::anonymize)
+ studyEnrollmentRecommendations?.forEach { it ->
+ it?.reason?.id = it.reason?.id?.let(::anonymize)
+ }
+
+ procedureRecommendations?.forEach { it ->
+
+ it.id = it?.id?.let(::anonymize)
+ it.supportingVariants?.forEach { it ->
+ it.variant?.id = it.variant?.id?.let(::anonymize)
+ }
+
+ it.reason?.id = it.reason?.id?.let(::anonymize)
+
+ studyEnrollmentRecommendations?.forEach { it ->
+
+ it.id = it?.id?.let(::anonymize)
+ it.supportingVariants.forEach { it ->
+ it.variant?.id = it?.variant?.id?.let(::anonymize)
+ }
+ responses?.forEach { it ->
+ it.id = it?.id?.let(::anonymize)
+ it.id = it?.id?.let(::anonymize)
+ }
+ }
}
}
}
- // TODO all other properties
+
+ this.responses?.forEach { it ->
+
+ it?.id = it.id?.let(::anonymize)
+ it?.therapy?.id = it.therapy?.id?.let(::anonymize)
+
+ }
+
+ this.diagnoses?.forEach { it ->
+
+ it.id = it?.id?.let(::anonymize)
+ it.histology?.forEach { it -> it.id = it?.id?.let(::anonymize) }
+ }
+
+ this.ngsReports?.forEach { it ->
+ it.id = it?.id?.let(::anonymize)
+ it.results?.tumorCellContent?.id = it.results.tumorCellContent?.id?.let(::anonymize)
+ it.results?.tumorCellContent?.specimen?.id =
+ it.results?.tumorCellContent?.specimen?.id?.let(::anonymize)
+ it.results?.rnaFusions?.forEach { it ->
+ it?.id = it.id?.let(::anonymize)
+ }
+ it.results?.simpleVariants?.forEach { it ->
+ it?.id = it.id?.let(::anonymize)
+ it?.transcriptId?.value = it.transcriptId?.value?.let(::anonymize)
+ }
+ it.results?.tmb?.id = it.results?.tmb?.id?.let(::anonymize)
+ it.results?.tmb?.specimen?.id = it.results?.tmb?.specimen?.id?.let(::anonymize)
+
+ it.results?.brcaness?.id = it.results?.brcaness?.id?.let(::anonymize)
+ it.results?.brcaness?.specimen?.id = it.results?.brcaness?.specimen?.id?.let(::anonymize)
+ it.results?.copyNumberVariants?.forEach { it -> it?.id = it.id?.let(::anonymize) }
+ it.results?.hrdScore?.id = it.results?.hrdScore?.id?.let(::anonymize)
+ it.results?.hrdScore?.specimen?.id = it.results?.hrdScore?.specimen?.id?.let(::anonymize)
+ it.results?.rnaSeqs?.forEach { it -> it?.id = it.id?.let(::anonymize) }
+ it.results?.dnaFusions?.forEach { it -> it?.id = it.id?.let(::anonymize) }
+ it.specimen?.id = it?.specimen?.id?.let(::anonymize)
+
+ }
+
+ this.histologyReports?.forEach { it ->
+ it.id = it?.id?.let(::anonymize)
+ it.results?.tumorCellContent?.id = it.results?.tumorCellContent?.id?.let(::anonymize)
+ it.results?.tumorCellContent?.specimen?.id =
+ it.results?.tumorCellContent?.specimen?.id?.let(::anonymize)
+
+ it.results?.tumorMorphology?.id = it.results?.tumorMorphology?.id?.let(::anonymize)
+ it.results?.tumorMorphology?.specimen?.id =
+ it.results?.tumorMorphology?.specimen?.id?.let(::anonymize)
+ it.specimen?.id = it.specimen?.id?.let(::anonymize)
+
+ }
+ this.claimResponses?.forEach { it ->
+ it.id = it?.id?.let(::anonymize)
+ it.claim?.id = it.claim?.id?.let(::anonymize)
+ }
+ this.claims?.forEach { it ->
+
+ it.id = it?.id?.let(::anonymize)
+ it.recommendation?.id = it.recommendation?.id?.let(::anonymize)
+
+ }
+ this.familyMemberHistories?.forEach { it -> it.id = it?.id?.let(::anonymize) }
+ this.guidelineProcedures?.forEach { it ->
+ it.id = it?.id?.let(::anonymize)
+ it.reason?.id = it.reason?.id?.let(::anonymize)
+ it.basedOn?.id = it.basedOn?.id?.let(::anonymize)
+
+ }
+
+ this.guidelineTherapies?.forEach { it ->
+ it.id = it?.id?.let(::anonymize)
+ it.reason?.id = it.reason?.id?.let(::anonymize)
+ it.basedOn?.id = it.basedOn?.id?.let(::anonymize)
+ }
+ this.ihcReports?.forEach { it ->
+ it.id = it?.id?.let(::anonymize)
+ it.specimen?.id = it.specimen?.id?.let(::anonymize)
+ it.results.proteinExpression.forEach { it -> it?.id = it.id.let(::anonymize) }
+ }
+
+ this.msiFindings?.forEach { it ->
+
+ it.id = it?.id?.let(::anonymize)
+ it.specimen?.id = it.specimen?.id?.let(::anonymize)
+ }
+
+ this.performanceStatus?.forEach { it -> it.id = it?.id?.let(::anonymize) }
+
+ this.priorDiagnosticReports?.forEach { it ->
+
+ it.id = it?.id?.let(::anonymize)
+ it.specimen?.id = it.specimen?.id?.let(::anonymize)
+ }
+
+ this.specimens?.forEach { it ->
+
+ it.id = it?.id?.let(::anonymize)
+ it.diagnosis?.id = it.diagnosis?.id?.let(::anonymize)
+
+ }
+
+ this.systemicTherapies?.forEach { it ->
+
+ it.history?.forEach { it ->
+
+ it.id = it?.id?.let(::anonymize)
+ it.reason?.id = it.reason?.id?.let(::anonymize)
+ it.basedOn?.id = it.basedOn?.id?.let(::anonymize)
+ }
+
+ }
}
fun Mtb.ensureMetaDataIsInitialized() {
@@ -137,15 +302,13 @@ fun Mtb.ensureMetaDataIsInitialized() {
if (this.metadata.modelProjectConsent == null) {
this.metadata.modelProjectConsent = ModelProjectConsent()
this.metadata.modelProjectConsent.provisions = mutableListOf()
- } else
- if (this.metadata.modelProjectConsent.provisions != null) {
- // make sure list can be changed
- this.metadata.modelProjectConsent.provisions =
- this.metadata.modelProjectConsent.provisions.toMutableList()
- }
+ } else if (this.metadata.modelProjectConsent.provisions != null) {
+ // make sure list can be changed
+ this.metadata.modelProjectConsent.provisions =
+ this.metadata.modelProjectConsent.provisions.toMutableList()
+ }
}
-infix fun Mtb.addGenomDeTan(pseudonymizeService: PseudonymizeService)
-{
+infix fun Mtb.addGenomDeTan(pseudonymizeService: PseudonymizeService) {
this.metadata.transferTan = pseudonymizeService.genomDeTan(PatientId(this.patient.id))
}
diff --git a/src/test/kotlin/dev/dnpm/etl/processor/pseudonym/ExtensionsTest.kt b/src/test/kotlin/dev/dnpm/etl/processor/pseudonym/ExtensionsTest.kt
index 5955263..58405cd 100644
--- a/src/test/kotlin/dev/dnpm/etl/processor/pseudonym/ExtensionsTest.kt
+++ b/src/test/kotlin/dev/dnpm/etl/processor/pseudonym/ExtensionsTest.kt
@@ -138,5 +138,63 @@ class ExtensionsTest {
assertThat(mtbFile.episodesOfCare).hasSize(1)
assertThat(mtbFile.episodesOfCare.map { it.id }).isNotNull
}
+
+    /**
+     * Serializes a fake MTB file after pseudonymization and id anonymization and
+     * verifies that no quoted, UUID-shaped value is left in the output.
+     */
+    @Test
+    fun shouldNotContainAnyUuidAfterRehashingOfIds(@Mock pseudonymizeService: PseudonymizeService) {
+        doAnswer {
+            "PSEUDO-ID"
+        }.whenever(pseudonymizeService).patientPseudonym(anyValueClass())
+
+        doAnswer {
+            "TESTDOMAIN"
+        }.whenever(pseudonymizeService).prefix()
+
+        val mtbFile = fakeMtbFile()
+
+        // Math.random() is in [0.0, 1.0), so Math.random().toLong() is always 0;
+        // draw an actual random long for the replacement values instead.
+        val randomValue = { kotlin.random.Random.nextLong().toString() }
+
+        // Replace values that may look like hex strings with random longs, so the
+        // UUID check below does not match false positives.
+        mtbFile.ngsReports.forEach { report ->
+            report.results.simpleVariants.forEach { simpleVariant ->
+                simpleVariant.externalIds.forEach { externalId ->
+                    externalId.value = randomValue()
+                }
+            }
+            report.results.rnaFusions.forEach { rnaFusion ->
+                rnaFusion.externalIds?.forEach { externalId ->
+                    externalId?.value = randomValue()
+                }
+                rnaFusion.fusionPartner3Prime?.transcriptId?.value = randomValue()
+                rnaFusion.fusionPartner5Prime?.transcriptId?.value = randomValue()
+            }
+        }
+
+        mtbFile.pseudonymizeWith(pseudonymizeService)
+        mtbFile.anonymizeContentWith(pseudonymizeService)
+
+        val uuidRegex =
+            "\"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}\"".toRegex()
+
+        // Look for the first UUID-shaped value; on failure the assertion reports
+        // the offending value and its position in the serialized output.
+        val leftoverUuid = uuidRegex.find(mtbFile.serialized())
+        assertThat(leftoverUuid?.value)
+            .describedAs("UUID-shaped id left at %s", leftoverUuid?.range)
+            .isNull()
+    }
}
}