<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<!DOCTYPE GmsArticle SYSTEM "http://www.egms.de/dtd/2.0.34/GmsArticle.dtd">
<GmsArticle xmlns:xlink="http://www.w3.org/1999/xlink">
  <MetaData>
    <Identifier>25gmds042</Identifier>
    <IdentifierDoi>10.3205/25gmds042</IdentifierDoi>
    <IdentifierUrn>urn:nbn:de:0183-25gmds0428</IdentifierUrn>
    <ArticleType>Meeting Abstract</ArticleType>
    <TitleGroup>
      <Title language="en">Privacy-preserving federated analysis and harmonisation of heterogeneous datasets in NFDI4Health</Title>
    </TitleGroup>
    <CreatorList>
      <Creator>
        <PersonNames>
          <Lastname>Siampani</Lastname>
          <LastnameHeading>Siampani</LastnameHeading>
          <Firstname>Sofia Maria</Firstname>
          <Initials>SM</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Max Delbr&#252;ck Center for Molecular Medicine in the Helmholtz Association (MDC), Molecular Epidemiology Research Group, Berlin, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Schwarz</Lastname>
          <LastnameHeading>Schwarz</LastnameHeading>
          <Firstname>Florian</Firstname>
          <Initials>F</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Department of Molecular Epidemiology, German Institute of Human Nutrition Potsdam-Rehbruecke, Nuthetal, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Jannasch</Lastname>
          <LastnameHeading>Jannasch</LastnameHeading>
          <Firstname>Franziska</Firstname>
          <Initials>F</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Department of Molecular Epidemiology, German Institute of Human Nutrition Potsdam-Rehbruecke, Nuthetal, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Osei</Lastname>
          <LastnameHeading>Osei</LastnameHeading>
          <Firstname>Tracy Bonsu</Firstname>
          <Initials>TB</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Max Delbr&#252;ck Center for Molecular Medicine in the Helmholtz Association (MDC), Molecular Epidemiology Research Group, Berlin, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Perrar</Lastname>
          <LastnameHeading>Perrar</LastnameHeading>
          <Firstname>Ines</Firstname>
          <Initials>I</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Department of Nutrition and Food Sciences, Nutritional Epidemiology, University of Bonn, Bonn, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Schulze</Lastname>
          <LastnameHeading>Schulze</LastnameHeading>
          <Firstname>Matthias B.</Firstname>
          <Initials>MB</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Department of Molecular Epidemiology, German Institute of Human Nutrition Potsdam-Rehbruecke, Nuthetal, Germany</Affiliation>
          <Affiliation>Institute of Nutritional Science, University of Potsdam, Nuthetal, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>N&#246;thlings</Lastname>
          <LastnameHeading>N&#246;thlings</LastnameHeading>
          <Firstname>Ute</Firstname>
          <Initials>U</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Department of Nutrition and Food Sciences, Nutritional Epidemiology, University of Bonn, Bonn, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Nimptsch</Lastname>
          <LastnameHeading>Nimptsch</LastnameHeading>
          <Firstname>Katharina</Firstname>
          <Initials>K</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Max Delbr&#252;ck Center for Molecular Medicine in the Helmholtz Association (MDC), Molecular Epidemiology Research Group, Berlin, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Pischon</Lastname>
          <LastnameHeading>Pischon</LastnameHeading>
          <Firstname>Tobias</Firstname>
          <Initials>T</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Max Delbr&#252;ck Center for Molecular Medicine in the Helmholtz Association (MDC), Molecular Epidemiology Research Group, Berlin, Germany</Affiliation>
          <Affiliation>Charit&#233; &#8211; Universit&#228;tsmedizin Berlin, Corporate Member of Freie Universit&#228;t Berlin, Humboldt Universit&#228;t zu Berlin, Berlin Institute of Health, Berlin, Germany</Affiliation>
          <Affiliation>Biobank Technology Platform, Max Delbr&#252;ck Center for Molecular Medicine in the Helmholtz Association (MDC), Berlin, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
    </CreatorList>
    <PublisherList>
      <Publisher>
        <Corporation>
          <Corporatename>German Medical Science GMS Publishing House</Corporatename>
        </Corporation>
        <Address>D&#252;sseldorf</Address>
      </Publisher>
    </PublisherList>
    <SubjectGroup>
      <SubjectheadingDDB>610</SubjectheadingDDB>
      <Keyword language="en">NFDI4Health</Keyword>
      <Keyword language="en">DataSHIELD</Keyword>
      <Keyword language="en">data harmonisation</Keyword>
    </SubjectGroup>
    <DatePublishedList>
      <DatePublished>20251103</DatePublished>
    </DatePublishedList>
    <Language>engl</Language>
    <License license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/4.0/">
      <AltText language="en">This is an Open Access article distributed under the terms of the Creative Commons Attribution 4.0 License.</AltText>
      <AltText language="de">Dieser Artikel ist ein Open-Access-Artikel und steht unter den Lizenzbedingungen der Creative Commons Attribution 4.0 License (Namensnennung).</AltText>
    </License>
    <SourceGroup>
      <Meeting>
        <MeetingId>M0631</MeetingId>
        <MeetingSequence>042</MeetingSequence>
        <MeetingCorporation>Deutsche Gesellschaft f&#252;r Medizinische Informatik, Biometrie und Epidemiologie</MeetingCorporation>
        <MeetingName>70. Jahrestagung der Deutschen Gesellschaft f&#252;r Medizinische Informatik, Biometrie und Epidemiologie e. V. (GMDS)</MeetingName>
        <MeetingTitle></MeetingTitle>
        <MeetingSession>V: Gesundheitsdatennutzung &#8211; distributed &#38; federated analyses</MeetingSession>
        <MeetingCity>Jena</MeetingCity>
        <MeetingDate>
          <DateFrom>20250907</DateFrom>
          <DateTo>20250911</DateTo>
        </MeetingDate>
      </Meeting>
    </SourceGroup>
    <ArticleNo>Abstr. 83</ArticleNo>
    <Fundings>
      <Funding fundId="442326535">Deutsche Forschungsgemeinschaft (DFG)</Funding>
    </Fundings>
  </MetaData>
  <OrigData>
    <TextBlock name="Text" linked="yes">
      <MainHeadline>Text</MainHeadline><Pgraph><Mark1>Introduction: </Mark1>In Germany, cohort studies produce phenotypically rich but heterogeneous datasets that capture various aspects of the population&#39;s health and lifestyle. Combining and jointly analysing these datasets may enable large scientific opportunities by enhancement of statistical power, broadening generalisability, and examination of subgroups and minorities. However, data protection and governance regulations as well as heterogeneity in variables across cohorts present significant challenges. NFDI4Health, the national research data infrastructure for personal health data, addresses these challenges through the implementation of DataSHIELD <TextLink reference="1"></TextLink>, a federated analysis framework, combined with a central access point. This enables a sustainable way for secure, privacy-preserving analyses without transferring or sharing individual-level data. To make datasets compatible for joint analysis, a data harmonisation concept has been developed using Rmonize <TextLink reference="2"></TextLink>, a package by Maelstrom Research that streamlines the process in a semi-automatic way. The presentation will cover the concept of federated analysis and harmonisation, along with its technical implementation within NFDI4Health.</Pgraph><Pgraph><Mark1>Methods:</Mark1> We implemented DataSHIELD&#39;s client-server architecture at participating data holding organisations (DHOs), where individual-level data of cohort studies remains securely stored on Opal servers. Analysts interact with a central analysis environment hosted at the Max Delbr&#252;ck Center that is connected to the DHOs, issuing commands that execute locally across all connected servers. The framework employs automated disclosure controls to ensure only non-disclosive summary statistics are returned. In addition, a data harmonisation workflow has been set up using Rmonize. 
This process involves collecting metadata, defining a target dataschema, and developing harmonisation algorithms for each study-specific variable. DHOs execute these algorithms locally without exposing individual-level data. Researchers receive summary reports with descriptive statistics to validate the outcomes. The harmonised datasets are then used in DataSHIELD. </Pgraph><Pgraph><Mark1>Results:</Mark1> We have successfully implemented and expanded the DataSHIELD network across Germany, enabling DHOs to host data within the federated environment. Currently, nine nodes are active, covering eleven studies. To extend DataSHIELD functionality, we developed R packages such as dsClusterAnalysis and dsSupportClient. </Pgraph><Pgraph>In parallel, we implemented the harmonisation workflow to standardise epidemiological variables (e.g., anthropometric measures, dietary patterns and chronic disease data) across cohort studies. So far, we have defined the harmonisation potential for 215 target variables across 5 studies. This experience demonstrated successful alignment for a substantial proportion of targeted variables (52&#37; complete or partial), enabling their reuse in federated analyses. Challenges included managing variable granularity, defining plausible value ranges for quality checks and ensuring robust testing to minimise workload for DHOs. </Pgraph><Pgraph><Mark1>Conclusion:</Mark1> The combination of DataSHIELD for federated analysis with a central analysis environment and implementation of a semi-automatic harmonisation concept provides a sustainable, privacy-preserving solution for collaborative health research that allows the combination of heterogeneous datasets. By ensuring data compatibility and secure analysis workflows, this approach tackles key challenges in multi-study projects. 
Future work will focus on expanding the DataSHIELD network and the harmonisation service within NFDI4Health.</Pgraph><Pgraph><Mark1>Acknowledgements:</Mark1> This work was done as part of the NFDI4Health Consortium (<Hyperlink href="https:&#47;&#47;www.nfdi4health.de&#47;">https:&#47;&#47;www.nfdi4health.de&#47;</Hyperlink>). We gratefully acknowledge the financial support of the Deutsche Forschungsgemeinschaft (DFG, German Research Foundation) &#8211; project number 442326535.</Pgraph><Pgraph>The authors declare that they have no competing interests.</Pgraph><Pgraph>The authors declare that an ethics committee vote is not required.</Pgraph></TextBlock>
    <References linked="yes">
      <Reference refNo="1">
        <RefAuthor>Gaye A</RefAuthor>
        <RefAuthor>Marcon Y</RefAuthor>
        <RefAuthor>Isaeva J</RefAuthor>
        <RefAuthor>LaFlamme P</RefAuthor>
        <RefAuthor>Turner A</RefAuthor>
        <RefAuthor>Jones EM</RefAuthor>
        <RefAuthor></RefAuthor>
        <RefTitle>DataSHIELD: Taking the analysis to the data, not the data to the analysis</RefTitle>
        <RefYear>2014</RefYear>
        <RefJournal>Int J Epidemiol</RefJournal>
        <RefPage>1929&#8211;44</RefPage>
        <RefTotal>Gaye A, Marcon Y, Isaeva J, LaFlamme P, Turner A, Jones EM, et al. DataSHIELD: Taking the analysis to the data, not the data to the analysis. Int J Epidemiol. 2014;43(6):1929&#8211;44.</RefTotal>
      </Reference>
      <Reference refNo="2">
        <RefAuthor>Anonym</RefAuthor>
        <RefTitle></RefTitle>
        <RefYear>2023</RefYear>
        <RefBookTitle>Rmonize Package Documentation. Rmonize: A package for harmonizing epidemiological datasets in R</RefBookTitle>
        <RefPage></RefPage>
        <RefTotal>Rmonize Package Documentation. Rmonize: A package for harmonizing epidemiological datasets in R. 2023 &#91;cited 2025 Apr 9&#93;. Available from: https:&#47;&#47;cran.r-project.org&#47;package&#61;Rmonize</RefTotal>
        <RefLink>https:&#47;&#47;cran.r-project.org&#47;package&#61;Rmonize</RefLink>
      </Reference>
    </References>
    <Media>
      <Tables>
        <NoOfTables>0</NoOfTables>
      </Tables>
      <Figures>
        <NoOfPictures>0</NoOfPictures>
      </Figures>
      <InlineFigures>
        <NoOfPictures>0</NoOfPictures>
      </InlineFigures>
      <Attachments>
        <NoOfAttachments>0</NoOfAttachments>
      </Attachments>
    </Media>
  </OrigData>
</GmsArticle>