<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<!DOCTYPE GmsArticle SYSTEM "http://www.egms.de/dtd/2.0.34/GmsArticle.dtd">
<GmsArticle xmlns:xlink="http://www.w3.org/1999/xlink">
  <MetaData>
    <Identifier>25gmds064</Identifier>
    <IdentifierDoi>10.3205/25gmds064</IdentifierDoi>
    <IdentifierUrn>urn:nbn:de:0183-25gmds0641</IdentifierUrn>
    <ArticleType>Meeting Abstract</ArticleType>
    <TitleGroup>
      <Title language="en">Transforming Oncology Data for Artificial Intelligence: Leveraging oBDS for Scalable Medical Research</Title>
    </TitleGroup>
    <CreatorList>
      <Creator>
        <PersonNames>
          <Lastname>Warnecke</Lastname>
          <LastnameHeading>Warnecke</LastnameHeading>
          <Firstname>Yannik</Firstname>
          <Initials>Y</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Institute of Medical Informatics, University of M&#252;nster, M&#252;nster, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Kuhn</Lastname>
          <LastnameHeading>Kuhn</LastnameHeading>
          <Firstname>Martin</Firstname>
          <Initials>M</Initials>
        </PersonNames>
        <Address>
          <Affiliation>German Research Center for Artificial Intelligence (DFKI), Branch Trier, Trier, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Bley</Lastname>
          <LastnameHeading>Bley</LastnameHeading>
          <Firstname>Laura Isabell</Firstname>
          <Initials>LI</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Department of Dermatology, University Clinic M&#252;nster, M&#252;nster, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Preciado-Marquez</Lastname>
          <LastnameHeading>Preciado-Marquez</LastnameHeading>
          <Firstname>Daniel</Firstname>
          <Initials>D</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Institute of Medical Informatics, University of M&#252;nster, M&#252;nster, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Otte</Lastname>
          <LastnameHeading>Otte</LastnameHeading>
          <Firstname>Karen</Firstname>
          <Initials>K</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Medical Informatics Group, Berlin Institute of Health at Charit&#233; &#8211; Universit&#228;tsmedizin Berlin, Berlin, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Pasquier</Lastname>
          <LastnameHeading>Pasquier</LastnameHeading>
          <Firstname>Anna</Firstname>
          <Initials>A</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Medical Informatics Group, Berlin Institute of Health at Charit&#233; &#8211; Universit&#228;tsmedizin Berlin, Berlin, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Gr&#252;ger</Lastname>
          <LastnameHeading>Gr&#252;ger</LastnameHeading>
          <Firstname>Joscha</Firstname>
          <Initials>J</Initials>
        </PersonNames>
        <Address>
          <Affiliation>German Research Center for Artificial Intelligence (DFKI), Branch Trier, Trier, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Weishaupt</Lastname>
          <LastnameHeading>Weishaupt</LastnameHeading>
          <Firstname>Carsten</Firstname>
          <Initials>C</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Department of Dermatology, University Clinic M&#252;nster, M&#252;nster, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Bergmann</Lastname>
          <LastnameHeading>Bergmann</LastnameHeading>
          <Firstname>Ralph</Firstname>
          <Initials>R</Initials>
        </PersonNames>
        <Address>
          <Affiliation>German Research Center for Artificial Intelligence (DFKI), Branch Trier, Trier, Germany</Affiliation>
          <Affiliation>Business Information Systems II, University of Trier, Trier, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Braun</Lastname>
          <LastnameHeading>Braun</LastnameHeading>
          <Firstname>Stephan Alexander</Firstname>
          <Initials>SA</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Department of Dermatology, University Clinic M&#252;nster, M&#252;nster, Germany</Affiliation>
          <Affiliation>Department of Dermatology, Medical Faculty, Heinrich Heine University D&#252;sseldorf, D&#252;sseldorf, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Prasser</Lastname>
          <LastnameHeading>Prasser</LastnameHeading>
          <Firstname>Fabian</Firstname>
          <Initials>F</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Medical Informatics Group, Berlin Institute of Health at Charit&#233; &#8211; Universit&#228;tsmedizin Berlin, Berlin, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Heider</Lastname>
          <LastnameHeading>Heider</LastnameHeading>
          <Firstname>Dominik</Firstname>
          <Initials>D</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Institute of Medical Informatics, University of M&#252;nster, M&#252;nster, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Storck</Lastname>
          <LastnameHeading>Storck</LastnameHeading>
          <Firstname>Michael</Firstname>
          <Initials>M</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Institute of Medical Informatics, University of M&#252;nster, M&#252;nster, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
    </CreatorList>
    <PublisherList>
      <Publisher>
        <Corporation>
          <Corporatename>German Medical Science GMS Publishing House</Corporatename>
        </Corporation>
        <Address>D&#252;sseldorf</Address>
      </Publisher>
    </PublisherList>
    <SubjectGroup>
      <SubjectheadingDDB>610</SubjectheadingDDB>
      <Keyword language="en">artificial intelligence (AI)</Keyword>
      <Keyword language="en">data standardization and transformation</Keyword>
      <Keyword language="en">ETL</Keyword>
      <Keyword language="en">anonymization</Keyword>
      <Keyword language="en">synthetization</Keyword>
    </SubjectGroup>
    <DatePublishedList>
      <DatePublished>20251103</DatePublished>
    </DatePublishedList>
    <Language>engl</Language>
    <License license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/4.0/">
      <AltText language="en">This is an Open Access article distributed under the terms of the Creative Commons Attribution 4.0 License.</AltText>
      <AltText language="de">Dieser Artikel ist ein Open-Access-Artikel und steht unter den Lizenzbedingungen der Creative Commons Attribution 4.0 License (Namensnennung).</AltText>
    </License>
    <SourceGroup>
      <Meeting>
        <MeetingId>M0631</MeetingId>
        <MeetingSequence>064</MeetingSequence>
        <MeetingCorporation>Deutsche Gesellschaft f&#252;r Medizinische Informatik, Biometrie und Epidemiologie</MeetingCorporation>
        <MeetingName>70. Jahrestagung der Deutschen Gesellschaft f&#252;r Medizinische Informatik, Biometrie und Epidemiologie e. V. (GMDS)</MeetingName>
        <MeetingTitle></MeetingTitle>
        <MeetingSession>V: Machine learning and AI applications 1</MeetingSession>
        <MeetingCity>Jena</MeetingCity>
        <MeetingDate>
          <DateFrom>20250907</DateFrom>
          <DateTo>20250911</DateTo>
        </MeetingDate>
      </Meeting>
    </SourceGroup>
    <ArticleNo>Abstr. 139</ArticleNo>
  </MetaData>
  <OrigData>
    <TextBlock name="Text" linked="yes">
      <MainHeadline>Text</MainHeadline><Pgraph><Mark1>Introduction:</Mark1> The Oncology Base Dataset (oBDS) is a standardized reporting framework used in Germany for documenting oncology cases, playing a crucial role in ensuring data consistency and availability for cancer research and treatment planning <TextLink reference="1"></TextLink>. However, leveraging oBDS data for predictive modeling and decision support poses challenges because its hierarchical format is optimized for reporting and is thus not directly usable for most artificial intelligence (AI) methods <TextLink reference="2"></TextLink>. Moreover, ensuring the wide availability of data from different institutions remains a significant legal challenge in Germany <TextLink reference="3"></TextLink>. In this study, we investigated the general potential of utilizing the oBDS for AI-driven research as part of the project &#39;<Mark2>KI-basierte Anonymisierung in der Medizin</Mark2>&#39; (KI-AIM), which explores the application of anonymization and synthetization techniques in healthcare to increase data availability for AI research.</Pgraph><Pgraph><Mark1>Methods:</Mark1> To overcome the limitations of oBDS for AI applications, we implemented an <Mark2>Extract, Transform, Load</Mark2> (ETL) process that restructures the dataset into a two-dimensional format, making it more suitable for machine learning. The ETL process utilizes the Last Value Carried Forward (LVCF) method to fill missing values with the most recent available data point, ensuring completeness and consistency in patient records <TextLink reference="4"></TextLink>. This step is critical for medical datasets where data collection intervals often vary across cases. Next, we evaluated the utility of the transformed dataset using a straightforward AI analysis.
This involved training foundational machine learning and deep learning models on the processed oBDS data to classify the current tumor stage, evaluating their performance using key metrics such as accuracy and F1-score. </Pgraph><Pgraph><Mark1>Results:</Mark1> Our analysis demonstrated that the transformed oBDS dataset effectively supports AI applications. Across all evaluated classifiers, including Support Vector Classifier (SVC), Logistic Regression (LR), K-Nearest Neighbors (KNN), Random Forest Classifier (RFC), Decision Tree Classifier (DTC), and Multi-Layer Perceptron (MLP), we observed an average accuracy of 79.90&#37; and an average F1-score of 79.12&#37;, with individual models achieving accuracies between 65.25&#37; (LR) and 93.51&#37; (RFC) and F1-scores between 63.58&#37; (LR) and 93.41&#37; (RFC). </Pgraph><Pgraph><Mark1>Conclusion:</Mark1> The successful transformation of oBDS data into an AI-compatible format paves the way for more advanced analyses and shows the potential of its application for oncology research. This process is especially important in the context of the Medical Informatics Initiative (MII), as it ensures that the newly established Oncology Core Dataset Module becomes usable for AI applications. As part of our ongoing project, we plan to integrate this dataset with anonymization and synthetization techniques to create a robust and privacy-preserving framework for AI-driven oncology research. Specifically, we will explore both the use of k-anonymity and the application of generative models to synthesize patient data, creating realistic and anonymous datasets for training and validating AI models. By combining these approaches, we aim to develop a sophisticated medical use case that demonstrates the potential of anonymized and synthesized data for improving cancer diagnosis, treatment, and patient outcomes.
Future work will focus on evaluating the effectiveness of these techniques in a real-world setting, with the goal of translating our findings into clinical practice.</Pgraph><Pgraph>The authors declare that they have no competing interests.</Pgraph><Pgraph>The authors declare that an ethics committee vote is not required.</Pgraph></TextBlock>
    <References linked="yes">
      <Reference refNo="1">
        <RefAuthor>Klinkhammer-Schalke M</RefAuthor>
        <RefAuthor>Kleihues van Tol K</RefAuthor>
        <RefAuthor>Jurkschat R</RefAuthor>
        <RefAuthor>Meyer M</RefAuthor>
        <RefAuthor>Katalinic A</RefAuthor>
        <RefAuthor>Holleczek B</RefAuthor>
        <RefAuthor>Braulke F</RefAuthor>
        <RefAuthor>Schneider C</RefAuthor>
        <RefAuthor>Franke B</RefAuthor>
        <RefAuthor>Hoffmann W</RefAuthor>
        <RefAuthor>Nennecke A</RefAuthor>
        <RefAuthor>Mitglieder der AG Daten</RefAuthor>
        <RefTitle>Der einheitliche onkologische Basisdatensatz (oBDS)</RefTitle>
        <RefYear>2024</RefYear>
        <RefJournal>Forum</RefJournal>
        <RefPage>191&#8211;195</RefPage>
        <RefTotal>Klinkhammer-Schalke M, Kleihues van Tol K, Jurkschat R, Meyer M, Katalinic A, Holleczek B, Braulke F, Schneider C, Franke B, Hoffmann W, Nennecke A; Mitglieder der AG Daten. Der einheitliche onkologische Basisdatensatz (oBDS). Forum. 2024;39:191&#8211;195. DOI: 10.1007&#47;s12312-024-01320-1</RefTotal>
        <RefLink>http:&#47;&#47;dx.doi.org&#47;10.1007&#47;s12312-024-01320-1</RefLink>
      </Reference>
      <Reference refNo="2">
        <RefAuthor>Reddy GT</RefAuthor>
        <RefAuthor>Reddy MPK</RefAuthor>
        <RefAuthor>Lakshmanna K</RefAuthor>
        <RefAuthor>Kaluri R</RefAuthor>
        <RefAuthor>Rajput DS</RefAuthor>
        <RefAuthor>Srivastava G</RefAuthor>
        <RefAuthor>Baker T</RefAuthor>
        <RefTitle>Analysis of Dimensionality Reduction Techniques on Big Data</RefTitle>
        <RefYear>2020</RefYear>
        <RefJournal>IEEE Access</RefJournal>
        <RefPage>54776&#8211;54788</RefPage>
        <RefTotal>Reddy GT, Reddy MPK, Lakshmanna K, Kaluri R, Rajput DS, Srivastava G, Baker T. Analysis of Dimensionality Reduction Techniques on Big Data. IEEE Access. 2020;8:54776&#8211;54788. DOI: 10.1109&#47;ACCESS.2020.2980942</RefTotal>
        <RefLink>http:&#47;&#47;dx.doi.org&#47;10.1109&#47;ACCESS.2020.2980942</RefLink>
      </Reference>
      <Reference refNo="3">
        <RefAuthor>Eger T</RefAuthor>
        <RefAuthor>Scheufen M</RefAuthor>
        <RefTitle>Data Sharing in Deutschland: Theorie, Empirie und europ&#228;ische Gesetzgebung</RefTitle>
        <RefYear>2024</RefYear>
        <RefJournal>Wirtschaftsdienst</RefJournal>
        <RefPage>725&#8211;729</RefPage>
        <RefTotal>Eger T, Scheufen M. Data Sharing in Deutschland: Theorie, Empirie und europ&#228;ische Gesetzgebung. Wirtschaftsdienst. 2024;104:725&#8211;729. DOI: 10.2478&#47;wd-2024-0183</RefTotal>
        <RefLink>http:&#47;&#47;dx.doi.org&#47;10.2478&#47;wd-2024-0183</RefLink>
      </Reference>
      <Reference refNo="4">
        <RefAuthor>Twisk J</RefAuthor>
        <RefAuthor>de Vente W</RefAuthor>
        <RefTitle>Attrition in longitudinal studies. How to deal with missing data</RefTitle>
        <RefYear>2002</RefYear>
        <RefJournal>J Clin Epidemiol</RefJournal>
        <RefPage>329&#8211;337</RefPage>
        <RefTotal>Twisk J, de Vente W. Attrition in longitudinal studies. How to deal with missing data. J Clin Epidemiol. 2002;55:329&#8211;337. DOI: 10.1016&#47;s0895-4356(01)00476-0</RefTotal>
        <RefLink>http:&#47;&#47;dx.doi.org&#47;10.1016&#47;s0895-4356(01)00476-0</RefLink>
      </Reference>
    </References>
    <Media>
      <Tables>
        <NoOfTables>0</NoOfTables>
      </Tables>
      <Figures>
        <NoOfPictures>0</NoOfPictures>
      </Figures>
      <InlineFigures>
        <NoOfPictures>0</NoOfPictures>
      </InlineFigures>
      <Attachments>
        <NoOfAttachments>0</NoOfAttachments>
      </Attachments>
    </Media>
  </OrigData>
</GmsArticle>