<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<!DOCTYPE GmsArticle SYSTEM "http://www.egms.de/dtd/2.0.34/GmsArticle.dtd">
<GmsArticle xmlns:xlink="http://www.w3.org/1999/xlink">
  <MetaData>
    <Identifier>25gmds070</Identifier>
    <IdentifierDoi>10.3205/25gmds070</IdentifierDoi>
    <IdentifierUrn>urn:nbn:de:0183-25gmds0703</IdentifierUrn>
    <ArticleType>Meeting Abstract</ArticleType>
    <TitleGroup>
      <Title language="en">Real-world benchmarking of statistical software for feature selection in longitudinal biomedical data</Title>
    </TitleGroup>
    <CreatorList>
      <Creator>
        <PersonNames>
          <Lastname>Gieswinkel</Lastname>
          <LastnameHeading>Gieswinkel</LastnameHeading>
          <Firstname>Alexander</Firstname>
          <Initials>A</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Preventive Cardiology and Preventive Medicine, Department of Cardiology, University Medical Center of the Johannes Gutenberg University Mainz, Mainz, Germany</Affiliation>
          <Affiliation>Institute of Mathematics, Johannes Gutenberg University Mainz, Mainz, Germany</Affiliation>
          <Affiliation>German Center for Cardiovascular Research (DZHK), partner site Rhine Main, Mainz, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Buch</Lastname>
          <LastnameHeading>Buch</LastnameHeading>
          <Firstname>Gregor</Firstname>
          <Initials>G</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Preventive Cardiology and Preventive Medicine, Department of Cardiology, University Medical Center of the Johannes Gutenberg University Mainz, Mainz, Germany</Affiliation>
          <Affiliation>German Center for Cardiovascular Research (DZHK), partner site Rhine Main, Mainz, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>ten Cate</Lastname>
          <LastnameHeading>ten Cate</LastnameHeading>
          <Firstname>Vincent</Firstname>
          <Initials>V</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Preventive Cardiology and Preventive Medicine, Department of Cardiology, University Medical Center of the Johannes Gutenberg University Mainz, Mainz, Germany</Affiliation>
          <Affiliation>German Center for Cardiovascular Research (DZHK), partner site Rhine Main, Mainz, Germany</Affiliation>
          <Affiliation>Clinical Epidemiology and Systems Medicine, Center for Thrombosis and Hemostasis, University Medical Center of the Johannes Gutenberg University Mainz, Mainz, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>G&#252;l</Lastname>
          <LastnameHeading>G&#252;l</LastnameHeading>
          <Firstname>G&#246;khan</Firstname>
          <Initials>G</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Preventive Cardiology and Preventive Medicine, Department of Cardiology, University Medical Center of the Johannes Gutenberg University Mainz, Mainz, Germany</Affiliation>
          <Affiliation>Clinical Epidemiology and Systems Medicine, Center for Thrombosis and Hemostasis, University Medical Center of the Johannes Gutenberg University Mainz, Mainz, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Hartung</Lastname>
          <LastnameHeading>Hartung</LastnameHeading>
          <Firstname>Lisa</Firstname>
          <Initials>L</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Institute of Mathematics, Johannes Gutenberg University Mainz, Mainz, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Wild</Lastname>
          <LastnameHeading>Wild</LastnameHeading>
          <Firstname>Philipp</Firstname>
          <Initials>P</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Preventive Cardiology and Preventive Medicine, Department of Cardiology, University Medical Center of the Johannes Gutenberg University Mainz, Mainz, Germany</Affiliation>
          <Affiliation>German Center for Cardiovascular Research (DZHK), partner site Rhine Main, Mainz, Germany</Affiliation>
          <Affiliation>Clinical Epidemiology and Systems Medicine, Center for Thrombosis and Hemostasis, University Medical Center of the Johannes Gutenberg University Mainz, Mainz, Germany</Affiliation>
          <Affiliation>Institute of Molecular Biology (IMB), Mainz, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
    </CreatorList>
    <PublisherList>
      <Publisher>
        <Corporation>
          <Corporatename>German Medical Science GMS Publishing House</Corporatename>
        </Corporation>
        <Address>D&#252;sseldorf</Address>
      </Publisher>
    </PublisherList>
    <SubjectGroup>
      <SubjectheadingDDB>610</SubjectheadingDDB>
      <Keyword language="en">longitudinal data</Keyword>
      <Keyword language="en">simulation study</Keyword>
      <Keyword language="en">systematic review</Keyword>
      <Keyword language="en">statistical software</Keyword>
      <Keyword language="en">feature selection</Keyword>
    </SubjectGroup>
    <DatePublishedList>
      <DatePublished>20251103</DatePublished>
    </DatePublishedList>
    <Language>engl</Language>
    <License license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/4.0/">
      <AltText language="en">This is an Open Access article distributed under the terms of the Creative Commons Attribution 4.0 License.</AltText>
      <AltText language="de">Dieser Artikel ist ein Open-Access-Artikel und steht unter den Lizenzbedingungen der Creative Commons Attribution 4.0 License (Namensnennung).</AltText>
    </License>
    <SourceGroup>
      <Meeting>
        <MeetingId>M0631</MeetingId>
        <MeetingSequence>070</MeetingSequence>
        <MeetingCorporation>Deutsche Gesellschaft f&#252;r Medizinische Informatik, Biometrie und Epidemiologie</MeetingCorporation>
        <MeetingName>70. Jahrestagung der Deutschen Gesellschaft f&#252;r Medizinische Informatik, Biometrie und Epidemiologie e. V. (GMDS)</MeetingName>
        <MeetingTitle></MeetingTitle>
        <MeetingSession>V: Machine learning and AI applications 2</MeetingSession>
        <MeetingCity>Jena</MeetingCity>
        <MeetingDate>
          <DateFrom>20250907</DateFrom>
          <DateTo>20250911</DateTo>
        </MeetingDate>
      </Meeting>
    </SourceGroup>
    <ArticleNo>Abstr. 246</ArticleNo>
  </MetaData>
  <OrigData>
    <TextBlock name="Text" linked="yes">
      <MainHeadline>Text</MainHeadline><Pgraph><Mark1>Introduction:</Mark1> Recent advances in biochemical technology enable increasing availability of high-dimensional omics data for multiple time points in prospective cohort studies. Supervised feature selection is often required in these settings to overcome dimensionality problems and to achieve biomedical interpretability. To date, an overview of existing methods for this framework is lacking. This highlights the need for a systematic review and evaluation of this area.</Pgraph><Pgraph><Mark1>Methods:</Mark1> A systematic search of statistical software was conducted to identify appropriate methods. The Comprehensive R Archive Network (CRAN) was examined via the R package <Mark2>packagefinder </Mark2>using a search query containing relevant keywords <TextLink reference="1"></TextLink>. Eligible software was identified by manually screening the package descriptions, and through computational testing with a fixed application example. The inter- and intra-class correlation structure of longitudinal proteomic data was analysed to generate synthetic data for an ADEMP-designed simulation study to support the applicability of the findings to real-world cohort data <TextLink reference="2"></TextLink>, <TextLink reference="3"></TextLink>. Feature selection performance of identified methods was evaluated in real-world scenarios, considering varying sample sizes, total number of predictors and true positives, time points and signal-to-noise ratios. Only frequentist implementations <TextLink reference="4"></TextLink> with given default settings were included in the Monte Carlo simulations for a fair comparison. The estimated true positive rate (eTPR) and estimated false discovery rate (eFDR) were chosen as targeted performance measures.</Pgraph><Pgraph><Mark1>Results:</Mark1> Of 21,528 accessible packages on CRAN (status: June 2024), 324 packages with matching keywords in the descriptions were extracted by the search query. 
Screening of the descriptions identified 45 packages that were then tested in R. Methods for inappropriate settings (N&#61;11), absent variable selection (N&#61;5), not applicable to the predefined testing data (N&#61;4) or other reasons (N&#61;11) were excluded. Six of the remaining 14 methods were based on mixed-effects models (<Mark2>buildmer, rpql, splmm, alqrfe, plsmmLasso, glmmLasso</Mark2>), five on generalized estimating equations (<Mark2>sgee, LassoGEE, geeVerse, PGEE, pgee.mixed</Mark2>), two methods were built on Bayesian frameworks (<Mark2>sparsereg, spikeSlabGAM</Mark2>) and one package was modelling time series (<Mark2>midasml</Mark2>). All implementations were able to process continuous outcomes, while only four supported binary outcomes. A total of N&#61;8 frequentist methods with &#8216;ready-to-use&#8217; default settings were considered in the simulation study.</Pgraph><Pgraph>The packages <Mark2>buildmer </Mark2>and <Mark2>plsmmLasso </Mark2>consistently demonstrated an eTPR exceeding 80&#37; while maintaining the eFDR under 20&#37;, across various signal-to-noise settings. By comparison, all other methods underperformed when both performance metrics were evaluated jointly. <Mark2>splmm </Mark2>achieved similar eFDR but yielded lower eTPR, whereas <Mark2>geeVerse </Mark2>showed an opposite trend.</Pgraph><Pgraph><Mark1>Discussion:</Mark1> The majority of the available statistical software is based on frequentist techniques, while Bayesian procedures represent a minority. Alternative concepts like tree-based methods are notably absent. 
There was no evidence of superiority for modern selection techniques such as regularized regression (<Mark2>plsmmLasso</Mark2>) over traditional approaches like stepwise regression (<Mark2>buildmer</Mark2>) for feature selection in longitudinal data.</Pgraph><Pgraph>Future analysis will include Bayesian methods in the simulation study, and provide a more comprehensive examination of the results.</Pgraph><Pgraph><Mark1>Conclusion:</Mark1> A variety of statistical software is available for supervised feature selection in longitudinal biomedical data. Among these, methods based on mixed-effects models appear to outperform generalized estimating equations.</Pgraph><Pgraph>The authors declare that they have no competing interests.</Pgraph><Pgraph>The authors declare that an ethics committee vote is not required.</Pgraph><Pgraph>The contribution has already been presented as a poster at the 46th Annual Conference of the International Society for Clinical Biostatistics (ISCB): Systematic review and real life-oriented evaluation on methods for feature selection in longitudinal biomedical data</Pgraph></TextBlock>
    <References linked="yes">
      <Reference refNo="1">
        <RefAuthor>Buch G</RefAuthor>
        <RefAuthor>Wild PS</RefAuthor>
        <RefTitle>Investigating selection strategies for identifying biometrical techniques: a case study on group variable selection methods in R</RefTitle>
        <RefYear>2023</RefYear>
        <RefBookTitle>68. Jahrestagung der Deutschen Gesellschaft f&#252;r Medizinische Informatik, Biometrie und Epidemiologie e. V. (GMDS). Heilbronn, 17.-21.09.2023</RefBookTitle>
        <RefPage>DocAbstr. 303</RefPage>
        <RefTotal>Buch G, Wild PS. Investigating selection strategies for identifying biometrical techniques: a case study on group variable selection methods in R. In: 68. Jahrestagung der Deutschen Gesellschaft f&#252;r Medizinische Informatik, Biometrie und Epidemiologie e. V. (GMDS). Heilbronn, 17.-21.09.2023. D&#252;sseldorf: German Medical Science GMS Publishing House; 2023. DocAbstr. 303 DOI: 10.3205&#47;23gmds081</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.3205&#47;23gmds081</RefLink>
      </Reference>
      <Reference refNo="2">
        <RefAuthor>Morris TP</RefAuthor>
        <RefAuthor>White IR</RefAuthor>
        <RefAuthor>Crowther MJ</RefAuthor>
        <RefTitle>Using simulation studies to evaluate statistical methods</RefTitle>
        <RefYear>2019</RefYear>
        <RefJournal>Stat Med</RefJournal>
        <RefPage>2074-102</RefPage>
        <RefTotal>Morris TP, White IR, Crowther MJ. Using simulation studies to evaluate statistical methods. Stat Med. 2019;38(11):2074-102.</RefTotal>
      </Reference>
      <Reference refNo="3">
        <RefAuthor>Schulz A</RefAuthor>
        <RefAuthor>Zoller D</RefAuthor>
        <RefAuthor>Nickels S</RefAuthor>
        <RefAuthor>Beutel ME</RefAuthor>
        <RefAuthor>Blettner M</RefAuthor>
        <RefAuthor>Wild PS</RefAuthor>
        <RefAuthor></RefAuthor>
        <RefTitle>Simulation of complex data structures for planning of studies with focus on biomarker comparison</RefTitle>
        <RefYear>2017</RefYear>
        <RefJournal>BMC Med Res Methodol</RefJournal>
        <RefPage>90</RefPage>
        <RefTotal>Schulz A, Zoller D, Nickels S, Beutel ME, Blettner M, Wild PS, et al. Simulation of complex data structures for planning of studies with focus on biomarker comparison. BMC Med Res Methodol. 2017;17(1):90.</RefTotal>
      </Reference>
      <Reference refNo="4">
        <RefAuthor>Fitzmaurice GM</RefAuthor>
        <RefAuthor>Laird NM</RefAuthor>
        <RefAuthor>Ware JH</RefAuthor>
        <RefTitle></RefTitle>
        <RefYear>2011</RefYear>
        <RefBookTitle>Applied longitudinal analysis</RefBookTitle>
        <RefPage></RefPage>
        <RefTotal>Fitzmaurice GM, Laird NM, Ware JH. Applied longitudinal analysis. Second Edition. Hoboken, NJ: Wiley-Interscience; 2011.</RefTotal>
      </Reference>
    </References>
    <Media>
      <Tables>
        <NoOfTables>0</NoOfTables>
      </Tables>
      <Figures>
        <NoOfPictures>0</NoOfPictures>
      </Figures>
      <InlineFigures>
        <NoOfPictures>0</NoOfPictures>
      </InlineFigures>
      <Attachments>
        <NoOfAttachments>0</NoOfAttachments>
      </Attachments>
    </Media>
  </OrigData>
</GmsArticle>