<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<!DOCTYPE GmsArticle SYSTEM "http://www.egms.de/dtd/2.0.34/GmsArticle.dtd">
<GmsArticle xmlns:xlink="http://www.w3.org/1999/xlink">
  <MetaData>
    <Identifier>25gmds156</Identifier>
    <IdentifierDoi>10.3205/25gmds156</IdentifierDoi>
    <IdentifierUrn>urn:nbn:de:0183-25gmds1562</IdentifierUrn>
    <ArticleType>Meeting Abstract</ArticleType>
    <TitleGroup>
      <Title language="en">Leveraging ChatGPT for systematic reviews &#8211; a feasibility study and framework proposal</Title>
    </TitleGroup>
    <CreatorList>
      <Creator>
        <PersonNames>
          <Lastname>Appel</Lastname>
          <LastnameHeading>Appel</LastnameHeading>
          <Firstname>Katharina</Firstname>
          <Initials>K</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Goethe-Universit&#228;t Frankfurt, Institut f&#252;r Digitale Medizin und Klinische Datenwissenschaften, Frankfurt am Main, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Schnorr</Lastname>
          <LastnameHeading>Schnorr</LastnameHeading>
          <Firstname>Isabel</Firstname>
          <Initials>I</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Goethe-Universit&#228;t Frankfurt, Institut f&#252;r Digitale Medizin und Klinische Datenwissenschaften, Frankfurt am Main, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Vehreschild</Lastname>
          <LastnameHeading>Vehreschild</LastnameHeading>
          <Firstname>J&#246;rg Janne</Firstname>
          <Initials>JJ</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Goethe-Universit&#228;t Frankfurt, Institut f&#252;r Digitale Medizin und Klinische Datenwissenschaften, Frankfurt am Main, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Maier</Lastname>
          <LastnameHeading>Maier</LastnameHeading>
          <Firstname>Daniel</Firstname>
          <Initials>D</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Goethe-Universit&#228;t Frankfurt, Institut f&#252;r Digitale Medizin und Klinische Datenwissenschaften, Frankfurt am Main, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
    </CreatorList>
    <PublisherList>
      <Publisher>
        <Corporation>
          <Corporatename>German Medical Science GMS Publishing House</Corporatename>
        </Corporation>
        <Address>D&#252;sseldorf</Address>
      </Publisher>
    </PublisherList>
    <SubjectGroup>
      <SubjectheadingDDB>610</SubjectheadingDDB>
      <Keyword language="en">systematic review</Keyword>
      <Keyword language="en">large language model (LLM)</Keyword>
      <Keyword language="en">artificial intelligence (AI)</Keyword>
    </SubjectGroup>
    <DatePublishedList>
      <DatePublished>20251103</DatePublished>
    </DatePublishedList>
    <Language>engl</Language>
    <License license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/4.0/">
      <AltText language="en">This is an Open Access article distributed under the terms of the Creative Commons Attribution 4.0 License.</AltText>
      <AltText language="de">Dieser Artikel ist ein Open-Access-Artikel und steht unter den Lizenzbedingungen der Creative Commons Attribution 4.0 License (Namensnennung).</AltText>
    </License>
    <SourceGroup>
      <Meeting>
        <MeetingId>M0631</MeetingId>
        <MeetingSequence>156</MeetingSequence>
        <MeetingCorporation>Deutsche Gesellschaft f&#252;r Medizinische Informatik, Biometrie und Epidemiologie</MeetingCorporation>
        <MeetingName>70. Jahrestagung der Deutschen Gesellschaft f&#252;r Medizinische Informatik, Biometrie und Epidemiologie e. V. (GMDS)</MeetingName>
        <MeetingTitle></MeetingTitle>
        <MeetingSession>PS 8: Medizinische Biometrie</MeetingSession>
        <MeetingCity>Jena</MeetingCity>
        <MeetingDate>
          <DateFrom>20250907</DateFrom>
          <DateTo>20250911</DateTo>
        </MeetingDate>
      </Meeting>
    </SourceGroup>
    <ArticleNo>Abstr. 336</ArticleNo>
  </MetaData>
  <OrigData>
    <TextBlock name="Text" linked="yes">
      <MainHeadline>Text</MainHeadline><Pgraph><Mark1>Introduction:</Mark1> Systematic reviews are an important instrument to synthesize existing knowledge for evidence-based medicine <TextLink reference="1"></TextLink>. While a manually conducted systematic review is a highly resource-intensive and error-prone project, the implementation of Large Language Models (LLMs) could support critical steps such as the screening of titles and abstracts and full-text data extraction. While the performance of LLMs for systematic reviews has been demonstrated in recent publications <TextLink reference="1"></TextLink>, <TextLink reference="2"></TextLink>, <TextLink reference="3"></TextLink>, a user-friendly, accessible framework is still missing. This study proposes and evaluates such an LLM-supported systematic review framework.</Pgraph><Pgraph><Mark1>Methods:</Mark1> Based on a published systematic review about clinical prognostic scores for COVID-19 <TextLink reference="4"></TextLink>, we developed and validated an LLM-supported framework aligned with PRISMA guidelines. Based on an evaluation of performance and cost-effectiveness, we opted for implementing OpenAI&#8217;s Application Programming Interface (API) and the o3 model <TextLink reference="5"></TextLink>. In two subsequent steps, we called the API to (i) screen each article&#8217;s title and abstract as well as (ii) its full text (provided as an Extensible Markup Language &#91;XML&#93; file). Therefore, we engineered a dynamic prompt with detailed context information regarding the LLM&#8217;s persona, the instruction&#8217;s target and its topic, the specific task, the inclusion and exclusion criteria, and the output format we wished to receive. More specifically, the LLM was requested to check each inclusion and exclusion criterion and to create a brief explanation. Articles meeting the criteria were included and excluded otherwise. 
Performance metrics (accuracy, Cohen&#8217;s &#954;, sensitivity, specificity, false-positive rate &#91;FPR&#93;, false-negative rate &#91;FNR&#93;) were compared to the results of the original human review that served as ground truth.</Pgraph><Pgraph><Mark1>Results:</Mark1> We included 1383 scientific articles. Preliminary results for a subset of these articles demonstrate the LLM&#39;s performance in title and abstract screening with an accuracy of 91.6&#37;, a low FNR (6.8&#37;) and a Cohen&#8217;s &#954; of 0.75 (95&#37; CI 0.69-0.80). Moderate overall accuracy was observed during the subsequent full-text screening phase (accuracy 77.9&#37;, FNR 10.9&#37;, Cohen&#8217;s &#954; of 0.49 &#91;95&#37; CI 0.37-0.62&#93;).</Pgraph><Pgraph><Mark1>Conclusion:</Mark1> The findings suggest the LLM-based approach could substantially accelerate screening efforts and reduce manual workload. However, the performance decline revealed for the full-text screening requires further investigation and demonstrates the contextual complexity of the original human review&#8217;s study aim. We propose that LLMs such as o3 are best utilized to assist researchers in the systematic review process, but not to perform a fully automated systematic review.</Pgraph><Pgraph>Daniel Maier received speaker honoraria from Free University Berlin and travel compensation from IQVIA. 
J&#246;rg Janne Vehreschild received payments or honoraria from Merck &#47; MSD, Gilead, Pfizer, Astellas Pharma, Basilea, German Centre for Infection Research (DZIF), University Hospital Freiburg&#47; Congress and Communication, Academy for Infectious Medicine, University Manchester, German Society for Infectious Diseases (DGI), &#196;rztekammer Nordrhein, &#196;rztekammer Hessen, University Hospital Aachen, Back Bay Strategies, German Society for Internal Medicine (DGIM), Shionogi, Molecular Health, Netzwerk Universit&#228;tsmedizin, Janssen, NordForsk, Biontech, APOGEPHA, German Cancer Consortium (DKTK), University Hospital Oldenburg. J&#246;rg Janne Vehreschild has grants from Merck &#47; MSD, Gilead, Pfizer, Astellas Pharma, Basilea, German Centre for Infection Research (DZIF), German Federal Ministry of Education and Research (BMBF), Deutsches Zentrum f&#252;r Luft- und Raumfahrt (DLR), University of Bristol, Rigshospitalet Copenhagen, German Network University Medicine, German Cancer Consortium (DKTK), German Federal Ministry of Health (BMG), European Union. J&#246;rg Janne Vehreschild received support for attending meetings and&#47;or travel from German Centre for Infection Research (DZIF), University Manchester, German Society for Infectious Diseases (DGI), University Hospital Aachen, German Society for Internal Medicine (DGIM), Netzwerk Universit&#228;tsmedizin, German Cancer Consortium (DKTK). J&#246;rg Janne Vehreschild participated on Data Safety Monitoring Boards or Advisory Boards of Merck &#47; MSD, Gilead, Pfizer, Astellas Pharma, Basilea, German Centre for Infection Research (DZIF), Academy for Infectious Medicine, University Manchester, German Society for Infectious Diseases (DGI), German Society for Internal Medicine (DGIM), Netzwerk Universit&#228;tsmedizin, Janssen, Biontech. Zaira R. All other authors report no conflicts of interest.</Pgraph><Pgraph>The authors declare that an ethics committee vote is not required.</Pgraph></TextBlock>
    <References linked="yes">
      <Reference refNo="1">
        <RefAuthor>Affengruber L</RefAuthor>
        <RefAuthor>van der Maten MM</RefAuthor>
        <RefAuthor>Spiero I</RefAuthor>
        <RefAuthor>Nussbaumer-Streit B</RefAuthor>
        <RefAuthor>Mahmic-Kaknjo M</RefAuthor>
        <RefAuthor>Ellen ME</RefAuthor>
        <RefAuthor></RefAuthor>
        <RefTitle>An exploration of available methods and tools to improve the efficiency of systematic review production: a scoping review</RefTitle>
        <RefYear>2024</RefYear>
        <RefJournal>BMC Med Res Methodol</RefJournal>
        <RefPage>210</RefPage>
        <RefTotal>Affengruber L, van der Maten MM, Spiero I, Nussbaumer-Streit B, Mahmic-Kaknjo M, Ellen ME, et al. An exploration of available methods and tools to improve the efficiency of systematic review production: a scoping review. BMC Med Res Methodol. 2024;24:210. DOI: 10.1186&#47;s12874-024-02320-4</RefTotal>
        <RefLink>http:&#47;&#47;dx.doi.org&#47;10.1186&#47;s12874-024-02320-4</RefLink>
      </Reference>
      <Reference refNo="2">
        <RefAuthor>Hanegraaf P</RefAuthor>
        <RefAuthor>Wondimu A</RefAuthor>
        <RefAuthor>Mosselman JJ</RefAuthor>
        <RefAuthor>de Jong R</RefAuthor>
        <RefAuthor>Abogunrin S</RefAuthor>
        <RefAuthor>Querios L</RefAuthor>
        <RefAuthor></RefAuthor>
        <RefTitle>Inter-reviewer reliability of human literature reviewing and implications for the introduction of machine-assisted systematic reviews: a mixed-methods review</RefTitle>
        <RefYear>2024</RefYear>
        <RefJournal>BMJ Open</RefJournal>
        <RefPage>e076912</RefPage>
        <RefTotal>Hanegraaf P, Wondimu A, Mosselman JJ, de Jong R, Abogunrin S, Querios L, et al. Inter-reviewer reliability of human literature reviewing and implications for the introduction of machine-assisted systematic reviews: a mixed-methods review. BMJ Open. 2024;14(3):e076912. DOI: 10.1136&#47;bmjopen-2023-076912</RefTotal>
        <RefLink>http:&#47;&#47;dx.doi.org&#47;10.1136&#47;bmjopen-2023-076912</RefLink>
      </Reference>
      <Reference refNo="3">
        <RefAuthor>Delgado-Chaves FM</RefAuthor>
        <RefAuthor>Jennings MJ</RefAuthor>
        <RefAuthor>Atalaia A</RefAuthor>
        <RefAuthor>Wolff J</RefAuthor>
        <RefAuthor>Horvath R</RefAuthor>
        <RefAuthor>Mamdouh Z M</RefAuthor>
        <RefAuthor>Baumbach J</RefAuthor>
        <RefAuthor>Baumbach L</RefAuthor>
        <RefTitle>Transforming literature screening: The emerging role of large language models in systematic reviews</RefTitle>
        <RefYear>2025</RefYear>
        <RefJournal>PNAS</RefJournal>
        <RefPage>e2411962122</RefPage>
        <RefTotal>Delgado-Chaves FM, Jennings MJ, Atalaia A, Wolff J, Horvath R, Mamdouh Z M, Baumbach J, Baumbach L. Transforming literature screening: The emerging role of large language models in systematic reviews. PNAS. 2025;122(2):e2411962122. DOI: 10.1073&#47;pnas.2411962122</RefTotal>
        <RefLink>http:&#47;&#47;dx.doi.org&#47;10.1073&#47;pnas.2411962122</RefLink>
      </Reference>
      <Reference refNo="4">
        <RefAuthor>Appel KS</RefAuthor>
        <RefAuthor>Geisler R</RefAuthor>
        <RefAuthor>Maier D</RefAuthor>
        <RefAuthor>Miljukov O</RefAuthor>
        <RefAuthor>Hopff SM</RefAuthor>
        <RefAuthor>Vehreschild JJ</RefAuthor>
        <RefTitle>A Systematic review of Predictor Composition, Outcomes, Risk of Bias, and Validation of COVID-19 Prognostic Scores</RefTitle>
        <RefYear>2024</RefYear>
        <RefJournal>Clin Infect Dis</RefJournal>
        <RefPage>889-899</RefPage>
        <RefTotal>Appel KS, Geisler R, Maier D, Miljukov O, Hopff SM, Vehreschild JJ. A Systematic review of Predictor Composition, Outcomes, Risk of Bias, and Validation of COVID-19 Prognostic Scores. Clin Infect Dis. 2024 Apr 15;78(4):889-899. DOI: 10.1093&#47;cid&#47;ciad618</RefTotal>
        <RefLink>http:&#47;&#47;dx.doi.org&#47;10.1093&#47;cid&#47;ciad618</RefLink>
      </Reference>
      <Reference refNo="5">
        <RefAuthor>Open AI</RefAuthor>
        <RefTitle></RefTitle>
        <RefYear>2025</RefYear>
        <RefBookTitle>o3 Our most powerful reasoning model</RefBookTitle>
        <RefPage></RefPage>
        <RefTotal>Open AI. o3 Our most powerful reasoning model. San Francisco, California, U.S.: Open AI; 2025 &#91;cited 27 June 2025&#93;. Available from: https:&#47;&#47;platform.openai.com&#47;docs&#47;models&#47;o3</RefTotal>
        <RefLink>https:&#47;&#47;platform.openai.com&#47;docs&#47;models&#47;o3</RefLink>
      </Reference>
    </References>
    <Media>
      <Tables>
        <NoOfTables>0</NoOfTables>
      </Tables>
      <Figures>
        <NoOfPictures>0</NoOfPictures>
      </Figures>
      <InlineFigures>
        <NoOfPictures>0</NoOfPictures>
      </InlineFigures>
      <Attachments>
        <NoOfAttachments>0</NoOfAttachments>
      </Attachments>
    </Media>
  </OrigData>
</GmsArticle>