<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<!DOCTYPE GmsArticle SYSTEM "http://www.egms.de/dtd/2.0.34/GmsArticle.dtd">
<GmsArticle xmlns:xlink="http://www.w3.org/1999/xlink">
  <MetaData>
    <Identifier>25gmds077</Identifier>
    <IdentifierDoi>10.3205/25gmds077</IdentifierDoi>
    <IdentifierUrn>urn:nbn:de:0183-25gmds0775</IdentifierUrn>
    <ArticleType>Meeting Abstract</ArticleType>
    <TitleGroup>
      <Title language="en">Enhancing Transparency in Research: Integrating Initial Data Analysis into Statistical Analysis Plans</Title>
    </TitleGroup>
    <CreatorList>
      <Creator>
        <PersonNames>
          <Lastname>Schmidt</Lastname>
          <LastnameHeading>Schmidt</LastnameHeading>
          <Firstname>Carsten Oliver</Firstname>
          <Initials>CO</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Universit&#228;t Greifswald, Greifswald, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Heinze</Lastname>
          <LastnameHeading>Heinze</LastnameHeading>
          <Firstname>Georg</Firstname>
          <Initials>G</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Medical University of Vienna, Wien, Austria</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Lusa</Lastname>
          <LastnameHeading>Lusa</LastnameHeading>
          <Firstname>Lara</Firstname>
          <Initials>L</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Faculty of Medicine, Institute of Biostatistics and Medical Informatics, University of Ljubljana, Ljubljana, Slovenia</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Huebner</Lastname>
          <LastnameHeading>Huebner</LastnameHeading>
          <Firstname>Marianne</Firstname>
          <Initials>M</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Department of Statistics and Probability, Michigan State University, East Lansing, United States</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
    </CreatorList>
    <PublisherList>
      <Publisher>
        <Corporation>
          <Corporatename>German Medical Science GMS Publishing House</Corporatename>
        </Corporation>
        <Address>D&#252;sseldorf</Address>
      </Publisher>
    </PublisherList>
    <SubjectGroup>
      <SubjectheadingDDB>610</SubjectheadingDDB>
      <Keyword language="en">initial data analysis</Keyword>
      <Keyword language="en">statistical analysis plan</Keyword>
      <Keyword language="en">data quality</Keyword>
      <Keyword language="en">reproducible research</Keyword>
    </SubjectGroup>
    <DatePublishedList>
      <DatePublished>20251103</DatePublished>
    </DatePublishedList>
    <Language>engl</Language>
    <License license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/4.0/">
      <AltText language="en">This is an Open Access article distributed under the terms of the Creative Commons Attribution 4.0 License.</AltText>
      <AltText language="de">Dieser Artikel ist ein Open-Access-Artikel und steht unter den Lizenzbedingungen der Creative Commons Attribution 4.0 License (Namensnennung).</AltText>
    </License>
    <SourceGroup>
      <Meeting>
        <MeetingId>M0631</MeetingId>
        <MeetingSequence>077</MeetingSequence>
        <MeetingCorporation>Deutsche Gesellschaft f&#252;r Medizinische Informatik, Biometrie und Epidemiologie</MeetingCorporation>
        <MeetingName>70. Jahrestagung der Deutschen Gesellschaft f&#252;r Medizinische Informatik, Biometrie und Epidemiologie e. V. (GMDS)</MeetingName>
        <MeetingTitle></MeetingTitle>
        <MeetingSession>V: Medical Biometry 1: Methoden f&#252;r die Studienplanung</MeetingSession>
        <MeetingCity>Jena</MeetingCity>
        <MeetingDate>
          <DateFrom>20250907</DateFrom>
          <DateTo>20250911</DateTo>
        </MeetingDate>
      </Meeting>
    </SourceGroup>
    <ArticleNo>Abstr. 359</ArticleNo>
  </MetaData>
  <OrigData>
    <TextBlock name="Text" linked="yes">
      <MainHeadline>Text</MainHeadline><Pgraph><Mark1>Introduction:</Mark1> Statistical Analysis Plans (SAPs) are fundamental to ensuring transparency, reproducibility, and methodological rigour in statistical research. However, the critical phase of Initial Data Analysis (IDA) &#8211; assessing data suitability for subsequent analyses &#8211; remains insufficiently represented in SAPs and is often poorly reported in scientific publications <TextLink reference="1"></TextLink>. Given the complexity of IDA, encompassing detailed data quality assessments, evaluation of missing data, exploration of univariate and multivariate properties, and many preprocessing steps <TextLink reference="2"></TextLink>, <TextLink reference="3"></TextLink>, <TextLink reference="4"></TextLink>, there is a strong need to formalise its planning and documentation within SAPs.</Pgraph><Pgraph><Mark1>Methods:</Mark1> To address this gap, the TG3 &#8220;Initial Data Analysis&#8221; working group within the STRATOS (STRengthening Analytical Thinking for Observational Studies) initiative developed an extension to the conventional SAP: the Statistical Analysis Plan for Initial Data Analysis (SAPI). 
A Delphi-based consensus process involving experienced statisticians from all STRATOS topic groups was conducted over several rounds to identify, refine, and integrate key components of IDA into a structured plan aligned with best practices for main data analysis (MDA).</Pgraph><Pgraph><Mark1>Results:</Mark1> The resulting SAPI is organised into eight comprehensive sections: </Pgraph><Pgraph><OrderedList><ListItem level="1" levelPosition="1" numString="1.">Administrative Information, encompassing project documentation, ethical approvals, and team contacts; </ListItem><ListItem level="1" levelPosition="2" numString="2.">Project Background, outlining research aims and target populations; </ListItem><ListItem level="1" levelPosition="3" numString="3.">Observation Units, detailing data sources, sampling, and dataset descriptions; </ListItem><ListItem level="1" levelPosition="4" numString="4.">Variables, specifying variables for the main and initial data analysis; </ListItem><ListItem level="1" levelPosition="5" numString="5.">Methods for Main Data Analysis (MDA), covering the description of observation units, model specifications, assumptions, sample size requirements, and planned sensitivity analyses; </ListItem><ListItem level="1" levelPosition="6" numString="6.">Methods for Initial Data Analysis (IDA), providing detailed guidance on data preparation, assessment of unit and item missingness, and univariable and multivariable data descriptions; </ListItem><ListItem level="1" levelPosition="7" numString="7.">Evaluation and Updates, addressing procedures for revising the SAPI following IDA; and </ListItem><ListItem level="1" levelPosition="8" numString="8.">Supplementary Information, including key references, ensuring sustainable handling of analysis outputs. 
</ListItem></OrderedList></Pgraph><Pgraph>Within the SAPI structure, the MDA is specified prior to the IDA, as the former defines the scope and nature of required data checks.</Pgraph><Pgraph><Mark1>Discussion:</Mark1> Formalising IDA within a dedicated SAPI enhances transparency, strengthens reproducibility, and promotes a more structured approach to data assessment prior to main analyses. Planning and reporting IDA activities with comparable rigour to MDA ensures that critical preparatory steps are visible and reproducible. The SAPI is currently undergoing application testing across diverse studies to evaluate needs for improvements.</Pgraph><Pgraph><Mark1>Conclusion:</Mark1> The SAPI framework addresses a significant deficiency in traditional SAP practices by systematically integrating Initial Data Analysis. It explicitly adopts a much broader perspective on the entire data lifecycle than a conventional SAP. Through structured planning and transparent reporting, it contributes to improving the quality, reproducibility, and credibility of empirical research, aligning with wider scientific efforts to foster rigorous and transparent methodology.</Pgraph><Pgraph>The authors declare that they have no competing interests.</Pgraph><Pgraph>The authors declare that an ethics committee vote is not required.</Pgraph></TextBlock>
    <References linked="yes">
      <Reference refNo="1">
        <RefAuthor>Huebner M</RefAuthor>
        <RefAuthor>Vach W</RefAuthor>
        <RefAuthor>le Cessie S</RefAuthor>
        <RefAuthor>Schmidt CO</RefAuthor>
        <RefAuthor>Lusa L</RefAuthor>
        <RefAuthor>Topic Group &#8220;Initial Data Analysis&#8221; of the STRATOS Initiative (STRengthening Analytical Thinking for Observational Studies)</RefAuthor>
        <RefTitle>Hidden analyses: a review of reporting practice and recommendations for more transparent reporting of initial data analyses</RefTitle>
        <RefYear>2020</RefYear>
        <RefJournal>BMC Med Res Methodol</RefJournal>
        <RefPage>61</RefPage>
        <RefTotal>Huebner M, Vach W, le Cessie S, Schmidt CO, Lusa L; Topic Group &#8220;Initial Data Analysis&#8221; of the STRATOS Initiative (STRengthening Analytical Thinking for Observational Studies). Hidden analyses: a review of reporting practice and recommendations for more transparent reporting of initial data analyses. BMC Med Res Methodol. 2020 Mar 13;20(1):61.</RefTotal>
      </Reference>
      <Reference refNo="2">
        <RefAuthor>Heinze G</RefAuthor>
        <RefAuthor>Baillie M</RefAuthor>
        <RefAuthor>Lusa L</RefAuthor>
        <RefAuthor>Sauerbrei W</RefAuthor>
        <RefAuthor>Schmidt CO</RefAuthor>
        <RefAuthor>Harrell FE</RefAuthor>
        <RefAuthor></RefAuthor>
        <RefTitle>Regression without regrets &#8211; initial data analysis is a prerequisite for multivariable regression</RefTitle>
        <RefYear>2024</RefYear>
        <RefJournal>BMC Med Res Methodol</RefJournal>
        <RefPage>178</RefPage>
        <RefTotal>Heinze G, Baillie M, Lusa L, Sauerbrei W, Schmidt CO, Harrell FE, et al. Regression without regrets &#8211; initial data analysis is a prerequisite for multivariable regression. BMC Med Res Methodol. 2024;24(1):178.</RefTotal>
      </Reference>
      <Reference refNo="3">
        <RefAuthor>Lusa L</RefAuthor>
        <RefAuthor>Proust-Lima C</RefAuthor>
        <RefAuthor>Schmidt CO</RefAuthor>
        <RefAuthor>Lee KJ</RefAuthor>
        <RefAuthor>le Cessie S</RefAuthor>
        <RefAuthor>Baillie M</RefAuthor>
        <RefAuthor></RefAuthor>
        <RefTitle>Initial data analysis for longitudinal studies to build a solid foundation for reproducible analysis</RefTitle>
        <RefYear>2024</RefYear>
        <RefJournal>PLoS ONE</RefJournal>
        <RefPage>e0295726</RefPage>
        <RefTotal>Lusa L, Proust-Lima C, Schmidt CO, Lee KJ, le Cessie S, Baillie M, et al. Initial data analysis for longitudinal studies to build a solid foundation for reproducible analysis. PLoS ONE. 2024;19(5):e0295726.</RefTotal>
      </Reference>
      <Reference refNo="4">
        <RefAuthor>Schmidt CO</RefAuthor>
        <RefAuthor>Struckmann S</RefAuthor>
        <RefAuthor>Enzenbach C</RefAuthor>
        <RefAuthor>Reineke A</RefAuthor>
        <RefAuthor>Stausberg J</RefAuthor>
        <RefAuthor>Damerow S</RefAuthor>
        <RefAuthor></RefAuthor>
        <RefTitle>Facilitating harmonized data quality assessments. A data quality framework for observational health research data collections with software implementations in R</RefTitle>
        <RefYear>2021</RefYear>
        <RefJournal>BMC Med Res Methodol</RefJournal>
        <RefPage>63</RefPage>
        <RefTotal>Schmidt CO, Struckmann S, Enzenbach C, Reineke A, Stausberg J, Damerow S, et al. Facilitating harmonized data quality assessments. A data quality framework for observational health research data collections with software implementations in R. BMC Med Res Methodol. 2021;21(1):63.</RefTotal>
      </Reference>
    </References>
    <Media>
      <Tables>
        <NoOfTables>0</NoOfTables>
      </Tables>
      <Figures>
        <NoOfPictures>0</NoOfPictures>
      </Figures>
      <InlineFigures>
        <NoOfPictures>0</NoOfPictures>
      </InlineFigures>
      <Attachments>
        <NoOfAttachments>0</NoOfAttachments>
      </Attachments>
    </Media>
  </OrigData>
</GmsArticle>