<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<!DOCTYPE GmsArticle SYSTEM "http://www.egms.de/dtd/2.0.34/GmsArticle.dtd">
<GmsArticle xmlns:xlink="http://www.w3.org/1999/xlink">
  <MetaData>
    <Identifier>25gmds180</Identifier>
    <IdentifierDoi>10.3205/25gmds180</IdentifierDoi>
    <IdentifierUrn>urn:nbn:de:0183-25gmds1802</IdentifierUrn>
    <ArticleType>Meeting Abstract</ArticleType>
    <TitleGroup>
      <Title language="en">Missing value imputation for single methylomes</Title>
    </TitleGroup>
    <CreatorList>
      <Creator>
        <PersonNames>
          <Lastname>Kemda Ngueda</Lastname>
          <LastnameHeading>Kemda Ngueda</LastnameHeading>
          <Firstname>Christelle</Firstname>
          <Initials>C</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Institute of Medical Statistics, Computer and Data Sciences, Jena University Hospital, Jena, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Palm</Lastname>
          <LastnameHeading>Palm</LastnameHeading>
          <Firstname>Julia</Firstname>
          <Initials>J</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Institute of Medical Statistics, Computer and Data Sciences, Jena University Hospital, Jena, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Remo</Lastname>
          <LastnameHeading>Remo</LastnameHeading>
          <Firstname>Flavia</Firstname>
          <Initials>F</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Institut f&#252;r Medizinische Statistik, Informatik und Datenwissenschaften (IMSID), Universit&#228;tsklinikum Jena, Jena, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Scherag</Lastname>
          <LastnameHeading>Scherag</LastnameHeading>
          <Firstname>Andr&#233;</Firstname>
          <Initials>A</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Institut f&#252;r Medizinische Statistik, Informatik und Datenwissenschaften, Universit&#228;tsklinikum Jena, Jena, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Leistritz</Lastname>
          <LastnameHeading>Leistritz</LastnameHeading>
          <Firstname>Lutz</Firstname>
          <Initials>L</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Institute of Medical Statistics, Computer and Data Sciences, Jena University Hospital, Jena, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
    </CreatorList>
    <PublisherList>
      <Publisher>
        <Corporation>
          <Corporatename>German Medical Science GMS Publishing House</Corporatename>
        </Corporation>
        <Address>D&#252;sseldorf</Address>
      </Publisher>
    </PublisherList>
    <SubjectGroup>
      <SubjectheadingDDB>610</SubjectheadingDDB>
      <Keyword language="en">imputation</Keyword>
      <Keyword language="en">missing values</Keyword>
      <Keyword language="en">DNA-methylation</Keyword>
    </SubjectGroup>
    <DatePublishedList>
      <DatePublished>20251103</DatePublished>
    </DatePublishedList>
    <Language>engl</Language>
    <License license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/4.0/">
      <AltText language="en">This is an Open Access article distributed under the terms of the Creative Commons Attribution 4.0 License.</AltText>
      <AltText language="de">Dieser Artikel ist ein Open-Access-Artikel und steht unter den Lizenzbedingungen der Creative Commons Attribution 4.0 License (Namensnennung).</AltText>
    </License>
    <SourceGroup>
      <Meeting>
        <MeetingId>M0631</MeetingId>
        <MeetingSequence>180</MeetingSequence>
        <MeetingCorporation>Deutsche Gesellschaft f&#252;r Medizinische Informatik, Biometrie und Epidemiologie</MeetingCorporation>
        <MeetingName>70. Jahrestagung der Deutschen Gesellschaft f&#252;r Medizinische Informatik, Biometrie und Epidemiologie e. V. (GMDS)</MeetingName>
        <MeetingTitle></MeetingTitle>
        <MeetingSession>PS 12: Machine learning and AI applications</MeetingSession>
        <MeetingCity>Jena</MeetingCity>
        <MeetingDate>
          <DateFrom>20250907</DateFrom>
          <DateTo>20250911</DateTo>
        </MeetingDate>
      </Meeting>
    </SourceGroup>
    <ArticleNo>Abstr. 107</ArticleNo>
  </MetaData>
  <OrigData>
    <TextBlock name="Text" linked="yes">
      <MainHeadline>Text</MainHeadline><Pgraph><Mark1>Introduction:</Mark1> Personalized medicine puts the patient&#39;s uniqueness at different omic layers at its center in order to select tailored disease preventions, diagnoses or treatments <TextLink reference="1"></TextLink>. DNA methylation is a key epigenetic omic layer, particularly valued because of its reversible nature <TextLink reference="2"></TextLink>. However, in practice, methylation datasets usually contain a considerable proportion of missing values <TextLink reference="3"></TextLink>. Missing values mainly arise during data collection and represent a ubiquitous problem for downstream data analysis. To impute missing DNA methylation values, several approaches have been proposed from both statistics and computer science. They all have in common that they are in principle applicable to both DNA methylation microarray and sequencing data, but that they require information from at least two samples.</Pgraph><Pgraph>We propose a time- and cost-effective imputation method for replacing missing DNA-methylation values in a single patient methylome, i.e. a method that relies on the personalized medicine idea.</Pgraph><Pgraph><Mark1>Methods:</Mark1> Based on the observation that CpGs closer to each other are more likely to be methylated in a similar way, the method replaces a missing value by an available value of its nearest neighbouring CpG. Thereby, the distance between two CpGs refers to the smallest linear distance along the DNA sequence, measured in base pairs, between two CpG sites on the same chromosome and strand. 
We compared the new method with two exemplary methods (impute.knn, methyLImp) using simulations.</Pgraph><Pgraph><Mark1>Results:</Mark1> The proposed method applied to a single methylome yielded an average root mean square error (RMSE) of 0.27 in &#946;-value units (95&#37;-CI: &#91;0.26, 0.28&#93;) based on a publicly available 450K BeadChip data set of 3,402 individuals (<Hyperlink href="https:&#47;&#47;download.cncb.ac.cn&#47;ewas&#47;datahub&#47;download&#47;blood&#95;methylation&#95;v1.zip">https:&#47;&#47;download.cncb.ac.cn&#47;ewas&#47;datahub&#47;download&#47;blood&#95;methylation&#95;v1.zip</Hyperlink>) with &#946;-values ranging between 0 and 1. It is possible to consider the affiliation of CpGs to CpG islands when imputing missing methylation values. This improves the imputation accuracy. In addition, the imputation accuracy depends on the density of CpG sites on DNA-methylation microarrays and is higher the denser CpG sites are.</Pgraph><Pgraph><Mark1>Conclusions:</Mark1> The proposed method efficiently imputes missing values from a single methylome with minimal computational cost and memory requirements, making it a valuable addition to the imputation toolbox for single-subject applications. Its imputation accuracy is inferior to approaches exploiting multiple methylation samples. Here, an important aspect is the low density of the current chips compared to the richness of the whole methylome. Looking forward, improved accuracy can be expected as chips become denser or as we move to whole-methylome sequencing.</Pgraph><Pgraph>The authors declare that they have no competing interests.</Pgraph><Pgraph>The authors declare that an ethics committee vote is not required.</Pgraph></TextBlock>
    <References linked="yes">
      <Reference refNo="1">
        <RefAuthor>Rasool M</RefAuthor>
        <RefAuthor>Malik A</RefAuthor>
        <RefAuthor>Naseer MI</RefAuthor>
        <RefAuthor>Manan A</RefAuthor>
        <RefAuthor>Ansari S</RefAuthor>
        <RefAuthor>Begum I</RefAuthor>
        <RefAuthor></RefAuthor>
        <RefTitle>The role of epigenetics in personalized medicine: challenges and opportunities</RefTitle>
        <RefYear>2015</RefYear>
        <RefJournal>BMC Med Genomics</RefJournal>
        <RefPage>S5</RefPage>
        <RefTotal>Rasool M, Malik A, Naseer MI, Manan A, Ansari S, Begum I, et al. The role of epigenetics in personalized medicine: challenges and opportunities. BMC Med Genomics. 2015;8 Suppl 1(Suppl 1):S5.</RefTotal>
      </Reference>
      <Reference refNo="2">
        <RefAuthor>Gupta MK</RefAuthor>
        <RefAuthor>Peng H</RefAuthor>
        <RefAuthor>Li Y</RefAuthor>
        <RefAuthor>Xu CJ</RefAuthor>
        <RefTitle>The role of DNA methylation in personalized medicine for immune-related diseases</RefTitle>
        <RefYear>2023</RefYear>
        <RefJournal>Pharmacol Ther</RefJournal>
        <RefPage>108508</RefPage>
        <RefTotal>Gupta MK, Peng H, Li Y, Xu CJ. The role of DNA methylation in personalized medicine for immune-related diseases. Pharmacol Ther. 2023;250:108508.</RefTotal>
      </Reference>
      <Reference refNo="3">
        <RefAuthor>Di Lena P</RefAuthor>
        <RefAuthor>Sala C</RefAuthor>
        <RefAuthor>Prodi A</RefAuthor>
        <RefAuthor>Nardini C</RefAuthor>
        <RefTitle>Missing value estimation methods for DNA methylation data</RefTitle>
        <RefYear>2019</RefYear>
        <RefJournal>Bioinformatics</RefJournal>
        <RefPage>3786-93</RefPage>
        <RefTotal>Di Lena P, Sala C, Prodi A, Nardini C. Missing value estimation methods for DNA methylation data. Bioinformatics. 2019;35(19):3786-93.</RefTotal>
      </Reference>
    </References>
    <Media>
      <Tables>
        <NoOfTables>0</NoOfTables>
      </Tables>
      <Figures>
        <NoOfPictures>0</NoOfPictures>
      </Figures>
      <InlineFigures>
        <NoOfPictures>0</NoOfPictures>
      </InlineFigures>
      <Attachments>
        <NoOfAttachments>0</NoOfAttachments>
      </Attachments>
    </Media>
  </OrigData>
</GmsArticle>