<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<!DOCTYPE GmsArticle SYSTEM "http://www.egms.de/dtd/2.0.34/GmsArticle.dtd">
<GmsArticle xmlns:xlink="http://www.w3.org/1999/xlink">
  <MetaData>
    <Identifier>25gmds108</Identifier>
    <IdentifierDoi>10.3205/25gmds108</IdentifierDoi>
    <IdentifierUrn>urn:nbn:de:0183-25gmds1086</IdentifierUrn>
    <ArticleType>Meeting Abstract</ArticleType>
    <TitleGroup>
      <Title language="en">Introducing Medical Semantic Annotation Guidelines for German Clinical Documentation with SNOMED CT</Title>
    </TitleGroup>
    <CreatorList>
      <Creator>
        <PersonNames>
          <Lastname>Hofenbitzer</Lastname>
          <LastnameHeading>Hofenbitzer</LastnameHeading>
          <Firstname>Justin</Firstname>
          <Initials>J</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Technical University of Munich, TUM School of Medicine and Health, Institute for AI and Informatics in Medicine (AIIM), TUM University Hospital, Munich, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Schulz</Lastname>
          <LastnameHeading>Schulz</LastnameHeading>
          <Firstname>Stefan</Firstname>
          <Initials>S</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Medizinische Universit&#228;t Graz, Graz, Austria</Affiliation>
          <Affiliation>Averbis GmbH, Freiburg, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Boeker</Lastname>
          <LastnameHeading>Boeker</LastnameHeading>
          <Firstname>Martin</Firstname>
          <Initials>M</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Technical University of Munich, TUM School of Medicine and Health, Institute for AI and Informatics in Medicine (AIIM), TUM University Hospital, Munich, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Kl&#252;gl</Lastname>
          <LastnameHeading>Kl&#252;gl</LastnameHeading>
          <Firstname>Peter</Firstname>
          <Initials>P</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Averbis GmbH, Freiburg, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Riepenhausen</Lastname>
          <LastnameHeading>Riepenhausen</LastnameHeading>
          <Firstname>Sarah</Firstname>
          <Initials>S</Initials>
        </PersonNames>
        <Address>
          <Affiliation>University of M&#252;nster, Institute of Medical Informatics, M&#252;nster, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Lohr</Lastname>
          <LastnameHeading>Lohr</LastnameHeading>
          <Firstname>Christina</Firstname>
          <Initials>C</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Institute for Medical Informatics, Statistics, and Epidemiology, Leipzig University, Leipzig, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Lammert</Lastname>
          <LastnameHeading>Lammert</LastnameHeading>
          <Firstname>Jacqueline</Firstname>
          <Initials>J</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Technical University of Munich, TUM School of Medicine and Health, Institute for AI and Informatics in Medicine (AIIM), TUM University Hospital, Munich, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Riedel</Lastname>
          <LastnameHeading>Riedel</LastnameHeading>
          <Firstname>Andrea</Firstname>
          <Initials>A</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Erlangen University Hospital, Medical Center for Information and Communication Technology, Erlangen, Germany</Affiliation>
          <Affiliation>Friedrich-Alexander-Universit&#228;t Erlangen-N&#252;rnberg, Medical Informatics, Erlangen, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Modersohn</Lastname>
          <LastnameHeading>Modersohn</LastnameHeading>
          <Firstname>Luise</Firstname>
          <Initials>L</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Technical University of Munich, TUM School of Medicine and Health, Institute for AI and Informatics in Medicine (AIIM), TUM University Hospital, Munich, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
    </CreatorList>
    <PublisherList>
      <Publisher>
        <Corporation>
          <Corporatename>German Medical Science GMS Publishing House</Corporatename>
        </Corporation>
        <Address>D&#252;sseldorf</Address>
      </Publisher>
    </PublisherList>
    <SubjectGroup>
      <SubjectheadingDDB>610</SubjectheadingDDB>
      <Keyword language="en">semantics</Keyword>
      <Keyword language="en">natural language processing</Keyword>
      <Keyword language="en">linguistics</Keyword>
      <Keyword language="en">systematized nomenclature of medicine</Keyword>
      <Keyword language="en">corpus annotation</Keyword>
      <Keyword language="en">German clinical text</Keyword>
    </SubjectGroup>
    <DatePublishedList>
      <DatePublished>20251103</DatePublished>
    </DatePublishedList>
    <Language>engl</Language>
    <License license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/4.0/">
      <AltText language="en">This is an Open Access article distributed under the terms of the Creative Commons Attribution 4.0 License.</AltText>
      <AltText language="de">Dieser Artikel ist ein Open-Access-Artikel und steht unter den Lizenzbedingungen der Creative Commons Attribution 4.0 License (Namensnennung).</AltText>
    </License>
    <SourceGroup>
      <Meeting>
        <MeetingId>M0631</MeetingId>
        <MeetingSequence>108</MeetingSequence>
        <MeetingCorporation>Deutsche Gesellschaft f&#252;r Medizinische Informatik, Biometrie und Epidemiologie</MeetingCorporation>
        <MeetingName>70. Jahrestagung der Deutschen Gesellschaft f&#252;r Medizinische Informatik, Biometrie und Epidemiologie e. V. (GMDS)</MeetingName>
        <MeetingTitle></MeetingTitle>
        <MeetingSession>PS 2: Interoperability and standards</MeetingSession>
        <MeetingCity>Jena</MeetingCity>
        <MeetingDate>
          <DateFrom>20250907</DateFrom>
          <DateTo>20250911</DateTo>
        </MeetingDate>
      </Meeting>
    </SourceGroup>
    <ArticleNo>Abstr. 282</ArticleNo>
  </MetaData>
  <OrigData>
    <TextBlock name="Text" linked="yes">
      <MainHeadline>Text</MainHeadline><Pgraph><Mark1>Introduction:</Mark1> The systematic annotation of clinical free-text documents with standardized terminologies, such as SNOMED CT (SCT), is essential for developing interoperable and semantically enriched healthcare resources. Domain-specific natural language corpora with meaningful and expressive annotations are fundamental for computational linguistics and natural language processing (NLP), enabling tasks like training, fine-tuning, and evaluating large language models. However, there is a shortage of clinical corpora <TextLink reference="1"></TextLink>, and only a few contain semantic annotations <TextLink reference="2"></TextLink>. Prominent counterexamples are projects like AIDAVA (<Hyperlink href="https:&#47;&#47;www.aidava.eu">https:&#47;&#47;www.aidava.eu</Hyperlink>) or JIGSAW (<Hyperlink href="https:&#47;&#47;research.manchester.ac.uk&#47;en&#47;projects&#47;assembling-the-data-jigsaw-powering-robust-research-on-the-causes">https:&#47;&#47;research.manchester.ac.uk&#47;en&#47;projects&#47;assembling-the-data-jigsaw-powering-robust-research-on-the-causes</Hyperlink>), which paved the way towards standardized semantic text annotations using existing clinical ontologies <TextLink reference="3"></TextLink>. The German Medical Text Corpus (GeMTeX) project aims to provide the largest shareable German clinical document collection with conceptual annotations from SCT <TextLink reference="4"></TextLink>, <TextLink reference="5"></TextLink>, and hereby introduces its detailed and methodologically grounded annotation guidelines.</Pgraph><Pgraph><Mark1>Methods:</Mark1> The guidelines are informed by experiences and annotation principles from comparable consortia <TextLink reference="3"></TextLink>. These experiences emphasize focusing on the semantic core of the texts, specifically by utilizing unary predicates, the SCT concepts, and binary relations between them. 
The annotations should be as literal and straightforward as possible to prevent the annotators from over-interpreting the text spans.</Pgraph><Pgraph><Mark1>Results:</Mark1> The GeMTeX semantic annotation guidelines define three major concept classes: Core Concepts, Modifier Concepts, and Qualifier Concepts. The Core Concepts are the most relevant concepts for our semantic annotation, as they include Clinical Conditions (e.g., the SCT hierarchies <Mark2>Clinical Finding</Mark2> and <Mark2>Disorder</Mark2>), Procedures, Medications, Substances, and Observables. The Modifier and Qualifier Concepts refine an annotation when a Core Concept does not sufficiently express the corresponding text span. The Modifier Concepts covered by our guidelines comprise the SCT hierarchies <Mark2>Body Structure</Mark2>, <Mark2>Physical Object</Mark2>, and <Mark2>Organism</Mark2>. The Qualifier Concept is identical to the SCT hierarchy <Mark2>Qualifier Value</Mark2> and contains, among others, dates, units, or factuality statements. Familial diseases are featured as well.</Pgraph><Pgraph>To build traceable knowledge representations, we introduce relations between annotated concepts. The unlabeled and unidirectional relations follow intuitive and concise rules. For example, relations always point from a cause to its corresponding effect or go from the lower to the upper bound within a value range.</Pgraph><Pgraph>In addition, our guidelines are specifically tailored to the German language. Besides universal linguistic challenges like ambiguity, syntactic coordination, or copula constructions, we provide concise and comprehensible instructions for typical German peculiarities, e.g., separable particle verbs or unique terminology.</Pgraph><Pgraph><Mark1>Conclusion:</Mark1> The GeMTeX semantic annotation guidelines are informed by large semantic annotation projects for free-text documents in the clinical domain. 
They specifically address the usage of SCT and underline linguistic and terminological challenges inherent to the German clinical language. Explicit annotation rules and decision-making criteria accompanied by illustrative examples are integral to enhancing consistency between annotators. This initial version of the annotation guidelines is designed to be a methodological resource for researchers in clinical NLP or terminology annotation, supporting reproducibility and transparency. We aim to contribute to further standardization efforts. The GeMTeX annotation guidelines are available at <Hyperlink href="https:&#47;&#47;doi.org&#47;10.5281&#47;zenodo.15689931">https:&#47;&#47;doi.org&#47;10.5281&#47;zenodo.15689931</Hyperlink>.</Pgraph><Pgraph>The authors declare that they have no competing interests.</Pgraph><Pgraph>The authors declare that a positive ethics committee vote has been obtained.</Pgraph></TextBlock>
    <References linked="yes">
      <Reference refNo="1">
        <RefAuthor>Hahn U</RefAuthor>
        <RefTitle>Clinical Document Corpora -- Real Ones, Translated and Synthetic Substitutes, and Assorted Domain Proxies: A Survey of Diversity in Corpus Design, with Focus on German Text Data</RefTitle>
        <RefYear>2025</RefYear>
        <RefJournal>JAMIA Open</RefJournal>
        <RefPage>ooaf024</RefPage>
        <RefTotal>Hahn U. Clinical Document Corpora -- Real Ones, Translated and Synthetic Substitutes, and Assorted Domain Proxies: A Survey of Diversity in Corpus Design, with Focus on German Text Data. JAMIA Open. 2025;8(3):ooaf024. DOI: 10.1093&#47;jamiaopen&#47;ooaf024</RefTotal>
        <RefLink>http:&#47;&#47;dx.doi.org&#47;10.1093&#47;jamiaopen&#47;ooaf024</RefLink>
      </Reference>
      <Reference refNo="2">
        <RefAuthor>Jovanovi&#263; J</RefAuthor>
        <RefAuthor>Bagheri E</RefAuthor>
        <RefTitle>Semantic Annotation in Biomedicine: The Current Landscape</RefTitle>
        <RefYear>2017</RefYear>
        <RefJournal>J Biomed Semant</RefJournal>
        <RefPage>44</RefPage>
        <RefTotal>Jovanovi&#263; J, Bagheri E. Semantic Annotation in Biomedicine: The Current Landscape. J Biomed Semant. 2017 Sep;8(1):44.</RefTotal>
      </Reference>
      <Reference refNo="3">
        <RefAuthor>Schulz S</RefAuthor>
        <RefAuthor>Del-Pinto W</RefAuthor>
        <RefAuthor>Han L</RefAuthor>
        <RefAuthor>Kreuzthaler M</RefAuthor>
        <RefAuthor>Aghaei S</RefAuthor>
        <RefAuthor>Nenadic G</RefAuthor>
        <RefTitle>Towards Principles of Ontology-Based Annotation of Clinical Narratives</RefTitle>
        <RefYear>2023</RefYear>
        <RefBookTitle>Proceedings of the International Conference on Biomedical Ontologies 2023; 2023 Aug 28 - Sep 1; Brasilia, Brazil</RefBookTitle>
        <RefPage></RefPage>
        <RefTotal>Schulz S, Del-Pinto W, Han L, Kreuzthaler M, Aghaei S, Nenadic G. Towards Principles of Ontology-Based Annotation of Clinical Narratives. In: Proceedings of the International Conference on Biomedical Ontologies 2023; 2023 Aug 28 - Sep 1; Brasilia, Brazil. (CEUR Workshop Proceedings; 3603). &#91;cited 2025 Apr 25&#93;. Available from: https:&#47;&#47;ceur-ws.org&#47;Vol-3603&#47;Paper4.pdf</RefTotal>
        <RefLink>https:&#47;&#47;ceur-ws.org&#47;Vol-3603&#47;Paper4.pdf</RefLink>
      </Reference>
      <Reference refNo="4">
        <RefAuthor>Meineke F</RefAuthor>
        <RefAuthor>Modersohn L</RefAuthor>
        <RefAuthor>Loeffler M</RefAuthor>
        <RefAuthor>Boeker M</RefAuthor>
        <RefTitle>Announcement of the German Medical Text Corpus Project (GeMTeX)</RefTitle>
        <RefYear>2023</RefYear>
        <RefBookTitle>Caring is Sharing &#8211; Exploiting the Value in Data for Health and Innovation. Proceedings of MIE 2023</RefBookTitle>
        <RefPage></RefPage>
        <RefTotal>Meineke F, Modersohn L, Loeffler M, Boeker M. Announcement of the German Medical Text Corpus Project (GeMTeX). In: Caring is Sharing &#8211; Exploiting the Value in Data for Health and Innovation. Proceedings of MIE 2023. IOS; 2023. DOI: 10.3233&#47;SHTI230283</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.3233&#47;SHTI230283</RefLink>
      </Reference>
      <Reference refNo="5">
        <RefAuthor>Lohr C</RefAuthor>
        <RefAuthor>Matthies F</RefAuthor>
        <RefAuthor>Faller J</RefAuthor>
        <RefAuthor>Modersohn L</RefAuthor>
        <RefAuthor>Riedel A</RefAuthor>
        <RefAuthor>Hahn U</RefAuthor>
        <RefAuthor>Kiser R</RefAuthor>
        <RefAuthor>Boeker M</RefAuthor>
        <RefAuthor>Meineke F</RefAuthor>
        <RefTitle>De-Identifying GRASCCO - A Pilot Study for the De-Identification of the German Medical Text Project (GeMTeX) Corpus</RefTitle>
        <RefYear>2024</RefYear>
        <RefJournal>Stud Health Technol Inform</RefJournal>
        <RefPage>171-179</RefPage>
        <RefTotal>Lohr C, Matthies F, Faller J, Modersohn L, Riedel A, Hahn U, Kiser R, Boeker M, Meineke F. De-Identifying GRASCCO - A Pilot Study for the De-Identification of the German Medical Text Project (GeMTeX) Corpus. Stud Health Technol Inform. 2024 Aug 30;317:171-179. DOI: 10.3233&#47;SHTI240853</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.3233&#47;SHTI240853</RefLink>
      </Reference>
    </References>
    <Media>
      <Tables>
        <NoOfTables>0</NoOfTables>
      </Tables>
      <Figures>
        <NoOfPictures>0</NoOfPictures>
      </Figures>
      <InlineFigures>
        <NoOfPictures>0</NoOfPictures>
      </InlineFigures>
      <Attachments>
        <NoOfAttachments>0</NoOfAttachments>
      </Attachments>
    </Media>
  </OrigData>
</GmsArticle>