<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<!DOCTYPE GmsArticle SYSTEM "http://www.egms.de/dtd/2.0.34/GmsArticle.dtd">
<GmsArticle xmlns:xlink="http://www.w3.org/1999/xlink">
  <MetaData>
    <Identifier>25gmds020</Identifier>
    <IdentifierDoi>10.3205/25gmds020</IdentifierDoi>
    <IdentifierUrn>urn:nbn:de:0183-25gmds0203</IdentifierUrn>
    <ArticleType>Meeting Abstract</ArticleType>
    <TitleGroup>
      <Title language="en">Generating High-Quality Multiple-Choice Questions Using Small Language Models and Adaptive Agentic Infrastructure</Title>
    </TitleGroup>
    <CreatorList>
      <Creator>
        <PersonNames>
          <Lastname>Gr&#246;&#223;ler</Lastname>
          <LastnameHeading>Gr&#246;&#223;ler</LastnameHeading>
          <Firstname>Michael</Firstname>
          <Initials>M</Initials>
        </PersonNames>
        <Address>
          <Affiliation>University Medical Center Hamburg-Eppendorf, Institute for Applied Medical Informatics, Hamburg, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>D&#252;sterbeck</Lastname>
          <LastnameHeading>D&#252;sterbeck</LastnameHeading>
          <Firstname>Lilly Marie</Firstname>
          <Initials>LM</Initials>
        </PersonNames>
        <Address>
          <Affiliation>University Medical Center Hamburg-Eppendorf, Institute for Applied Medical Informatics, Hamburg, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Credidio</Lastname>
          <LastnameHeading>Credidio</LastnameHeading>
          <Firstname>Graziella</Firstname>
          <Initials>G</Initials>
        </PersonNames>
        <Address>
          <Affiliation>University Medical Center Hamburg-Eppendorf, Institute for Applied Medical Informatics, Hamburg, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Riemann</Lastname>
          <LastnameHeading>Riemann</LastnameHeading>
          <Firstname>Layla Tabea</Firstname>
          <Initials>LT</Initials>
        </PersonNames>
        <Address>
          <Affiliation>University Medical Center Hamburg-Eppendorf, Institute for Applied Medical Informatics, Hamburg, Germany</Affiliation>
        </Address>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
    </CreatorList>
    <PublisherList>
      <Publisher>
        <Corporation>
          <Corporatename>German Medical Science GMS Publishing House</Corporatename>
        </Corporation>
        <Address>D&#252;sseldorf</Address>
      </Publisher>
    </PublisherList>
    <SubjectGroup>
      <SubjectheadingDDB>610</SubjectheadingDDB>
      <Keyword language="en">AI-based MCQ generation</Keyword>
      <Keyword language="en">multi-agent system</Keyword>
      <Keyword language="en">small language models</Keyword>
      <Keyword language="en">personalized learning platform</Keyword>
      <Keyword language="en">agent graph optimization</Keyword>
    </SubjectGroup>
    <DatePublishedList>
      <DatePublished>20251103</DatePublished>
    </DatePublishedList>
    <Language>engl</Language>
    <License license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/4.0/">
      <AltText language="en">This is an Open Access article distributed under the terms of the Creative Commons Attribution 4.0 License.</AltText>
      <AltText language="de">Dieser Artikel ist ein Open-Access-Artikel und steht unter den Lizenzbedingungen der Creative Commons Attribution 4.0 License (Namensnennung).</AltText>
    </License>
    <SourceGroup>
      <Meeting>
        <MeetingId>M0631</MeetingId>
        <MeetingSequence>020</MeetingSequence>
        <MeetingCorporation>Deutsche Gesellschaft f&#252;r Medizinische Informatik, Biometrie und Epidemiologie</MeetingCorporation>
        <MeetingName>70. Jahrestagung der Deutschen Gesellschaft f&#252;r Medizinische Informatik, Biometrie und Epidemiologie e. V. (GMDS)</MeetingName>
        <MeetingTitle></MeetingTitle>
        <MeetingSession>V: Education and science communication</MeetingSession>
        <MeetingCity>Jena</MeetingCity>
        <MeetingDate>
          <DateFrom>20250907</DateFrom>
          <DateTo>20250911</DateTo>
        </MeetingDate>
      </Meeting>
    </SourceGroup>
    <ArticleNo>Abstr. 111</ArticleNo>
  </MetaData>
  <OrigData>
    <TextBlock name="Text" linked="yes">
      <MainHeadline>Text</MainHeadline><Pgraph><Mark1>Introduction:</Mark1> We introduce a novel approach for generating high-quality multiple-choice questions (MCQs) within KiMED, an AI-based learning platform designed to support medical students in reviewing lecture material and preparing for coursework. KiMED is a personalized learning web application that dynamically adjusts question difficulty, topical emphasis, and explanation depth based on individual student performance and curricular context. </Pgraph><Pgraph>A key challenge in this domain lies in balancing strict legal and quality requirements with limited computational resources <TextLink reference="1"></TextLink>. Erroneous material could potentially lead to lawsuits, thus demanding high-quality question generation, while computational limitations arise from the substantial demands to deploy language models locally. Scaling such services to support simultaneous use by many students further intensifies these demands. Our approach addresses these challenges using small language models within a highly adaptive agentic framework. Focusing initially on biochemistry, our approach lays the groundwork for expanding into other subjects and scaling personalized learning tools in resource-constrained educational environments.</Pgraph><Pgraph><Mark1>Methods:</Mark1> Our system employs the GPTSwarm framework <TextLink reference="2"></TextLink> on top of a custom RAG Pipeline <TextLink reference="3"></TextLink>, <TextLink reference="4"></TextLink> for document retrieval, modeling language agents as directed acyclic graphs with functional nodes. Each node handles tasks like concept extraction, distractor generation, or answer validation, while edges manage information flow within and between agents. Agents form a swarm, with both node prompts and inter-agent communication patterns optimized automatically. 
Optimization occurs at two levels: node optimization refines prompt instructions, and edge optimization adjusts inter-agent information sharing. These processes are guided by reinforcement learning and task-specific feedback. The framework operates effectively with small language models to suit our available resources. The initial data are based on a combination of textbooks, slides, transcripts of those slides, and course scripts. To assess our approach, we created a biochemistry MCQ dataset comprising three sets of 50 questions: AI-generated using only prompt engineering as a baseline, AI-generated with our framework, and human-generated. Two domain experts evaluated each question using binary scores across ten criteria, including clarity, relevance, grammatical correctness, and distractor quality <TextLink reference="5"></TextLink>. Scores were averaged per criterion and over all experts, and summed to yield a final score ranging from 0 to 10.</Pgraph><Pgraph><Mark1>Results:</Mark1> The baseline AI-generated questions achieved a final score of 4.7, establishing a lower benchmark. Human-authored questions scored 8.9. Our enhanced AI pipeline attained a score of 8.7. Further, it demonstrated improved alignment with criteria such as topic centrality and relevance to learning objectives compared to the human-generated questions. However, generating suitable distractors remained more difficult for the AI system. These results indicate that AI-generated MCQs in biochemistry can effectively support educators in developing high-quality questions.</Pgraph><Pgraph><Mark1>Conclusion:</Mark1> This work shows the potential of a multi-agent, graph-optimized approach to automated MCQ generation in medical education. By using small language models within the GPTSwarm framework, we enable efficient and adaptive MCQ generation for personalized learning. 
Our code for the platform and the agent-based optimization will be released as open source to allow expansion to other topics and medical faculties.</Pgraph><Pgraph>The authors declare that they have no competing interests.</Pgraph><Pgraph>The authors declare that an ethics committee vote is not required.</Pgraph></TextBlock>
    <References linked="yes">
      <Reference refNo="1">
        <RefAuthor>Ali F</RefAuthor>
        <RefAuthor>Talat H</RefAuthor>
        <RefTitle>AI Integration in MCQ Development: Assessing Quality in Medical Education: A Systematic Review</RefTitle>
        <RefYear>2024</RefYear>
        <RefJournal>L&#38;S</RefJournal>
        <RefPage>14</RefPage>
        <RefTotal>Ali F, Talat H. AI Integration in MCQ Development: Assessing Quality in Medical Education: A Systematic Review. L&#38;S. 2024;5(3):14. DOI: 10.37185&#47;LnS.1.1.643</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.37185&#47;LnS.1.1.643</RefLink>
      </Reference>
      <Reference refNo="2">
        <RefAuthor>Zhuge M</RefAuthor>
        <RefAuthor>Wang W</RefAuthor>
        <RefAuthor>Kirsch L</RefAuthor>
        <RefAuthor>Faccio F</RefAuthor>
        <RefAuthor>Khizbullin D</RefAuthor>
        <RefAuthor>Schmidhuber J</RefAuthor>
        <RefTitle>GPTSwarm: Language Agents as Optimizable Graphs &#91;Preprint&#93;</RefTitle>
        <RefYear>2024</RefYear>
        <RefJournal>arXiv</RefJournal>
        <RefPage></RefPage>
        <RefTotal>Zhuge M, Wang W, Kirsch L, Faccio F, Khizbullin D, Schmidhuber J. GPTSwarm: Language Agents as Optimizable Graphs &#91;Preprint&#93;. arXiv. 2024. DOI: 10.48550&#47;arXiv.2402.16823</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.48550&#47;arXiv.2402.16823</RefLink>
      </Reference>
      <Reference refNo="3">
        <RefAuthor>Wu F</RefAuthor>
        <RefAuthor>Li Z</RefAuthor>
        <RefAuthor>Wei F</RefAuthor>
        <RefAuthor>Li Y</RefAuthor>
        <RefAuthor>Ding B</RefAuthor>
        <RefAuthor>Gao J</RefAuthor>
        <RefTitle>Talk to Right Specialists: Routing and Planning in Multi-agent System for Question Answering &#91;Preprint&#93;</RefTitle>
        <RefYear>2025</RefYear>
        <RefJournal>arXiv</RefJournal>
        <RefPage></RefPage>
        <RefTotal>Wu F, Li Z, Wei F, Li Y, Ding B, Gao J. Talk to Right Specialists: Routing and Planning in Multi-agent System for Question Answering &#91;Preprint&#93;. arXiv. 2025. DOI: 10.48550&#47;arXiv.2501.07813</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.48550&#47;arXiv.2501.07813</RefLink>
      </Reference>
      <Reference refNo="4">
        <RefAuthor>Gao Y</RefAuthor>
        <RefAuthor>Xiong Y</RefAuthor>
        <RefAuthor>Gao X</RefAuthor>
        <RefAuthor>Jia K</RefAuthor>
        <RefAuthor>Pan J</RefAuthor>
        <RefAuthor>Bi Y</RefAuthor>
        <RefAuthor></RefAuthor>
        <RefTitle>Retrieval-Augmented Generation for Large Language Models: A Survey &#91;Preprint&#93;</RefTitle>
        <RefYear>2024</RefYear>
        <RefJournal>arXiv</RefJournal>
        <RefPage></RefPage>
        <RefTotal>Gao Y, Xiong Y, Gao X, Jia K, Pan J, Bi Y, et al. Retrieval-Augmented Generation for Large Language Models: A Survey &#91;Preprint&#93;. arXiv. 2024. DOI: 10.48550&#47;arXiv.2312.10997</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.48550&#47;arXiv.2312.10997</RefLink>
      </Reference>
      <Reference refNo="5">
        <RefAuthor>Wang J</RefAuthor>
        <RefAuthor>Xiao R</RefAuthor>
        <RefAuthor>Tseng YJ</RefAuthor>
        <RefTitle>Generating AI Literacy MCQs: A Multi-Agent LLM Approach</RefTitle>
        <RefYear>2025</RefYear>
        <RefBookTitle>Proceedings of the 56th ACM Technical Symposium on Computer Science Education V 2</RefBookTitle>
        <RefPage>1651&#8211;2</RefPage>
        <RefTotal>Wang J, Xiao R, Tseng YJ. Generating AI Literacy MCQs: A Multi-Agent LLM Approach. In: Proceedings of the 56th ACM Technical Symposium on Computer Science Education V 2. 2025. p. 1651&#8211;2. DOI: 10.1145&#47;3641555.3705189</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1145&#47;3641555.3705189</RefLink>
      </Reference>
    </References>
    <Media>
      <Tables>
        <NoOfTables>0</NoOfTables>
      </Tables>
      <Figures>
        <NoOfPictures>0</NoOfPictures>
      </Figures>
      <InlineFigures>
        <NoOfPictures>0</NoOfPictures>
      </InlineFigures>
      <Attachments>
        <NoOfAttachments>0</NoOfAttachments>
      </Attachments>
    </Media>
  </OrigData>
</GmsArticle>