<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.2 20190208//EN" "http://jats.nlm.nih.gov/archiving/1.2/JATS-archivearticle1.dtd">
<article article-type="brief-report" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-title-group>
        <journal-title>microPublication Biology</journal-title>
      </journal-title-group>
      <issn pub-type="epub">2578-9430</issn>
      <publisher>
        <publisher-name>Caltech Library</publisher-name>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="doi">10.17912/micropub.biology.001211</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>new finding</subject>
        </subj-group>
        <subj-group subj-group-type="subject">
          <subject>genomic</subject>
        </subj-group>
        <subj-group subj-group-type="species">
          <subject>drosophila</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>
          <italic>Drosophila kikkawai – Sox102F</italic>
        </article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author">
          <name>
            <surname>Mo</surname>
            <given-names>Mia</given-names>
          </name>
          <xref ref-type="aff" rid="aff1">1</xref>
          <xref ref-type="corresp" rid="cor1">§</xref>
        </contrib>
        <contrib contrib-type="author">
          <name>
            <surname>LoBello</surname>
            <given-names>Larissa</given-names>
          </name>
          <xref ref-type="aff" rid="aff1">1</xref>
        </contrib>
        <contrib contrib-type="author">
          <name>
            <surname>Hassan Farah</surname>
            <given-names>Ismael</given-names>
          </name>
          <xref ref-type="aff" rid="aff2">2</xref>
        </contrib>
        <contrib contrib-type="author">
          <name>
            <surname>Agtang</surname>
            <given-names>Elwin</given-names>
          </name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <name>
            <surname>Ramos</surname>
            <given-names>Edith Luz</given-names>
          </name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <name>
            <surname>Abdoli</surname>
            <given-names>Reza</given-names>
          </name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <name>
            <surname>Santander Diaz</surname>
            <given-names>Laura</given-names>
          </name>
          <xref ref-type="aff" rid="aff4">4</xref>
        </contrib>
        <contrib contrib-type="author">
          <name>
            <surname>Helena Schumann Ferreira</surname>
            <given-names>Larissa</given-names>
          </name>
          <xref ref-type="aff" rid="aff3">3</xref>
        </contrib>
        <contrib contrib-type="author">
          <name>
            <surname>Kokan</surname>
            <given-names>Nighat</given-names>
          </name>
          <xref ref-type="aff" rid="aff5">5</xref>
        </contrib>
        <contrib contrib-type="author">
          <name>
            <surname>Sadikot</surname>
            <given-names>Takrima</given-names>
          </name>
          <xref ref-type="aff" rid="aff6">6</xref>
        </contrib>
        <contrib contrib-type="author">
          <name>
            <surname>Sawa</surname>
            <given-names>Alexa</given-names>
          </name>
          <xref ref-type="aff" rid="aff7">7</xref>
        </contrib>
        <contrib contrib-type="author">
          <name>
            <surname>Arrigo</surname>
            <given-names>Cindy</given-names>
          </name>
          <xref ref-type="aff" rid="aff8">8</xref>
        </contrib>
        <aff id="aff1">
          <label>1</label>
          Washington University in St. Louis, St. Louis, Missouri, United States
        </aff>
        <aff id="aff2">
          <label>2</label>
          Cardinal Stritch University, Milwaukee, Wisconsin, United States
        </aff>
        <aff id="aff3">
          <label>3</label>
          College of the Desert, Palm Desert, California, United States
        </aff>
        <aff id="aff4">
          <label>4</label>
          Washburn University, Topeka, Kansas, United States
        </aff>
        <aff id="aff5">
          <label>5</label>
          Biology, Lakeland University, Plymouth, Wisconsin, United States
        </aff>
        <aff id="aff6">
          <label>6</label>
          Biology, Washburn University, Topeka, Kansas, United States
        </aff>
        <aff id="aff7">
          <label>7</label>
          Biology, College of the Desert, Palm Desert, California, United States
        </aff>
        <aff id="aff8">
          <label>8</label>
          Biology, New Jersey City University, Jersey City, New Jersey, United States
        </aff>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Murphy</surname>
            <given-names>Terence</given-names>
          </name>
        </contrib>
      </contrib-group>
      <author-notes>
        <corresp id="cor1">
          <label>§</label>
          Correspondence to: Mia Mo (
          <email>m.mo@wustl.edu</email>
          )
        </corresp>
        <fn fn-type="coi-statement">
          <p>The authors declare that there are no conflicts of interest present.</p>
        </fn>
      </author-notes>
      <pub-date date-type="pub" publication-format="electronic">
        <day>19</day>
        <month>7</month>
        <year>2024</year>
      </pub-date>
      <pub-date date-type="collection" publication-format="electronic">
        <year>2024</year>
      </pub-date>
      <volume>2024</volume>
      <elocation-id>10.17912/micropub.biology.001211</elocation-id>
      <history>
        <date date-type="received">
          <day>18</day>
          <month>4</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>18</day>
          <month>7</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>19</day>
          <month>7</month>
          <year>2024</year>
        </date>
      </history>
      <permissions>
        <copyright-statement>Copyright: © 2024 by the authors</copyright-statement>
        <copyright-year>2024</copyright-year>
        <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
          <license-p>This is an open-access article distributed under the terms of the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
        </license>
      </permissions>
      <abstract>
        <p>
          The 
          <italic>
            <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">Drosophila kikkawai</ext-link>
          </italic>
           feature with NCBI Gene ID 108084518 was determined to be an ortholog of 
          <italic>
            <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">Drosophila melanogaster</ext-link>
          </italic>
          <italic>
            <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
          </italic>
          , a member of the FlyBase High Mobility Group Box Transcription Factors gene group (FBgg0000748). Five isoforms were constructed using the GEP F element annotation protocol, the longest being novel isoform Sox102F-PNE (identified using the 
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/nuccore/XM_017180752">XM_017180752</ext-link>
           RefSeq prediction and RNA-seq data). Among the isoforms found in both 
          <italic>
            <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
          </italic>
           and 
          <italic>
            <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
          </italic>
          , Sox102F-PB is the longest and exhibits a 1.18x coding span expansion due to transposable element insertion into an intron. All 
          <italic>
            <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
          </italic>
           protein isoforms contain the conserved domain HMG_box_dom (IPR009071).
        </p>
      </abstract>
      <funding-group>
        <award-group>
          <funding-source>
            <institution-wrap>
              <institution>National Science Foundation (United States)</institution>
              <institution-id>https://ror.org/021nxhr62</institution-id>
            </institution-wrap>
          </funding-source>
          <award-id>2114661</award-id>
          <principal-award-recipient>Cindy Arrigo</principal-award-recipient>
        </award-group>
        <award-group>
          <funding-source>
            <institution-wrap>
              <institution>National Institutes of Health (United States)</institution>
              <institution-id>https://ror.org/01cwqze88</institution-id>
            </institution-wrap>
          </funding-source>
          <award-id>R25GM130517</award-id>
          <principal-award-recipient>Laura Reed</principal-award-recipient>
        </award-group>
        <award-group>
          <funding-source>
            <institution-wrap>
              <institution>National Science Foundation (United States)</institution>
              <institution-id>https://ror.org/021nxhr62</institution-id>
            </institution-wrap>
          </funding-source>
          <award-id>1915544</award-id>
          <principal-award-recipient>Laura Reed</principal-award-recipient>
        </award-group>
        <funding-statement>
          This material is based upon work supported by the National Science Foundation (NSF) under Grant No. 2114661 to Dr. Cindy Arrigo. The Genomics Education Partnership (GEP) is supported by the NSF under Grant No. 1915544 and National Institute of General Medical Sciences of the National Institutes of Health under award number R25GM130517 to the Genomics Education Partnership (
          <ext-link ext-link-type="uri" xlink:href="https://thegep.org/">https://thegep.org/</ext-link>
          ). The Genomics Education Partnership is fully financed by Federal moneys. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health.
        </funding-statement>
      </funding-group>
    </article-meta>
  </front>
  <body>
    <fig position="anchor" id="f1">
      <label>Figure 1. </label>
      <caption>
        <p>
          <bold>(A)</bold>
          <bold>
            Synteny diagram comparing the gene neighborhood of 
            <italic>
              <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
            </italic>
             in 
            <italic>
              <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
            </italic>
             and 
            <italic>
              <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
            </italic>
             on the Muller F element.
          </bold>
           The large chevron arrows labeled with gene symbols indicate the gene's coding direction relative to the centromere (black dot).
           For 
          <italic>
            <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
          </italic>
          , these labels correspond to the FlyBase gene symbols. For 
          <italic>
            <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
          </italic>
          , the labels correspond to the gene symbols in the NCBI Gene database. For 
          <italic>
            <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
          </italic>
          genes with published gene symbols, the label begins with the gene symbol (e.g., 
          <italic>
            <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
          </italic>
          ) rather than “LOC”, followed by the NCBI Gene ID (e.g., 108084518). 
          <italic>
            <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
          </italic>
          is coded on the minus strand on the F element in both 
          <italic>
            <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
          </italic>
           and 
          <italic>
            <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
          </italic>
          . Orthologous genes are denoted with the same color chevron arrow. 
          <bold>(B)</bold>
          <bold>
            Genome Browser image of 
            <italic>
              <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
            </italic>
             gene model(s) on the 
            <italic>
              <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
            </italic>
             DkikHiC1 assembly with evidence tracks.
          </bold>
           The final gene models are labeled with their isoform names and shown in dark blue at the top of the image. Sox102F-PNE is highlighted to indicate the novel isoform in 
          <italic>
            <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
          </italic>
          . The provided evidence tracks in order from top to bottom include RefSeq BLAT alignments, RNA-seq coverage from mixed embryos, combined splice junctions, and RepeatMasker (v4.1.2-p1), which displays the locations of different transposable elements (TEs). The inset shows a zoomed-in view of the initial Sox102F-PB CDS and an overlapping internal CDS from Sox102F-PNE with a corresponding splice junction and RNA-seq coverage. 
          <bold>(C)</bold>
          <bold>
            EMBOSS Needle pairwise alignment output highlighting conserved residues between 
            <italic>
              <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
            </italic>
            and
            <italic>
              <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
            </italic>
            Sox102F-PA proteins. 
          </bold>
          A pairwise alignment output comparing the amino acid (AA) sequence of the 
          <italic>
            <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
          </italic>
           Sox102F-PA protein and the resulting AA sequence from the final gene model for Sox102F-PA in 
          <italic>
            <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
          </italic>
          . The blue shading demarcates conserved residues with the highly conserved HMG_box_dom (IPR009071) outlined in red. 
          <bold>(D)</bold>
          <bold>
            Dot plot depicting protein alignment comparing AA sequences of Sox102F-PA in 
            <italic>
              <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
            </italic>
             (y-axis) and 
            <italic>
              <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
            </italic>
             (x-axis)
          </bold>
          . The abundance of gaps in the diagonal line indicates regions of low sequence similarity. The boxes with alternating colors indicate boundaries between different coding exons. The circled region represents the location of the HMG_box_dom inside the second coding sequence (CDS2) which is shared between 
          <italic>
            <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
          </italic>
           and 
          <italic>
            <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
          </italic>
          . 
          <bold>
            (E) ROAST alignment of 36 
            <italic>
              <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7215">Drosophila</ext-link>
            </italic>
            species.
          </bold>
           The ROAST (release March 2008) alignment depicts conservation across 35 
          <italic>
            <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7215">Drosophila</ext-link>
          </italic>
           species against the terminal CDS of all 
          <italic>
            <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
          </italic>
          isoforms in 
          <italic>
            <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
          </italic>
          . The darker coloration indicates higher conservation or similarity between species while the light coloration indicates less conservation or similarity. The HMG_box_dom is located within the red boxed region that depicts a stretch of highly conserved sequence across all 36 species including 
          <italic>
            <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
          </italic>
           which is highlighted in blue.
        </p>
      </caption>
      <graphic xlink:href="25789430-2024-micropub.biology.001211"/>
    </fig>
    <sec>
      <title>Description</title>
      <p>
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">Drosophila melanogaster</ext-link>
        </italic>
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
        </italic>
         has been assigned to the High Mobility Group Box Transcription Factors gene family 
        <xref ref-type="bibr" rid="R16">(Pfreundt et al., 2010; Phochanukul &amp; Russell, 2010; Sessa &amp; Bianchi, 2007)</xref>
        . Proteins from this group regulate the 
        <italic>Wnt</italic>
         signaling pathway and contain a characteristic 80 AA L-shaped DNA minor groove binding domain, which when bound to DNA induces DNA bending. According to FlyBase (release FB2024_02), the 
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
        </italic>
         gene is most likely orthologous to either the human 
        <italic>SOX5</italic>
         or 
        <italic>SOX6 </italic>
        gene, having a DIOPT score of 9/14 when run against both 
        <italic>SOX</italic>
         genes 
        <xref ref-type="bibr" rid="R4">(Gramates et al., 2022; Hu et al., 2011)</xref>
        . In humans, mutations in the 
        <italic>SOX5 </italic>
        gene are related to Lamb Shaffer Syndrome, a neurodevelopmental disorder 
        <xref ref-type="bibr" rid="R10">(Lamb et al., 2012)</xref>
        . Due to its close association with the brain and development, 
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
        </italic>
         in 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7215">Drosophila</ext-link>
        </italic>
         has been used to study Alzheimer's and heart disease in humans 
        <xref ref-type="bibr" rid="R12">(Li et al., 2013, 2017)</xref>
        . 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">Drosophila kikkawai</ext-link>
        </italic>
         belongs to the 
        <italic>melanogaster </italic>
        group of the 
        <italic>Sophophora</italic>
         subgenus (NCBI taxonomy ID: 30033) 
        <xref ref-type="bibr" rid="R21">(Schoch et al., 2020)</xref>
        . This cosmopolitan species is tropical and subtropical, as it is not found above the latitude of 35° 
        <xref ref-type="bibr" rid="R8">(Karan et al., 1998)</xref>
        . 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
        </italic>
        is one of four 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7215">Drosophila</ext-link>
        </italic>
         species (along with 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=29030">Drosophila takahashii</ext-link>
          , 
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7217">Drosophila ananassae</ext-link>
          , 
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=42026">Drosophila bipectinata</ext-link>
        </italic>
        ) examined in the study of the Muller F element expansion and shows an approximate 1.7-fold increase in chromosome size when compared to the 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
        </italic>
         F element 
        <xref ref-type="bibr" rid="R11">(Leung et al., 2023)</xref>
        .
      </p>
      <p>
        <bold>
          <italic>
            <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
          </italic>
           feature with NCBI Gene ID 108084518 is the putative ortholog of 
          <italic>
            <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
          </italic>
        </bold>
        . The ortholog assignment is supported by a tBLASTn (v2.15.0+; Camacho et al., 2009) alignment using the NCBI BLAST server of the 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
        </italic>
         protein sequence for Sox102F-PA (FBpp0088312) against the entire 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
        </italic>
         DkikHiC1 (GenBank Assembly Accession: 
        <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/datasets/genome/GCA_030179895.1">GCA_030179895.1</ext-link>
        ) assembly. The top hit maps to scaffold 
        <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/nucleotide/CM058227.1?report=genbank&amp;log$=nucltop&amp;blast_rank=1&amp;RID=VS13AE7P016">CM058227.1</ext-link>
         (assigned to the F element) and reports an E-value of 7e-118, a percent identity of 74.02, and a percent coverage of 99. The coordinates for the top hit (i.e., the match with lowest E-value) correspond to the location of the 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
        </italic>
        feature with Gene ID 108084518. The next best hit maps to scaffold 
        <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/nucleotide/CM058225.1?report=genbank&amp;log$=nucltop&amp;blast_rank=3&amp;RID=VS13AE7P016">CM058225.1</ext-link>
         (assigned to the D element) and reports a higher E-value of 8e-25, a lower percent identity of 50.43, and a lower percent coverage of 70. Sox102F-PA is representative of the B, C, D and novel NE isoforms due to the significant CDS overlap among the isoforms.
        The results of three alignment tools within the genome browser (Spaln, BLAT, tBLASTn) map to the same region which corresponds to the location of the current gene model, providing strong evidence for the ortholog assignment, along with the E-value. Local synteny analysis provides further evidence for ortholog assignment. 
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
        </italic>
         is located on chromosome 4 (the F element) in 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
        </italic>
        and surrounded by the genes bent (
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0005666">bt</ext-link>
        </italic>
        ) (FBgn0005666), Mediator complex subunit 26 (
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0039923">MED26</ext-link>
        </italic>
        ) (FBgn0039923), forkhead domain 102C (
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0039937">fd102C</ext-link>
        </italic>
        ) (FBgn0039937), and Gigyf (
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0039936">Gyf</ext-link>
        </italic>
        ) (FBgn0039936). In 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
        </italic>
        , the orthologs of Eye-enriched kainate receptor (
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0039916">Ekar</ext-link>
        </italic>
        ) (FBgn0039916) (Gene ID: 108079305) and Mediator complex subunit 26 (
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0039923">MED26</ext-link>
        </italic>
        ) (Gene ID: 108079308)
        are located downstream of the 
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
        </italic>
        ortholog while the orthologs of forkhead domain 102C (
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0039937">fd102C</ext-link>
        </italic>
        ) (Gene ID: 108084517), and 
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0051998">CG31998</ext-link>
        </italic>
         (FBgn0051998) (Gene ID: 108083269) are located upstream on the F element. As shown in 
        <xref ref-type="fig" rid="f1">Figure 1A</xref>
        , the two genes immediately flanking 
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
        </italic>
         in 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
        </italic>
        are consistent with 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
        </italic>
        while the next two genes in the genomic neighborhoods differ between the two species. The 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
        </italic>
         feature with the Gene ID 108079305 was determined to be an ortholog of 
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0039916">Ekar</ext-link>
        </italic>
         rather than an ortholog of 
        <italic>bt </italic>
        based on the FlyBase BLASTp (v2.2.18; Altschul et al., 1990) search result of the protein product (XP_041632629) derived from the 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
        </italic>
        RefSeq mRNA XM_041776695 against the 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
        </italic>
         “Annotated proteins” database. The best BLASTp match is to 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
        </italic>
         Ekar-PB with a normalized score of 1555.81 bits and an E-value of 0 (i.e., E-value &lt; 1e-180). The next best hit to CG11155-PD also has an E-value of 0 but a lower score of 969.53 bits. Similarly, the 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
        </italic>
        feature with the Gene ID 108083269 was determined to be an ortholog of 
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0051998">CG31998</ext-link>
        </italic>
        rather than 
        <italic>Gyf </italic>
        based on the FlyBase BLASTp search result of the protein product (XP_017034489) derived from the 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
        </italic>
        RefSeq mRNA XM_017179000 against the 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
        </italic>
        “Annotated proteins” database. The best and only matches are to the A and B isoforms of the 
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0051998">CG31998</ext-link>
        </italic>
         gene where the top hit to 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
        </italic>
        CG31998-PA reports a normalized score of 1338.94 bits and an E-value of 0.
      </p>
      <p>
        <bold>
          Characterizing the A, C and D isoforms for 
          <italic>
            <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
          </italic>
        </bold>
        <italic>. </italic>
        The 
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
        </italic>
         gene is located on the F element of 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
          .
        </italic>
         Isoforms Sox102F-PA, Sox102F-PC, and Sox102F-PD in
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
        </italic>
        are conserved relative to the orthologous isoforms in 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
        </italic>
        and were annotated according to the protocol described in Rele et al., 2023. In both
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
        </italic>
         and 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
        </italic>
        , Sox102F-PA (BK067818), Sox102F-PC (BK067819), and Sox102F-PD (BK067820) are comprised of the same two sequences from the unspliced transcript while Sox102F-PB (BK067821), described in further detail below, is comprised of three coding sequences, two shared with the other isoforms and one unique initial CDS (
        <xref ref-type="fig" rid="f1">Figure 1B</xref>
        ). Further analysis of the 
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
        </italic>
         feature in 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
        </italic>
         led to the discovery of a novel isoform named Sox102F-PNE (BK067822). Nucleotide sequence data reported are available in the Third-Party Annotation Section of the DDBJ/ENA/GenBank databases under the accession numbers TPA: BK067818-BK067822.
      </p>
      <p>
        <bold>Characterizing Sox102F-PB and novel isoform Sox102F-PNE.</bold>
         The third CDS of the novel isoform overlaps with the open reading frame of the initial CDS of Sox102F-PB (inset of 
        <xref ref-type="fig" rid="f1">Figure 1B</xref>
        ). The initial CDS of the Sox102F-PB isoform lacks splice junction support from the combined splice junction track in the GEP UCSC Genome Browser, and the best BLASTx (v2.15.0+) hit does not include the first 6 AA. There are no other nearby in-frame start codons. There were two options to retain the Sox102F-PB isoform, either to modify the gene structure by proposing a novel initial CDS or to truncate the CDS to the nearest start codon at 704,877-704,875. Based on the annotation strategy to construct the most parsimonious gene model compared to the 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
        </italic>
        ortholog, the initial CDS for Sox102F-PB was truncated. Due to evidence of splice junctions and RefSeq predictions upstream of this start position, it was concluded that a novel isoform, Sox102F-PNE, whose CDS overlaps that of Sox102F-PB, exists in 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
        </italic>
        (
        <xref ref-type="fig" rid="f1">Figure 1B</xref>
        ). Combined splice junctions JUNC00109258, JUNC00109265, and JUNC00109267 mapped to the DkikHiC1 assembly and small RNA-seq peaks from adult males and mixed embryo correspond to the splice boundaries predicted by BLAT (RefSeq mRNA 
        <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/nuccore/XM_017180752">XM_017180752</ext-link>
        ), with the latter two junctions having scores greater than 10. A combined splice junction score of 10 indicates that the predicted intron is supported by 10 RNA-seq reads, which is the minimum support required for a novel isoform as per protocol. Sox102F-PNE becomes the longest 
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
        </italic>
         isoform in 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
        </italic>
        . 
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
        </italic>
         is involved in the phenomenon known as the F element expansion. The expansion of the 
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
        </italic>
         gene was calculated using Sox102F-PB, the longest isoform whose ortholog can be found in 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
        </italic>
        . The coding span (from start to stop codon and including introns) of the Sox102F-PB isoform in 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
        </italic>
         is 26,432 base pairs while its ortholog in 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
        </italic>
         has a coding span of 22,317 base pairs. The ~4,000 base pair, or 1.18x, expansion is attributed to the insertion of LINE transposons (TEs) into the intron between Sox102F-PB CDS2 (3_9492_0) and CDS3 (4_9492_0) which are shared across all isoforms. The insertions of these TEs did not alter the gene structure or the predicted amino acid sequence. 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
        </italic>
         has no identifiable TEs annotated in the corresponding intron.
      </p>
      <p>
        <bold>
          Characterizing HMG_box_domain in 
          <italic>
            <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
          </italic>
        </bold>
        . As seen in the EMBOSS Needle (v6.6.0.0; Rice et al., 2000) alignment (
        <xref ref-type="fig" rid="f1">Figure 1C</xref>
        ), the HMG_box_domain (IPR009071; Paysan-Lafosse et al., 2023) has been identified in Sox102F-PA and is found to be shared in all isoforms, including the novel NE isoform. This confirms that the feature belongs to the High Mobility Group Box Transcription Factors gene family. 
        <xref ref-type="fig" rid="f1">Figure 1D </xref>
        depicts that the domain circled in red shows a much higher level of sequence conservation than the rest of the protein when compared to the orthologous 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
        </italic>
        protein, suggesting its importance to protein function. Sequences outside of the red circle represent variable regions of lower sequence similarity that do not belong to the conserved domain and vary across species due to the accumulation of mutations over evolutionary time. Across 36 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7215">Drosophila</ext-link>
        </italic>
         species the HMG_box_domain is highly conserved in 
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
        </italic>
         which can be seen in a ROAST alignment of the terminal CDS (
        <xref ref-type="fig" rid="f1">Figure 1E</xref>
        ). Proteins belonging to the High Mobility Group Box Transcription Factors gene group at FlyBase (FBgg0000748) have been characterized as ubiquitous regulators of development by binding directly to the minor groove of DNA during transcription 
        <xref ref-type="bibr" rid="R7">(Kamachi &amp; Kondoh, 2013; Sessa &amp; Bianchi, 2007)</xref>
        . The 
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
        </italic>
         protein's role in development is consistent with the fact that the most abundant subset of supporting RNA-seq coverage is from mixed embryos.
      </p>
    </sec>
    <sec>
      <title>Methods</title>
      <p>
        The protocol used to annotate and reconcile the 
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
        </italic>
         gene model and neighboring gene models can be found in the Rele et al., 2023 paper. The annotations are based on the annotated gene models for FlyBase release FB2022_06 (
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
        </italic>
         release 6.49) in the release 6 assembly 
        <xref ref-type="bibr" rid="R5">(Hoskins et al., 2015)</xref>
        . A mirror of the UCSC Genome Browser (v435) 
        <xref ref-type="bibr" rid="R9">(Kent et al., 2002; Navarro Gonzalez et al., 2021)</xref>
         is maintained by the Genomics Education Partnership (GEP) at 
        <ext-link ext-link-type="uri" xlink:href="https://gander.wustl.edu">https://gander.wustl.edu</ext-link>
        . Within the 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
        </italic>
         Hi-C genome browser, tracks displaying the results of experimental data (e.g., RNA-seq) and computational tools such as tBLASTn (v2.13.0+), Spaln (v2.3.3f), and BLAT (v37x1) were used to support the assignment of the 
        <italic>
          <ext-link ext-link-type="flybase" xlink:href="FBgn0039938">Sox102F</ext-link>
        </italic>
         ortholog. The 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=30033">D. kikkawai</ext-link>
        </italic>
         RNA-seq data was generated by the modENCODE project 
        <xref ref-type="bibr" rid="R3">(Chen et al., 2014)</xref>
        . The tBLASTn results report the region of the genome with the highest similarity to 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
        </italic>
         protein coding sequences. The Spaln results report the region of the genome with the highest similarity to full-length 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
        </italic>
         proteins. BLAT alignments report the region of the genome with the highest similarity to 
        <italic>
          <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&amp;id=7227">D. melanogaster</ext-link>
        </italic>
        transcripts.
      </p>
    </sec>
    <sec>
      <title>Extended Data</title>
      <p>
        Description: Transcript, peptide and generic feature format version 3 (GFF3) files for all isoforms (A, B, C, D, NE) of Sox102F for DkikHiC1 assembly. Resource Type: Dataset. DOI: 
        <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.22002/vbjfz-zqn36">10.22002/vbjfz-zqn36</ext-link>
      </p>
    </sec>
  </body>
  <back>
    <ack>
      <sec>
        <title>Acknowledgments</title>
        <p>We would like to thank Wilson Leung for developing and maintaining the technological infrastructure that was used to create this gene model. We would also like to thank Dr. Christopher Shaffer for supervising Mia Mo and Larissa LoBello in the reconciliation process and for his feedback on the manuscript.</p>
      </sec>
    </ack>
    <ref-list>
      <ref id="R1">
        <element-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Altschul</surname>
              <given-names>Stephen F.</given-names>
            </name>
            <name>
              <surname>Gish</surname>
              <given-names>Warren</given-names>
            </name>
            <name>
              <surname>Miller</surname>
              <given-names>Webb</given-names>
            </name>
            <name>
              <surname>Myers</surname>
              <given-names>Eugene W.</given-names>
            </name>
            <name>
              <surname>Lipman</surname>
              <given-names>David J.</given-names>
            </name>
          </person-group>
          <year>1990</year>
          <month>10</month>
          <day>1</day>
          <article-title>Basic local alignment search tool</article-title>
          <source>Journal of Molecular Biology</source>
          <volume>215</volume>
          <issue>3</issue>
          <issn>0022-2836</issn>
          <fpage>403</fpage>
          <lpage>410</lpage>
          <pub-id pub-id-type="doi">10.1016/s0022-2836(05)80360-2</pub-id>
        </element-citation>
      </ref>
      <ref id="R2">
        <element-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Camacho</surname>
              <given-names>Christiam</given-names>
            </name>
            <name>
              <surname>Coulouris</surname>
              <given-names>George</given-names>
            </name>
            <name>
              <surname>Avagyan</surname>
              <given-names>Vahram</given-names>
            </name>
            <name>
              <surname>Ma</surname>
              <given-names>Ning</given-names>
            </name>
            <name>
              <surname>Papadopoulos</surname>
              <given-names>Jason</given-names>
            </name>
            <name>
              <surname>Bealer</surname>
              <given-names>Kevin</given-names>
            </name>
            <name>
              <surname>Madden</surname>
              <given-names>Thomas L</given-names>
            </name>
          </person-group>
          <year>2009</year>
          <month>12</month>
          <day>1</day>
          <article-title>BLAST+: architecture and applications</article-title>
          <source>BMC Bioinformatics</source>
          <volume>10</volume>
          <issue>1</issue>
          <issn>1471-2105</issn>
          <pub-id pub-id-type="doi">10.1186/1471-2105-10-421</pub-id>
        </element-citation>
      </ref>
      <ref id="R3">
        <element-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Chen</surname>
              <given-names>Zhen-Xia</given-names>
            </name>
            <name>
              <surname>Sturgill</surname>
              <given-names>David</given-names>
            </name>
            <name>
              <surname>Qu</surname>
              <given-names>Jiaxin</given-names>
            </name>
            <name>
              <surname>Jiang</surname>
              <given-names>Huaiyang</given-names>
            </name>
            <name>
              <surname>Park</surname>
              <given-names>Soo</given-names>
            </name>
            <name>
              <surname>Boley</surname>
              <given-names>Nathan</given-names>
            </name>
            <name>
              <surname>Suzuki</surname>
              <given-names>Ana Maria</given-names>
            </name>
            <name>
              <surname>Fletcher</surname>
              <given-names>Anthony R.</given-names>
            </name>
            <name>
              <surname>Plachetzki</surname>
              <given-names>David C.</given-names>
            </name>
            <name>
              <surname>FitzGerald</surname>
              <given-names>Peter C.</given-names>
            </name>
            <name>
              <surname>Artieri</surname>
              <given-names>Carlo G.</given-names>
            </name>
            <name>
              <surname>Atallah</surname>
              <given-names>Joel</given-names>
            </name>
            <name>
              <surname>Barmina</surname>
              <given-names>Olga</given-names>
            </name>
            <name>
              <surname>Brown</surname>
              <given-names>James B.</given-names>
            </name>
            <name>
              <surname>Blankenburg</surname>
              <given-names>Kerstin P.</given-names>
            </name>
            <name>
              <surname>Clough</surname>
              <given-names>Emily</given-names>
            </name>
            <name>
              <surname>Dasgupta</surname>
              <given-names>Abhijit</given-names>
            </name>
            <name>
              <surname>Gubbala</surname>
              <given-names>Sai</given-names>
            </name>
            <name>
              <surname>Han</surname>
              <given-names>Yi</given-names>
            </name>
            <name>
              <surname>Jayaseelan</surname>
              <given-names>Joy C.</given-names>
            </name>
            <name>
              <surname>Kalra</surname>
              <given-names>Divya</given-names>
            </name>
            <name>
              <surname>Kim</surname>
              <given-names>Yoo-Ah</given-names>
            </name>
            <name>
              <surname>Kovar</surname>
              <given-names>Christie L.</given-names>
            </name>
            <name>
              <surname>Lee</surname>
              <given-names>Sandra L.</given-names>
            </name>
            <name>
              <surname>Li</surname>
              <given-names>Mingmei</given-names>
            </name>
            <name>
              <surname>Malley</surname>
              <given-names>James D.</given-names>
            </name>
            <name>
              <surname>Malone</surname>
              <given-names>John H.</given-names>
            </name>
            <name>
              <surname>Mathew</surname>
              <given-names>Tittu</given-names>
            </name>
            <name>
              <surname>Mattiuzzo</surname>
              <given-names>Nicolas R.</given-names>
            </name>
            <name>
              <surname>Munidasa</surname>
              <given-names>Mala</given-names>
            </name>
            <name>
              <surname>Muzny</surname>
              <given-names>Donna M.</given-names>
            </name>
            <name>
              <surname>Ongeri</surname>
              <given-names>Fiona</given-names>
            </name>
            <name>
              <surname>Perales</surname>
              <given-names>Lora</given-names>
            </name>
            <name>
              <surname>Przytycka</surname>
              <given-names>Teresa M.</given-names>
            </name>
            <name>
              <surname>Pu</surname>
              <given-names>Ling-Ling</given-names>
            </name>
            <name>
              <surname>Robinson</surname>
              <given-names>Garrett</given-names>
            </name>
            <name>
              <surname>Thornton</surname>
              <given-names>Rebecca L.</given-names>
            </name>
            <name>
              <surname>Saada</surname>
              <given-names>Nehad</given-names>
            </name>
            <name>
              <surname>Scherer</surname>
              <given-names>Steven E.</given-names>
            </name>
            <name>
              <surname>Smith</surname>
              <given-names>Harold E.</given-names>
            </name>
            <name>
              <surname>Vinson</surname>
              <given-names>Charles</given-names>
            </name>
            <name>
              <surname>Warner</surname>
              <given-names>Crystal B.</given-names>
            </name>
            <name>
              <surname>Worley</surname>
              <given-names>Kim C.</given-names>
            </name>
            <name>
              <surname>Wu</surname>
              <given-names>Yuan-Qing</given-names>
            </name>
            <name>
              <surname>Zou</surname>
              <given-names>Xiaoyan</given-names>
            </name>
            <name>
              <surname>Cherbas</surname>
              <given-names>Peter</given-names>
            </name>
            <name>
              <surname>Kellis</surname>
              <given-names>Manolis</given-names>
            </name>
            <name>
              <surname>Eisen</surname>
              <given-names>Michael B.</given-names>
            </name>
            <name>
              <surname>Piano</surname>
              <given-names>Fabio</given-names>
            </name>
            <name>
              <surname>Kiontke</surname>
              <given-names>Karin</given-names>
            </name>
            <name>
              <surname>Fitch</surname>
              <given-names>David H.</given-names>
            </name>
            <name>
              <surname>Sternberg</surname>
              <given-names>Paul W.</given-names>
            </name>
            <name>
              <surname>Cutter</surname>
              <given-names>Asher D.</given-names>
            </name>
            <name>
              <surname>Duff</surname>
              <given-names>Michael O.</given-names>
            </name>
            <name>
              <surname>Hoskins</surname>
              <given-names>Roger A.</given-names>
            </name>
            <name>
              <surname>Graveley</surname>
              <given-names>Brenton R.</given-names>
            </name>
            <name>
              <surname>Gibbs</surname>
              <given-names>Richard A.</given-names>
            </name>
            <name>
              <surname>Bickel</surname>
              <given-names>Peter J.</given-names>
            </name>
            <name>
              <surname>Kopp</surname>
              <given-names>Artyom</given-names>
            </name>
            <name>
              <surname>Carninci</surname>
              <given-names>Piero</given-names>
            </name>
            <name>
              <surname>Celniker</surname>
              <given-names>Susan E.</given-names>
            </name>
            <name>
              <surname>Oliver</surname>
              <given-names>Brian</given-names>
            </name>
            <name>
              <surname>Richards</surname>
              <given-names>Stephen</given-names>
            </name>
          </person-group>
          <year>2014</year>
          <month>7</month>
          <day>1</day>
          <article-title>
            Comparative validation of the 
            <italic>D. melanogaster</italic>
             modENCODE transcriptome annotation
          </article-title>
          <source>Genome Research</source>
          <volume>24</volume>
          <issue>7</issue>
          <issn>1088-9051</issn>
          <fpage>1209</fpage>
          <lpage>1223</lpage>
          <pub-id pub-id-type="doi">10.1101/gr.159384.113</pub-id>
        </element-citation>
      </ref>
      <ref id="R4">
        <element-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Gramates</surname>
              <given-names>L Sian</given-names>
            </name>
            <name>
              <surname>Agapite</surname>
              <given-names>Julie</given-names>
            </name>
            <name>
              <surname>Attrill</surname>
              <given-names>Helen</given-names>
            </name>
            <name>
              <surname>Calvi</surname>
              <given-names>Brian R</given-names>
            </name>
            <name>
              <surname>Crosby</surname>
              <given-names>Madeline A</given-names>
            </name>
            <name>
              <surname>dos Santos</surname>
              <given-names>Gilberto</given-names>
            </name>
            <name>
              <surname>Goodman</surname>
              <given-names>Joshua L</given-names>
            </name>
            <name>
              <surname>Goutte-Gattat</surname>
              <given-names>Damien</given-names>
            </name>
            <name>
              <surname>Jenkins</surname>
              <given-names>Victoria K</given-names>
            </name>
            <name>
              <surname>Kaufman</surname>
              <given-names>Thomas</given-names>
            </name>
            <name>
              <surname>Larkin</surname>
              <given-names>Aoife</given-names>
            </name>
            <name>
              <surname>Matthews</surname>
              <given-names>Beverley B</given-names>
            </name>
            <name>
              <surname>Millburn</surname>
              <given-names>Gillian</given-names>
            </name>
            <name>
              <surname>Strelets</surname>
              <given-names>Victor B</given-names>
            </name>
            <name>
              <surname>Perrimon</surname>
              <given-names>Norbert</given-names>
            </name>
            <name>
              <surname>Gelbart</surname>
              <given-names>Susan Russo</given-names>
            </name>
            <name>
              <surname>Agapite</surname>
              <given-names>Julie</given-names>
            </name>
            <name>
              <surname>Broll</surname>
              <given-names>Kris</given-names>
            </name>
            <name>
              <surname>Crosby</surname>
              <given-names>Lynn</given-names>
            </name>
            <name>
              <surname>dos Santos</surname>
              <given-names>Gil</given-names>
            </name>
            <name>
              <surname>Falls</surname>
              <given-names>Kathleen</given-names>
            </name>
            <name>
              <surname>Gramates</surname>
              <given-names>L Sian</given-names>
            </name>
            <name>
              <surname>Jenkins</surname>
              <given-names>Victoria</given-names>
            </name>
            <name>
              <surname>Longden</surname>
              <given-names>Ian</given-names>
            </name>
            <name>
              <surname>Matthews</surname>
              <given-names>Beverley</given-names>
            </name>
            <name>
              <surname>Seme</surname>
              <given-names>Jolene</given-names>
            </name>
            <name>
              <surname>Tabone</surname>
              <given-names>Christopher J</given-names>
            </name>
            <name>
              <surname>Zhou</surname>
              <given-names>Pinglei</given-names>
            </name>
            <name>
              <surname>Zytkovicz</surname>
              <given-names>Mark</given-names>
            </name>
            <name>
              <surname>Brown</surname>
              <given-names>Nick</given-names>
            </name>
            <name>
              <surname>Antonazzo</surname>
              <given-names>Giulia</given-names>
            </name>
            <name>
              <surname>Attrill</surname>
              <given-names>Helen</given-names>
            </name>
            <name>
              <surname>Garapati</surname>
              <given-names>Phani</given-names>
            </name>
            <name>
              <surname>Goutte-Gattat</surname>
              <given-names>Damien</given-names>
            </name>
            <name>
              <surname>Larkin</surname>
              <given-names>Aoife</given-names>
            </name>
            <name>
              <surname>Marygold</surname>
              <given-names>Steven</given-names>
            </name>
            <name>
              <surname>McLachlan</surname>
              <given-names>Alex</given-names>
            </name>
            <name>
              <surname>Millburn</surname>
              <given-names>Gillian</given-names>
            </name>
            <name>
              <surname>Öztürk-Çolak</surname>
              <given-names>Arzu</given-names>
            </name>
            <name>
              <surname>Pilgrim</surname>
              <given-names>Clare</given-names>
            </name>
            <name>
              <surname>Trovisco</surname>
              <given-names>Vitor</given-names>
            </name>
            <name>
              <surname>Calvi</surname>
              <given-names>Brian</given-names>
            </name>
            <name>
              <surname>Kaufman</surname>
              <given-names>Thomas</given-names>
            </name>
            <name>
              <surname>Goodman</surname>
              <given-names>Josh</given-names>
            </name>
            <name>
              <surname>Krishna</surname>
              <given-names>Pravija</given-names>
            </name>
            <name>
              <surname>Strelets</surname>
              <given-names>Victor</given-names>
            </name>
            <name>
              <surname>Thurmond</surname>
              <given-names>Jim</given-names>
            </name>
            <name>
              <surname>Cripps</surname>
              <given-names>Richard</given-names>
            </name>
            <name>
              <surname>Lovato</surname>
              <given-names>TyAnna</given-names>
            </name>
            <collab>the FlyBase Consortium</collab>
          </person-group>
          <year>2022</year>
          <month>3</month>
          <day>10</day>
          <article-title>FlyBase: a guided tour of highlighted features</article-title>
          <source>Genetics</source>
          <volume>220</volume>
          <issue>4</issue>
          <issn>1943-2631</issn>
          <pub-id pub-id-type="doi">10.1093/genetics/iyac035</pub-id>
        </element-citation>
      </ref>
      <ref id="R5">
        <element-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Hoskins</surname>
              <given-names>Roger A.</given-names>
            </name>
            <name>
              <surname>Carlson</surname>
              <given-names>Joseph W.</given-names>
            </name>
            <name>
              <surname>Wan</surname>
              <given-names>Kenneth H.</given-names>
            </name>
            <name>
              <surname>Park</surname>
              <given-names>Soo</given-names>
            </name>
            <name>
              <surname>Mendez</surname>
              <given-names>Ivonne</given-names>
            </name>
            <name>
              <surname>Galle</surname>
              <given-names>Samuel E.</given-names>
            </name>
            <name>
              <surname>Booth</surname>
              <given-names>Benjamin W.</given-names>
            </name>
            <name>
              <surname>Pfeiffer</surname>
              <given-names>Barret D.</given-names>
            </name>
            <name>
              <surname>George</surname>
              <given-names>Reed A.</given-names>
            </name>
            <name>
              <surname>Svirskas</surname>
              <given-names>Robert</given-names>
            </name>
            <name>
              <surname>Krzywinski</surname>
              <given-names>Martin</given-names>
            </name>
            <name>
              <surname>Schein</surname>
              <given-names>Jacqueline</given-names>
            </name>
            <name>
              <surname>Accardo</surname>
              <given-names>Maria Carmela</given-names>
            </name>
            <name>
              <surname>Damia</surname>
              <given-names>Elisabetta</given-names>
            </name>
            <name>
              <surname>Messina</surname>
              <given-names>Giovanni</given-names>
            </name>
            <name>
              <surname>Méndez-Lago</surname>
              <given-names>María</given-names>
            </name>
            <name>
              <surname>de Pablos</surname>
              <given-names>Beatriz</given-names>
            </name>
            <name>
              <surname>Demakova</surname>
              <given-names>Olga V.</given-names>
            </name>
            <name>
              <surname>Andreyeva</surname>
              <given-names>Evgeniya N.</given-names>
            </name>
            <name>
              <surname>Boldyreva</surname>
              <given-names>Lidiya V.</given-names>
            </name>
            <name>
              <surname>Marra</surname>
              <given-names>Marco</given-names>
            </name>
            <name>
              <surname>Carvalho</surname>
              <given-names>A. Bernardo</given-names>
            </name>
            <name>
              <surname>Dimitri</surname>
              <given-names>Patrizio</given-names>
            </name>
            <name>
              <surname>Villasante</surname>
              <given-names>Alfredo</given-names>
            </name>
            <name>
              <surname>Zhimulev</surname>
              <given-names>Igor F.</given-names>
            </name>
            <name>
              <surname>Rubin</surname>
              <given-names>Gerald M.</given-names>
            </name>
            <name>
              <surname>Karpen</surname>
              <given-names>Gary H.</given-names>
            </name>
            <name>
              <surname>Celniker</surname>
              <given-names>Susan E.</given-names>
            </name>
          </person-group>
          <year>2015</year>
          <month>1</month>
          <day>14</day>
          <article-title>
            The Release 6 reference sequence of the 
            <italic>Drosophila melanogaster</italic>
             genome
          </article-title>
          <source>Genome Research</source>
          <volume>25</volume>
          <issue>3</issue>
          <issn>1088-9051</issn>
          <fpage>445</fpage>
          <lpage>458</lpage>
          <pub-id pub-id-type="doi">10.1101/gr.185579.114</pub-id>
        </element-citation>
      </ref>
      <ref id="R6">
        <element-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Hu</surname>
              <given-names>Yanhui</given-names>
            </name>
            <name>
              <surname>Flockhart</surname>
              <given-names>Ian</given-names>
            </name>
            <name>
              <surname>Vinayagam</surname>
              <given-names>Arunachalam</given-names>
            </name>
            <name>
              <surname>Bergwitz</surname>
              <given-names>Clemens</given-names>
            </name>
            <name>
              <surname>Berger</surname>
              <given-names>Bonnie</given-names>
            </name>
            <name>
              <surname>Perrimon</surname>
              <given-names>Norbert</given-names>
            </name>
            <name>
              <surname>Mohr</surname>
              <given-names>Stephanie E</given-names>
            </name>
          </person-group>
          <year>2011</year>
          <month>8</month>
          <day>31</day>
          <article-title>An integrative approach to ortholog prediction for disease-focused and other functional studies</article-title>
          <source>BMC Bioinformatics</source>
          <volume>12</volume>
          <issue>1</issue>
          <issn>1471-2105</issn>
          <pub-id pub-id-type="doi">10.1186/1471-2105-12-357</pub-id>
        </element-citation>
      </ref>
      <ref id="R7">
        <element-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Kamachi</surname>
              <given-names>Yusuke</given-names>
            </name>
            <name>
              <surname>Kondoh</surname>
              <given-names>Hisato</given-names>
            </name>
          </person-group>
          <year>2013</year>
          <month>10</month>
          <day>15</day>
          <article-title>Sox proteins: regulators of cell fate specification and differentiation</article-title>
          <source>Development</source>
          <volume>140</volume>
          <issue>20</issue>
          <issn>1477-9129</issn>
          <fpage>4129</fpage>
          <lpage>4144</lpage>
          <pub-id pub-id-type="doi">10.1242/dev.091793</pub-id>
        </element-citation>
      </ref>
      <ref id="R8">
        <element-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Karan</surname>
              <given-names>Dev</given-names>
            </name>
            <name>
              <surname>Munjal</surname>
              <given-names>Ashok K.</given-names>
            </name>
            <name>
              <surname>Gibert</surname>
              <given-names>Patricia</given-names>
            </name>
            <name>
              <surname>Moreteau</surname>
              <given-names>Brigitte</given-names>
            </name>
            <name>
              <surname>Parkash</surname>
              <given-names>Ravi</given-names>
            </name>
            <name>
              <surname>David</surname>
              <given-names>Jean R.</given-names>
            </name>
          </person-group>
          <year>1998</year>
          <month>2</month>
          <day>1</day>
          <article-title>Latitudinal clines for morphometrical traits in <italic>Drosophila kikkawai</italic>: a study of natural populations from the Indian subcontinent</article-title>
          <source>Genetical Research</source>
          <volume>71</volume>
          <issue>1</issue>
          <issn>0016-6723</issn>
          <fpage>31</fpage>
          <lpage>38</lpage>
          <pub-id pub-id-type="doi">10.1017/s0016672397003054</pub-id>
        </element-citation>
      </ref>
      <ref id="R9">
        <element-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Kent</surname>
              <given-names>W. James</given-names>
            </name>
            <name>
              <surname>Sugnet</surname>
              <given-names>Charles W.</given-names>
            </name>
            <name>
              <surname>Furey</surname>
              <given-names>Terrence S.</given-names>
            </name>
            <name>
              <surname>Roskin</surname>
              <given-names>Krishna M.</given-names>
            </name>
            <name>
              <surname>Pringle</surname>
              <given-names>Tom H.</given-names>
            </name>
            <name>
              <surname>Zahler</surname>
              <given-names>Alan M.</given-names>
            </name>
            <name>
              <surname>Haussler</surname>
              <given-names>David</given-names>
            </name>
          </person-group>
          <year>2002</year>
          <month>5</month>
          <day>16</day>
          <article-title>The Human Genome Browser at UCSC</article-title>
          <source>Genome Research</source>
          <volume>12</volume>
          <issue>6</issue>
          <issn>1088-9051</issn>
          <fpage>996</fpage>
          <lpage>1006</lpage>
          <pub-id pub-id-type="doi">10.1101/gr.229102</pub-id>
        </element-citation>
      </ref>
      <ref id="R10">
        <element-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Lamb</surname>
              <given-names>Allen N.</given-names>
            </name>
            <name>
              <surname>Rosenfeld</surname>
              <given-names>Jill A.</given-names>
            </name>
            <name>
              <surname>Neill</surname>
              <given-names>Nicholas J.</given-names>
            </name>
            <name>
              <surname>Talkowski</surname>
              <given-names>Michael E.</given-names>
            </name>
            <name>
              <surname>Blumenthal</surname>
              <given-names>Ian</given-names>
            </name>
            <name>
              <surname>Girirajan</surname>
              <given-names>Santhosh</given-names>
            </name>
            <name>
              <surname>Keelean-Fuller</surname>
              <given-names>Debra</given-names>
            </name>
            <name>
              <surname>Fan</surname>
              <given-names>Zheng</given-names>
            </name>
            <name>
              <surname>Pouncey</surname>
              <given-names>Jill</given-names>
            </name>
            <name>
              <surname>Stevens</surname>
              <given-names>Cathy</given-names>
            </name>
            <name>
              <surname>Mackay-Loder</surname>
              <given-names>Loren</given-names>
            </name>
            <name>
              <surname>Terespolsky</surname>
              <given-names>Deborah</given-names>
            </name>
            <name>
              <surname>Bader</surname>
              <given-names>Patricia I.</given-names>
            </name>
            <name>
              <surname>Rosenbaum</surname>
              <given-names>Kenneth</given-names>
            </name>
            <name>
              <surname>Vallee</surname>
              <given-names>Stephanie E.</given-names>
            </name>
            <name>
              <surname>Moeschler</surname>
              <given-names>John B.</given-names>
            </name>
            <name>
              <surname>Ladda</surname>
              <given-names>Roger</given-names>
            </name>
            <name>
              <surname>Sell</surname>
              <given-names>Susan</given-names>
            </name>
            <name>
              <surname>Martin</surname>
              <given-names>Judith</given-names>
            </name>
            <name>
              <surname>Ryan</surname>
              <given-names>Shawnia</given-names>
            </name>
            <name>
              <surname>Jones</surname>
              <given-names>Marilyn C.</given-names>
            </name>
            <name>
              <surname>Moran</surname>
              <given-names>Rocio</given-names>
            </name>
            <name>
              <surname>Shealy</surname>
              <given-names>Amy</given-names>
            </name>
            <name>
              <surname>Madan-Khetarpal</surname>
              <given-names>Suneeta</given-names>
            </name>
            <name>
              <surname>McConnell</surname>
              <given-names>Juliann</given-names>
            </name>
            <name>
              <surname>Surti</surname>
              <given-names>Urvashi</given-names>
            </name>
            <name>
              <surname>Delahaye</surname>
              <given-names>Andrée</given-names>
            </name>
            <name>
              <surname>Heron-Longe</surname>
              <given-names>Bénédicte</given-names>
            </name>
            <name>
              <surname>Pipiras</surname>
              <given-names>Eva</given-names>
            </name>
            <name>
              <surname>Benzacken</surname>
              <given-names>Brigitte</given-names>
            </name>
            <name>
              <surname>Passemard</surname>
              <given-names>Sandrine</given-names>
            </name>
            <name>
              <surname>Verloes</surname>
              <given-names>Alain</given-names>
            </name>
            <name>
              <surname>Isidor</surname>
              <given-names>Bertrand</given-names>
            </name>
            <name>
              <surname>Le Caignec</surname>
              <given-names>Cedric</given-names>
            </name>
            <name>
              <surname>Glew</surname>
              <given-names>Gwen M.</given-names>
            </name>
            <name>
              <surname>Opheim</surname>
              <given-names>Kent E.</given-names>
            </name>
            <name>
              <surname>Descartes</surname>
              <given-names>Maria</given-names>
            </name>
            <name>
              <surname>Eichler</surname>
              <given-names>Evan E.</given-names>
            </name>
            <name>
              <surname>Morton</surname>
              <given-names>Cynthia C.</given-names>
            </name>
            <name>
              <surname>Gusella</surname>
              <given-names>James F.</given-names>
            </name>
            <name>
              <surname>Schultz</surname>
              <given-names>Roger A.</given-names>
            </name>
            <name>
              <surname>Ballif</surname>
              <given-names>Blake C.</given-names>
            </name>
            <name>
              <surname>Shaffer</surname>
              <given-names>Lisa G.</given-names>
            </name>
          </person-group>
          <year>2012</year>
          <month>3</month>
          <day>12</day>
          <article-title>
            Haploinsufficiency of
            <italic>SOX5</italic>
            at 12p12.1 is associated with developmental delays with prominent language delay, behavior problems, and mild dysmorphic features
          </article-title>
          <source>Human Mutation</source>
          <volume>33</volume>
          <issue>4</issue>
          <issn>1059-7794</issn>
          <fpage>728</fpage>
          <lpage>740</lpage>
          <pub-id pub-id-type="doi">10.1002/humu.22037</pub-id>
        </element-citation>
      </ref>
      <ref id="R11">
        <element-citation publication-type="posted-content">
          <person-group person-group-type="author">
            <name>
              <surname>Leung</surname>
              <given-names>Wilson</given-names>
            </name>
            <name>
              <surname>Torosin</surname>
              <given-names>Nicole</given-names>
            </name>
            <name>
              <surname>Cao</surname>
              <given-names>Weihuan</given-names>
            </name>
            <name>
              <surname>Reed</surname>
              <given-names>Laura K</given-names>
            </name>
            <name>
              <surname>Arrigo</surname>
              <given-names>Cindy</given-names>
            </name>
            <name>
              <surname>Elgin</surname>
              <given-names>Sarah C R</given-names>
            </name>
            <name>
              <surname>Ellison</surname>
              <given-names>Christopher E</given-names>
            </name>
          </person-group>
          <year>2023</year>
          <month>5</month>
          <day>24</day>
          <article-title>Long-read genome assemblies for the study of chromosome expansion: <italic>Drosophila kikkawai</italic>, <italic>Drosophila takahashii</italic>, <italic>Drosophila bipectinata</italic>, and <italic>Drosophila ananassae</italic></article-title>
          <pub-id pub-id-type="doi">10.1101/2023.05.22.541758</pub-id>
        </element-citation>
      </ref>
      <ref id="R12">
        <element-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Li</surname>
              <given-names>A.</given-names>
            </name>
            <name>
              <surname>Ahsen</surname>
              <given-names>O. O.</given-names>
            </name>
            <name>
              <surname>Liu</surname>
              <given-names>J. J.</given-names>
            </name>
            <name>
              <surname>Du</surname>
              <given-names>C.</given-names>
            </name>
            <name>
              <surname>McKee</surname>
              <given-names>M. L.</given-names>
            </name>
            <name>
              <surname>Yang</surname>
              <given-names>Y.</given-names>
            </name>
            <name>
              <surname>Wasco</surname>
              <given-names>W.</given-names>
            </name>
            <name>
              <surname>Newton-Cheh</surname>
              <given-names>C. H.</given-names>
            </name>
            <name>
              <surname>O'Donnell</surname>
              <given-names>C. J.</given-names>
            </name>
            <name>
              <surname>Fujimoto</surname>
              <given-names>J. G.</given-names>
            </name>
            <name>
              <surname>Zhou</surname>
              <given-names>C.</given-names>
            </name>
            <name>
              <surname>Tanzi</surname>
              <given-names>R. E.</given-names>
            </name>
          </person-group>
          <year>2013</year>
          <month>5</month>
          <day>21</day>
          <article-title>Silencing of the Drosophila ortholog of SOX5 in heart leads to cardiac dysfunction as detected by optical coherence tomography</article-title>
          <source>Human Molecular Genetics</source>
          <volume>22</volume>
          <issue>18</issue>
          <issn>0964-6906</issn>
          <fpage>3798</fpage>
          <lpage>3806</lpage>
          <pub-id pub-id-type="doi">10.1093/hmg/ddt230</pub-id>
        </element-citation>
      </ref>
      <ref id="R13">
        <element-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Li</surname>
              <given-names>Airong</given-names>
            </name>
            <name>
              <surname>Hooli</surname>
              <given-names>Basavaraj</given-names>
            </name>
            <name>
              <surname>Mullin</surname>
              <given-names>Kristina</given-names>
            </name>
            <name>
              <surname>Tate</surname>
              <given-names>Rebecca E.</given-names>
            </name>
            <name>
              <surname>Bubnys</surname>
              <given-names>Adele</given-names>
            </name>
            <name>
              <surname>Kirchner</surname>
              <given-names>Rory</given-names>
            </name>
            <name>
              <surname>Chapman</surname>
              <given-names>Brad</given-names>
            </name>
            <name>
              <surname>Hofmann</surname>
              <given-names>Oliver</given-names>
            </name>
            <name>
              <surname>Hide</surname>
              <given-names>Winston</given-names>
            </name>
            <name>
              <surname>Tanzi</surname>
              <given-names>Rudolph E.</given-names>
            </name>
          </person-group>
          <year>2017</year>
          <month>2</month>
          <day>10</day>
          <article-title>Silencing of the Drosophila ortholog of SOX5 leads to abnormal neuronal development and behavioral impairment</article-title>
          <source>Human Molecular Genetics</source>
          <volume>26</volume>
          <issue>8</issue>
          <issn>0964-6906</issn>
          <fpage>1472</fpage>
          <lpage>1482</lpage>
          <pub-id pub-id-type="doi">10.1093/hmg/ddx051</pub-id>
        </element-citation>
      </ref>
      <ref id="R14">
        <element-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Navarro Gonzalez</surname>
              <given-names>Jairo</given-names>
            </name>
            <name>
              <surname>Zweig</surname>
              <given-names>Ann S</given-names>
            </name>
            <name>
              <surname>Speir</surname>
              <given-names>Matthew L</given-names>
            </name>
            <name>
              <surname>Schmelter</surname>
              <given-names>Daniel</given-names>
            </name>
            <name>
              <surname>Rosenbloom</surname>
              <given-names>Kate R</given-names>
            </name>
            <name>
              <surname>Raney</surname>
              <given-names>Brian J</given-names>
            </name>
            <name>
              <surname>Powell</surname>
              <given-names>Conner C</given-names>
            </name>
            <name>
              <surname>Nassar</surname>
              <given-names>Luis R</given-names>
            </name>
            <name>
              <surname>Maulding</surname>
              <given-names>Nathan D</given-names>
            </name>
            <name>
              <surname>Lee</surname>
              <given-names>Christopher M</given-names>
            </name>
            <name>
              <surname>Lee</surname>
              <given-names>Brian T</given-names>
            </name>
            <name>
              <surname>Hinrichs</surname>
              <given-names>Angie S</given-names>
            </name>
            <name>
              <surname>Fyfe</surname>
              <given-names>Alastair C</given-names>
            </name>
            <name>
              <surname>Fernandes</surname>
              <given-names>Jason D</given-names>
            </name>
            <name>
              <surname>Diekhans</surname>
              <given-names>Mark</given-names>
            </name>
            <name>
              <surname>Clawson</surname>
              <given-names>Hiram</given-names>
            </name>
            <name>
              <surname>Casper</surname>
              <given-names>Jonathan</given-names>
            </name>
            <name>
              <surname>Benet-Pagès</surname>
              <given-names>Anna</given-names>
            </name>
            <name>
              <surname>Barber</surname>
              <given-names>Galt P</given-names>
            </name>
            <name>
              <surname>Haussler</surname>
              <given-names>David</given-names>
            </name>
            <name>
              <surname>Kuhn</surname>
              <given-names>Robert M</given-names>
            </name>
            <name>
              <surname>Haeussler</surname>
              <given-names>Maximilian</given-names>
            </name>
            <name>
              <surname>Kent</surname>
              <given-names>W James</given-names>
            </name>
          </person-group>
          <year>2020</year>
          <month>11</month>
          <day>22</day>
          <article-title>The UCSC Genome Browser database: 2021 update</article-title>
          <source>Nucleic Acids Research</source>
          <volume>49</volume>
          <issue>D1</issue>
          <issn>0305-1048</issn>
          <fpage>D1046</fpage>
          <lpage>D1057</lpage>
          <pub-id pub-id-type="doi">10.1093/nar/gkaa1070</pub-id>
        </element-citation>
      </ref>
      <ref id="R15">
        <element-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Paysan-Lafosse</surname>
              <given-names>Typhaine</given-names>
            </name>
            <name>
              <surname>Blum</surname>
              <given-names>Matthias</given-names>
            </name>
            <name>
              <surname>Chuguransky</surname>
              <given-names>Sara</given-names>
            </name>
            <name>
              <surname>Grego</surname>
              <given-names>Tiago</given-names>
            </name>
            <name>
              <surname>Pinto</surname>
              <given-names>Beatriz Lázaro</given-names>
            </name>
            <name>
              <surname>Salazar</surname>
              <given-names>Gustavo A</given-names>
            </name>
            <name>
              <surname>Bileschi</surname>
              <given-names>Maxwell L</given-names>
            </name>
            <name>
              <surname>Bork</surname>
              <given-names>Peer</given-names>
            </name>
            <name>
              <surname>Bridge</surname>
              <given-names>Alan</given-names>
            </name>
            <name>
              <surname>Colwell</surname>
              <given-names>Lucy</given-names>
            </name>
            <name>
              <surname>Gough</surname>
              <given-names>Julian</given-names>
            </name>
            <name>
              <surname>Haft</surname>
              <given-names>Daniel H</given-names>
            </name>
            <name>
              <surname>Letunić</surname>
              <given-names>Ivica</given-names>
            </name>
            <name>
              <surname>Marchler-Bauer</surname>
              <given-names>Aron</given-names>
            </name>
            <name>
              <surname>Mi</surname>
              <given-names>Huaiyu</given-names>
            </name>
            <name>
              <surname>Natale</surname>
              <given-names>Darren A</given-names>
            </name>
            <name>
              <surname>Orengo</surname>
              <given-names>Christine A</given-names>
            </name>
            <name>
              <surname>Pandurangan</surname>
              <given-names>Arun P</given-names>
            </name>
            <name>
              <surname>Rivoire</surname>
              <given-names>Catherine</given-names>
            </name>
            <name>
              <surname>Sigrist</surname>
              <given-names>Christian J A</given-names>
            </name>
            <name>
              <surname>Sillitoe</surname>
              <given-names>Ian</given-names>
            </name>
            <name>
              <surname>Thanki</surname>
              <given-names>Narmada</given-names>
            </name>
            <name>
              <surname>Thomas</surname>
              <given-names>Paul D</given-names>
            </name>
            <name>
              <surname>Tosatto</surname>
              <given-names>Silvio C E</given-names>
            </name>
            <name>
              <surname>Wu</surname>
              <given-names>Cathy H</given-names>
            </name>
            <name>
              <surname>Bateman</surname>
              <given-names>Alex</given-names>
            </name>
          </person-group>
          <year>2022</year>
          <month>11</month>
          <day>9</day>
          <article-title>InterPro in 2022</article-title>
          <source>Nucleic Acids Research</source>
          <volume>51</volume>
          <issue>D1</issue>
          <issn>0305-1048</issn>
          <fpage>D418</fpage>
          <lpage>D427</lpage>
          <pub-id pub-id-type="doi">10.1093/nar/gkac993</pub-id>
        </element-citation>
      </ref>
      <ref id="R16">
        <element-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Pfreundt</surname>
              <given-names>Ulrike</given-names>
            </name>
            <name>
              <surname>James</surname>
              <given-names>Daniel P.</given-names>
            </name>
            <name>
              <surname>Tweedie</surname>
              <given-names>Susan</given-names>
            </name>
            <name>
              <surname>Wilson</surname>
              <given-names>Derek</given-names>
            </name>
            <name>
              <surname>Teichmann</surname>
              <given-names>Sarah A.</given-names>
            </name>
            <name>
              <surname>Adryan</surname>
              <given-names>Boris</given-names>
            </name>
          </person-group>
          <year>2009</year>
          <month>10</month>
          <day>31</day>
          <article-title>FlyTF: improved annotation and enhanced functionality of the Drosophila transcription factor database</article-title>
          <source>Nucleic Acids Research</source>
          <volume>38</volume>
          <issue>suppl_1</issue>
          <issn>0305-1048</issn>
          <fpage>D443</fpage>
          <lpage>D447</lpage>
          <pub-id pub-id-type="doi">10.1093/nar/gkp910</pub-id>
        </element-citation>
      </ref>
      <ref id="R17">
        <element-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Phochanukul</surname>
              <given-names>Nichanun</given-names>
            </name>
            <name>
              <surname>Russell</surname>
              <given-names>Steven</given-names>
            </name>
          </person-group>
          <year>2010</year>
          <month>3</month>
          <day>1</day>
          <article-title>No backbone but lots of Sox: Invertebrate Sox genes</article-title>
          <source>The International Journal of Biochemistry &amp; Cell Biology</source>
          <volume>42</volume>
          <issue>3</issue>
          <issn>1357-2725</issn>
          <fpage>453</fpage>
          <lpage>464</lpage>
          <pub-id pub-id-type="doi">10.1016/j.biocel.2009.06.013</pub-id>
        </element-citation>
      </ref>
      <ref id="R18">
        <element-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Rele</surname>
              <given-names>Chinmay P.</given-names>
            </name>
            <name>
              <surname>Sandlin</surname>
              <given-names>Katie M.</given-names>
            </name>
            <name>
              <surname>Leung</surname>
              <given-names>Wilson</given-names>
            </name>
            <name>
              <surname>Reed</surname>
              <given-names>Laura K.</given-names>
            </name>
          </person-group>
          <year>2023</year>
          <month>10</month>
          <day>13</day>
          <article-title>Manual annotation of <italic>Drosophila</italic> genes: a Genomics Education Partnership protocol</article-title>
          <source>F1000Research</source>
          <volume>11</volume>
          <issn>2046-1402</issn>
          <fpage>1579</fpage>
          <lpage>1579</lpage>
          <pub-id pub-id-type="doi">10.12688/f1000research.126839.3</pub-id>
        </element-citation>
      </ref>
      <ref id="R19">
        <element-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Rice</surname>
              <given-names>Peter</given-names>
            </name>
            <name>
              <surname>Longden</surname>
              <given-names>Ian</given-names>
            </name>
            <name>
              <surname>Bleasby</surname>
              <given-names>Alan</given-names>
            </name>
          </person-group>
          <year>2000</year>
          <month>6</month>
          <day>1</day>
          <article-title>EMBOSS: The European Molecular Biology Open Software Suite</article-title>
          <source>Trends in Genetics</source>
          <volume>16</volume>
          <issue>6</issue>
          <issn>0168-9525</issn>
          <fpage>276</fpage>
          <lpage>277</lpage>
          <pub-id pub-id-type="doi">10.1016/s0168-9525(00)02024-2</pub-id>
        </element-citation>
      </ref>
      <ref id="R20">
        <element-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Sessa</surname>
              <given-names>Luca</given-names>
            </name>
            <name>
              <surname>Bianchi</surname>
              <given-names>Marco E.</given-names>
            </name>
          </person-group>
          <year>2007</year>
          <month>1</month>
          <day>1</day>
          <article-title>The evolution of High Mobility Group Box (HMGB) chromatin proteins in multicellular animals</article-title>
          <source>Gene</source>
          <volume>387</volume>
          <issue>1-2</issue>
          <issn>0378-1119</issn>
          <fpage>133</fpage>
          <lpage>140</lpage>
          <pub-id pub-id-type="doi">10.1016/j.gene.2006.08.034</pub-id>
        </element-citation>
      </ref>
      <ref id="R21">
        <element-citation publication-type="journal">
          <person-group person-group-type="author">
            <name>
              <surname>Schoch</surname>
              <given-names>Conrad L</given-names>
            </name>
            <name>
              <surname>Ciufo</surname>
              <given-names>Stacy</given-names>
            </name>
            <name>
              <surname>Domrachev</surname>
              <given-names>Mikhail</given-names>
            </name>
            <name>
              <surname>Hotton</surname>
              <given-names>Carol L</given-names>
            </name>
            <name>
              <surname>Kannan</surname>
              <given-names>Sivakumar</given-names>
            </name>
            <name>
              <surname>Khovanskaya</surname>
              <given-names>Rogneda</given-names>
            </name>
            <name>
              <surname>Leipe</surname>
              <given-names>Detlef</given-names>
            </name>
            <name>
              <surname>Mcveigh</surname>
              <given-names>Richard</given-names>
            </name>
            <name>
              <surname>O’Neill</surname>
              <given-names>Kathleen</given-names>
            </name>
            <name>
              <surname>Robbertse</surname>
              <given-names>Barbara</given-names>
            </name>
            <name>
              <surname>Sharma</surname>
              <given-names>Shobha</given-names>
            </name>
            <name>
              <surname>Soussov</surname>
              <given-names>Vladimir</given-names>
            </name>
            <name>
              <surname>Sullivan</surname>
              <given-names>John P</given-names>
            </name>
            <name>
              <surname>Sun</surname>
              <given-names>Lu</given-names>
            </name>
            <name>
              <surname>Turner</surname>
              <given-names>Seán</given-names>
            </name>
            <name>
              <surname>Karsch-Mizrachi</surname>
              <given-names>Ilene</given-names>
            </name>
          </person-group>
          <year>2020</year>
          <month>1</month>
          <day>1</day>
          <article-title>NCBI Taxonomy: a comprehensive update on curation, resources and tools</article-title>
          <source>Database</source>
          <volume>2020</volume>
          <issn>1758-0463</issn>
          <elocation-id>baaa062</elocation-id>
          <pub-id pub-id-type="doi">10.1093/database/baaa062</pub-id>
        </element-citation>
      </ref>
    </ref-list>
  </back>
</article>