Group nodes within the same time period

2019-08-22 10:16发布

This is a simplified version of my actual data (GPS tracking points). I have a list of times in ascending order.

<!-- Sample input: track points reduced to their time stamps, listed in
     ascending order and spread over two trk elements / three trkseg elements. -->
<gpx>
    <trk>
        <trkseg>
            <trkpt>
                <time>2000-01-01T15:25:00Z</time>
            </trkpt>
            <trkpt>
                <time>2000-01-01T15:26:00Z</time>
            </trkpt>
            <trkpt>
                <time>2000-01-01T15:27:00Z</time>
            </trkpt>
        </trkseg>
    </trk>
    <trk>
        <trkseg>
            <trkpt>
                <time>2000-01-01T15:28:00Z</time>
            </trkpt>
            <trkpt>
                <time>2000-01-01T15:29:00Z</time>
            </trkpt>            
        </trkseg>
        <!-- The only gap longer than one minute: 15:29 to 16:00 (31 minutes). -->
        <trkseg>
            <trkpt>
                <time>2000-01-01T16:00:00Z</time>
            </trkpt>
            <trkpt>
                <time>2000-01-01T16:01:00Z</time>
            </trkpt>            
        </trkseg>
    </trk>
</gpx>

I want to detect the "stops" and group the times that are contiguous. For example, with a "stop" being an absence of data for 5 minutes or more, I would get

<!-- Desired result for a 5-minute stop threshold: the five points from 15:25
     to 15:29 are one minute apart and share a single trk/trkseg; the points
     at 16:00 and 16:01 follow a 31-minute gap and form a second trk. -->
<gpx >
    <trk>
        <trkseg>
            <trkpt>
                <time>2000-01-01T15:25:00Z</time>
            </trkpt>
            <trkpt>
                <time>2000-01-01T15:26:00Z</time>
            </trkpt>
            <trkpt>
                <time>2000-01-01T15:27:00Z</time>
            </trkpt>
            <trkpt>
                <time>2000-01-01T15:28:00Z</time>
            </trkpt>
            <trkpt>
                <time>2000-01-01T15:29:00Z</time>
            </trkpt>    
        </trkseg>
    </trk>
    <trk>
        <trkseg>
            <trkpt>
                <time>2000-01-01T16:00:00Z</time>
            </trkpt>
            <trkpt>
                <time>2000-01-01T16:01:00Z</time>
            </trkpt>            
        </trkseg>
    </trk>
</gpx>

标签: xslt
1条回答
该账号已被封号
2楼-- · 2019-08-22 11:05

This is a good use case for a tumbling window clause in XQuery 3.0/3.1:

(: Regroups every time stamp of the context document into one trk/trkseg per
   run of contiguous points. A run ends at a point whose successor is more
   than $stopDuration later.
   Note: the comparison is "gt", so a gap of exactly $stopDuration does NOT
   start a new trk; switch to "ge" if a gap equal to the threshold should
   count as a stop. :)
declare namespace output = "http://www.w3.org/2010/xslt-xquery-serialization";
declare option output:indent "yes";

(: Minimum silence between two consecutive points that counts as a stop. :)
declare variable $stopDuration as xs:dayTimeDuration := xs:dayTimeDuration('PT5M');

<gpx>
{
  for tumbling window $run in (for $t in //time return xs:dateTime($t))
    (: every item may open a window, so no point is ever dropped :)
    start when true()
    (: close the window on $last when the following point is too far away :)
    end $last next $following when $following - $last gt $stopDuration
  return
    <trk>
      <trkseg>
      {
        for $stamp in $run
        return <trkpt><time>{$stamp}</time></trkpt>
      }
      </trkseg>
    </trk>
}
</gpx>

Result is

<?xml version="1.0" encoding="UTF-8"?>
<gpx>
   <trk>
      <trkseg>
         <trkpt>
            <time>2000-01-01T15:25:00Z</time>
         </trkpt>
         <trkpt>
            <time>2000-01-01T15:26:00Z</time>
         </trkpt>
         <trkpt>
            <time>2000-01-01T15:27:00Z</time>
         </trkpt>
         <trkpt>
            <time>2000-01-01T15:28:00Z</time>
         </trkpt>
         <trkpt>
            <time>2000-01-01T15:29:00Z</time>
         </trkpt>
      </trkseg>
   </trk>
   <trk>
      <trkseg>
         <trkpt>
            <time>2000-01-01T16:00:00Z</time>
         </trkpt>
         <trkpt>
            <time>2000-01-01T16:01:00Z</time>
         </trkpt>
      </trkseg>
   </trk>
</gpx>

So if your XSLT processor is, for instance, Saxon 9, which also supports XQuery, you could consider doing it with XQuery instead of XSLT.

If you want to use it with XSLT then here is an XSLT 3.0 stylesheet that can be run with Saxon 9.8 (any edition) or Altova (2017 release) that tries to use xsl:iterate to emulate the XQuery approach of a tumbling window computation shown above:

<?xml version="1.0" encoding="UTF-8"?>
<!--
    Regroups the time stamps of a gpx document into one trk/trkseg per run of
    contiguous points, using xsl:iterate to emulate an XQuery tumbling window.

    A new trk starts whenever a point is more than $stop later than the point
    before it. The comparison is "gt", so a gap of exactly $stop does not
    split; use "ge" if a gap equal to the threshold should count as a stop.

    The match pattern and the literal result elements are in no namespace,
    like the sample input. NOTE(review): a GPX file that declares a default
    namespace would need xpath-default-namespace and a matching xmlns on the
    result elements; confirm against the real data.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:math="http://www.w3.org/2005/xpath-functions/math"
    xmlns:mf="http://example.com/mf"
    exclude-result-prefixes="xs math mf"
    expand-text="yes"
    version="3.0">

    <!-- Minimum silence between two consecutive points that counts as a stop. -->
    <xsl:variable name="stop" as="xs:dayTimeDuration" select="xs:dayTimeDuration('PT5M')"/>

    <xsl:output indent="yes"/>

    <!--
        mf:wrap: builds one trk element holding a single trkseg with one
        trkpt/time per supplied dateTime, in the order given.
        Param dateTimes: the time stamps of one contiguous run.
        Returns: exactly one trk element.
    -->
    <xsl:function name="mf:wrap" as="element(trk)">
        <xsl:param name="dateTimes" as="xs:dateTime*"/>
        <trk>
            <!-- trkseg (not "trkseq"): must match the element name used by the input. -->
            <trkseg>
                <xsl:for-each select="$dateTimes">
                    <trkpt>
                        <time>
                            <xsl:value-of select="."/>
                        </time>
                    </trkpt>
                </xsl:for-each>
            </trkseg>
        </trk>
    </xsl:function>

    <!--
        Copies the gpx element and replaces its content with the regrouped
        trk elements. Iteration state:
          w = time stamps collected for the group currently being built
          e = the previously visited time stamp (empty on the first item)
    -->
    <xsl:template match="gpx">
        <xsl:copy>
            <xsl:variable name="dateTimes" as="xs:dateTime*" select=".//time/xs:dateTime(.)"/>
            <xsl:iterate select="$dateTimes">
                <xsl:param name="w" as="xs:dateTime*" select="()"/>
                <xsl:param name="e" as="xs:dateTime?" select="()"/>
                <xsl:on-completion>
                    <!-- Flush the last open group. Skipped when there were no
                         time stamps at all, so an empty input does not yield
                         an empty trk. -->
                    <xsl:if test="exists($w)">
                        <xsl:sequence select="mf:wrap($w)"/>
                    </xsl:if>
                </xsl:on-completion>
                <xsl:variable name="n" as="xs:dateTime" select="."/>
                <xsl:choose>
                    <!-- $n is declared as a mandatory xs:dateTime, so only $e
                         (empty on the first item) needs an existence check. -->
                    <xsl:when test="exists($e) and $n - $e gt $stop">
                        <!-- Stop detected: emit the finished group and start a
                             new one with the current time stamp. -->
                        <xsl:sequence select="mf:wrap($w)"/>
                        <xsl:next-iteration>
                            <xsl:with-param name="w" select="$n"/>
                            <xsl:with-param name="e" select="$n"/>
                        </xsl:next-iteration>
                    </xsl:when>
                    <xsl:otherwise>
                        <!-- Still contiguous: extend the current group. -->
                        <xsl:next-iteration>
                            <xsl:with-param name="w" select="$w, $n"/>
                            <xsl:with-param name="e" select="$n"/>
                        </xsl:next-iteration>
                    </xsl:otherwise>
                </xsl:choose>
            </xsl:iterate>
        </xsl:copy>
    </xsl:template>

</xsl:stylesheet>

If you need to do it in XSLT 2.0, one way is to write a function that recursively processes the dateTime sequence item by item and returns a group (i.e. a trk element) once it finds a dateTime that lies further from its predecessor than the allowed limit (so it basically implements the XQuery check end $e next $n when $n - $e gt $stopDuration):

<?xml version="1.0" encoding="UTF-8"?>
<!--
    XSLT 2.0 variant: regroups the time stamps of a gpx document into one
    trk/trkseg per run of contiguous points by recursing over the sequence.

    A new trk starts whenever a point is more than $stop later than the point
    before it. The comparison is "gt", so a gap of exactly $stop does not
    split; use "ge" if a gap equal to the threshold should count as a stop.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:mf="http://example.com/mf"
    exclude-result-prefixes="xs mf" version="2.0">

    <!-- Minimum silence between two consecutive points that counts as a stop;
         declared as a stylesheet parameter so the caller can override it. -->
    <xsl:param name="stop" as="xs:dayTimeDuration" select="xs:dayTimeDuration('PT5M')"/>

    <xsl:output indent="yes"/>

    <!--
        mf:group (2 arguments): entry point.
        Param dateTimes: all time stamps, in the order they should be grouped.
        Param stop: gap that separates two groups.
        Returns: one trk per contiguous run; the empty sequence when there are
        no time stamps.
    -->
    <xsl:function name="mf:group" as="element(trk)*">
        <xsl:param name="dateTimes" as="xs:dateTime*"/>
        <xsl:param name="stop" as="xs:dayTimeDuration"/>
        <!-- Guard: the 3-argument overload requires a non-empty group (its
             $end variable is a mandatory xs:dateTime), so it must not be
             called when there is nothing to seed the first group with. -->
        <xsl:sequence
            select="if (exists($dateTimes))
                    then mf:group($dateTimes[1], $dateTimes[position() gt 1], $stop)
                    else ()"/>
    </xsl:function>

    <!--
        mf:group (3 arguments): recursive worker.
        Param group: time stamps collected so far for the current run; must
        not be empty.
        Param dateTimes: time stamps still to be processed.
        Param stop: gap that separates two groups.
        Returns: the trk for the current run followed by the trk elements of
        all later runs.
    -->
    <xsl:function name="mf:group" as="element(trk)*">
        <xsl:param name="group" as="xs:dateTime*"/>
        <xsl:param name="dateTimes" as="xs:dateTime*"/>
        <xsl:param name="stop" as="xs:dayTimeDuration"/>
        <xsl:variable name="next" as="xs:dateTime?" select="$dateTimes[1]"/>
        <xsl:variable name="end" as="xs:dateTime" select="$group[last()]"/>
        <xsl:choose>
            <!-- Input exhausted: emit the last open group. -->
            <xsl:when test="not(exists($next))">
                <xsl:sequence select="mf:wrap($group)"/>
            </xsl:when>
            <!-- Stop detected: emit the current group, then start a new one
                 seeded with $next. -->
            <xsl:when test="$next - $end gt $stop">
                <xsl:sequence select="mf:wrap($group)"/>
                <xsl:sequence select="mf:group($next, $dateTimes[position() gt 1], $stop)"/>
            </xsl:when>
            <!-- Still contiguous: move $next into the current group. -->
            <xsl:otherwise>
                <xsl:sequence select="mf:group(($group, $next), $dateTimes[position() gt 1], $stop)"
                />
            </xsl:otherwise>
        </xsl:choose>
    </xsl:function>

    <!--
        mf:wrap: builds one trk element holding a single trkseg with one
        trkpt/time per supplied dateTime, in the order given.
        Param dateTimes: the time stamps of one contiguous run.
        Returns: exactly one trk element.
    -->
    <xsl:function name="mf:wrap" as="element(trk)">
        <xsl:param name="dateTimes" as="xs:dateTime*"/>
        <trk>
            <!-- trkseg (not "trkseq"): must match the element name used by the input. -->
            <trkseg>
                <xsl:for-each select="$dateTimes">
                    <trkpt>
                        <time>
                            <xsl:value-of select="."/>
                        </time>
                    </trkpt>
                </xsl:for-each>
            </trkseg>
        </trk>
    </xsl:function>

    <!-- Copies the gpx element and replaces its content with the regrouped
         trk elements built from every descendant time element. -->
    <xsl:template match="gpx">
        <xsl:copy>
            <xsl:sequence select="mf:group(.//time/xs:dateTime(.), $stop)"/>
        </xsl:copy>
    </xsl:template>

</xsl:stylesheet>
查看更多
登录 后发表回答