Fragment input XHTML into separate XHTML files with XSLT

2019-07-30 00:35发布

Can someone help me with this? It would be really appreciated.

Requirement:

  1. Generate a separate HTML file for each pal:fragment element in the attached source document; each file is named after the @fragment-id attribute of its pal:fragment element.
  2. 'pal:fragment' elements can be nested and can be wrapped in 'div' elements.
  3. Each fragment file should include not only all child elements of its 'pal:fragment' but also its ancestor 'div' elements. A file generated from a 'pal:fragment' should not include the content of any descendant 'pal:fragment'.

See below examples for requirement understanding

example1: if a fragment file is generated for 'pal:fragment' fragment-id="DLM3989039", the content should look like this (removed html markup for easy understanding)

<div class="bill" id="DLM3988900">
    <div class="schedule-group">
        <div class="schedule" id="DLM3989039">
            <p>DLM3989039 dummy text </p>
            <div class="schedule-provisions"></div>
        </div>
    </div>
</div>

example 2: if a fragment file is generated for 'pal:fragment' fragment-id="DLM3989044", the content should look like this. (removed html markup for easy understanding)

<div class="bill" id="DLM3988900">
    <div class="schedule-group">
        <div class="schedule" id="DLM3989039">
            <div class="schedule-provisions">
                <div class="part" id="DLM3989044">
                    <p>DLM3989044 dummy content</p>
                </div>
            </div>
        </div>
    </div>
</div>      

XML document I used

<?xml version="1.0" encoding="UTF-8"?>
<div class="bill" id="DLM3988900" xmlns:pal="http://www.google.nz/rendition-info"
    xmlns="http://www.w3.org/1999/xhtml">
    <div class="billdetail">
        <pal:fragment fragment-id="DLM3988901" fragment-type="explnote">
            <div class="explnote" id="DLM3988901">
                <p>DLM3988901 dummy text</p>
            </div>
        </pal:fragment>
    </div>
    <pal:fragment fragment-id="DLM3988906" fragment-type="contents">
        <div class="cover" id="DLM3988906">
            <p>DLM3988906 dummy text</p>
        </div>
        <div class="body" id="DLM3988910">
            <pal:fragment fragment-id="DLM3988963" fragment-type="part">
                <div class="part" id="DLM3988963">
                    <p>DLM3988963 dummy text</p>
                    <pal:fragment fragment-id="DLM3988965" fragment-type="prov">
                        <div class="prov" id="DLM3988965">
                            <p>DLM3988965 dummy text</p>
                        </div>
                    </pal:fragment>
                </div>
            </pal:fragment>
            <pal:fragment fragment-id="DLM3989003" fragment-type="part">
                <div class="part" id="DLM3989003">
                    <p>DLM3989003 dummy text</p>
                    <pal:fragment fragment-id="DLM3989004" fragment-type="subpart">
                        <div class="subpart" id="DLM3989004">
                            <p>DLM3989004 dummy text</p>
                            <pal:fragment fragment-id="DLM3989005" fragment-type="prov">
                                <div class="prov" id="DLM3989005">
                                    <p>DLM3989005 dummy text</p>
                                </div>
                            </pal:fragment>
                        </div>
                    </pal:fragment>
                </div>
            </pal:fragment>
        </div>
    </pal:fragment>
    <div class="schedule-group">
        <pal:fragment fragment-id="DLM3989039" fragment-type="schedule">
            <div class="schedule" id="DLM3989039">
                <p>DLM3989039 dummy text </p>
                <div class="schedule-provisions">
                    <pal:fragment fragment-id="DLM3989044" fragment-type="part">
                        <div class="part" id="DLM3989044">
                            <p>DLM3989044 dummy content</p>
                            <pal:fragment fragment-id="DLM3989057" fragment-type="subpart">
                                <div class="subpart" id="DLM3989057">
                                    <p>DLM3989057 dummy content</p>
                                    <pal:fragment fragment-id="DLM3989059" fragment-type="prov">
                                        <div class="prov" id="DLM3989059">
                                            <p> DLM3989059 dummy coent</p>
                                        </div>
                                    </pal:fragment>
                                </div>
                            </pal:fragment>
                        </div>
                    </pal:fragment>
                </div>
            </div>
        </pal:fragment>
    </div>
</div>

=============

XSLT stylesheet I created:

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:foo="http://www.google.nz/rendition-info"
    xmlns:html="http://www.w3.org/1999/xhtml" exclude-result-prefixes="xs foo html" version="2.0">

    <!--
        Writes one HTML file per foo:fragment element, named after its
        @fragment-id plus $frag_file_ext. Each file is meant to hold the
        fragment's own content wrapped in copies of its ancestor divs.
        As written, the ancestor copy and the fragment content are emitted
        as siblings, and the ancestor copy is nested inside-out; see the
        notes on generate_output_doc and on the div-content mode template.
    -->

    <!-- Remove whitespace-only text nodes from the input before processing. -->
    <xsl:strip-space elements="*"/>

    <!-- Optional output directory; when empty, output_dir falls back to
         the location of the input document. -->
    <xsl:param name="target_directory" select="''" as="xs:string"/>
    <!-- The part of the input document URI that precedes its file name. -->
    <xsl:param name="input_doc_loc"
        select="xs:anyURI(substring-before(document-uri(/),$input_file_name))" as="xs:anyURI"/>
    <!-- Prefix for every fragment file: $input_doc_loc when no
         target_directory was given, otherwise target_directory plus '/'. -->
    <xsl:param name="output_dir" as="xs:anyURI">
        <xsl:choose>
            <xsl:when test="$target_directory = ''">
                <xsl:value-of select="$input_doc_loc"/>
            </xsl:when>
            <xsl:otherwise>
                <xsl:value-of select="concat($target_directory, '/')"/>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:param>

    <!-- File name of the input document, obtained by passing its URI to
         the recursive get_file_name template. -->
    <xsl:param name="input_file_name" as="xs:anyURI">
        <xsl:variable name="filename">
            <xsl:call-template name="get_file_name">
                <xsl:with-param name="file_name" select="document-uri(/)" as="xs:anyURI"/>
            </xsl:call-template>
        </xsl:variable>
        <xsl:value-of select="$filename"/>
    </xsl:param>

    <!-- Returns the text after the last '/' in $file_name. While the value
         still contains a '/', the template calls itself with the text after
         the first '/'; once none is left, the remainder is output. -->
    <xsl:template name="get_file_name">
        <xsl:param name="file_name" as="xs:anyURI"/>
        <xsl:choose>
            <xsl:when test="contains($file_name, '/')">
                <xsl:call-template name="get_file_name">
                    <xsl:with-param name="file_name"
                        select="xs:anyURI(substring-after($file_name, '/'))"/>
                </xsl:call-template>
            </xsl:when>
            <xsl:otherwise>
                <xsl:value-of select="$file_name"/>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>

    <!-- Extension appended to @fragment-id to form each output file name. -->
    <xsl:param name="frag_file_ext" select="'.html'" as="xs:string"/>

    <xsl:output media-type="text/html" method="html"/>

    <!-- Builds and writes the output file for one fragment. Called from the
         foo:fragment template, so the context node is that fragment. -->
    <xsl:template name="generate_output_doc">
        <xsl:variable name="fragment_name" as="xs:anyURI"
            select="xs:anyURI(concat(@fragment-id,$frag_file_ext))"/>
        <xsl:variable name="fragment_file" as="xs:anyURI"
            select="xs:anyURI(concat($output_dir,$fragment_name))"/>

        <!-- Declared but never referenced in this stylesheet. -->
        <xsl:variable name="child-content" select="node()"/>

        <!-- The two apply-templates below add their results to the same
             sequence one after the other: first the copied parent div chain,
             then the fragment's own content. Nothing places the content
             inside the div copy, so the two end up as siblings in the output.
             NOTE(review): a nested foo:fragment reached through the second
             apply-templates runs xsl:result-document while this variable is
             being evaluated; XSLT 2.0 defines that as error XTDE1480
             (temporary output state). Confirm how the processor in use
             handles it. -->
        <xsl:variable name="ancestor-divs" as="node()*">
            <xsl:apply-templates select="parent::html:div[1]" mode="div-content"/>
            <xsl:apply-templates select="node()"/>
        </xsl:variable>
        <!-- Declared but never referenced; the result document below uses
             $ancestor-divs directly. -->
        <xsl:variable name="reverse-ancestor-divs" as="node()*">
            <xsl:sequence select="reverse($ancestor-divs)"/>
        </xsl:variable>


        <xsl:result-document href="{$fragment_file}">
            <html xmlns="http://www.w3.org/1999/xhtml">
                <head>
                    <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
                </head>
                <body>
                    <xsl:sequence select="$ancestor-divs"/>
                </body>
            </html>
        </xsl:result-document>
    </xsl:template>

    <!-- Every fragment, at any nesting level, produces its own output file
         and contributes nothing to the place where it was matched. -->
    <xsl:template match="foo:fragment">
        <xsl:call-template name="generate_output_doc"/>
    </xsl:template>

    <!-- Default-mode copy of a div with its attributes and children; the
         body is the same as the generic copy template at the end. -->
    <xsl:template match="html:div">
        <xsl:copy>
            <xsl:apply-templates select="@*|node()"/>
        </xsl:copy>
    </xsl:template>

    <!-- Copies a div and its attributes without its children, then recurses
         to the nearest ancestor div. Because the recursion sits inside
         xsl:copy, each ancestor is written inside the copy of its own
         descendant, which is the inverse of the nesting in the source. -->
    <xsl:template match="html:div" mode="div-content">
        <xsl:copy>
            <xsl:apply-templates select="@*"/>
            <xsl:apply-templates select="ancestor::html:div[1]" mode="div-content"/>
        </xsl:copy>
    </xsl:template>

    <!-- Identity-style copy for elements and attributes. Text nodes are not
         matched here and fall through to the built-in template rule. -->
    <xsl:template match="*| @*">
        <xsl:copy>
            <xsl:apply-templates select="@*|node()"/>
        </xsl:copy>
    </xsl:template>

</xsl:stylesheet>

============

Using the sample file shown above, 11 separate HTML files are generated, which is the expected number. However, there are problems with my XSLT stylesheet; the generated output looks like the example below:

  1. The ancestor nodes of a 'foo:fragment' are not nested properly and do not contain the fragment content as their child/descendant; instead, the content is written as a sibling.

Here is the separate HTML file generated for the 'pal:fragment' element with fragment-id "DLM3989039":

<html xmlns="http://www.w3.org/1999/xhtml">
   <head>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8"></meta>
   </head>
   <body>
      <div class="schedule-group">
         <div class="bill" id="DLM3988900"></div>
      </div>
      <div xmlns:foo="http://www.google.nz/rendition-info" class="schedule" id="DLM3989039">
         <p>DLM3989039 dummy text </p>
         <div class="schedule-provisions"></div>
      </div>
   </body>
</html>

Thanks and Regards, Suresh.

1条回答
别忘想泡老子
2楼-- · 2019-07-30 00:52

I am not sure I completely understand your requirement but here is my implementation of what I believe you want to achieve:

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:pal="http://www.google.nz/rendition-info"
    xmlns:html="http://www.w3.org/1999/xhtml" exclude-result-prefixes="xs pal html" version="2.0"
    xmlns="http://www.w3.org/1999/xhtml">

<!--
    Writes one XHTML file per pal:fragment element, named after its
    @fragment-id plus $frag_file_ext.

    For each fragment the outermost ancestor div is processed once, with the
    fragment itself passed down as the tunnel parameter doc-frag. On the way
    down, every template decides from doc-frag whether the current node is
      * an ancestor of the fragment: keep the element shell and follow only
        the branch that leads to the fragment,
      * inside the fragment: copy it, or
      * anything else, including nested pal:fragment elements: drop it.
-->

<!-- Extension appended to @fragment-id to form each output file name. -->
<xsl:param name="frag_file_ext" select="'.html'" as="xs:string"/>

<xsl:output method="xhtml" indent="yes" media-type="text/html" omit-xml-declaration="yes"/>
<xsl:strip-space elements="*"/>

<!-- Entry point: start one result document per fragment at any depth. -->
<xsl:template match="/">
  <xsl:apply-templates select="//pal:fragment" mode="doc"/>
</xsl:template>

<!-- Creates the result document for one fragment. Processing starts at the
     outermost ancestor div ([last()] on the reverse ancestor axis) so that
     the whole ancestor chain is rebuilt in source order. -->
<xsl:template match="pal:fragment" mode="doc">
  <xsl:result-document href="{@fragment-id}{$frag_file_ext}">
      <html>
          <head>
          </head>
          <body>
              <xsl:apply-templates select="ancestor::html:div[last()]">
                <xsl:with-param name="doc-frag" as="element(pal:fragment)" select="current()" tunnel="yes"/>
              </xsl:apply-templates>
          </body>
      </html>
  </xsl:result-document>
</xsl:template>

<!-- Identity copy for everything not handled by a more specific template:
     attributes, text, comments and elements outside the XHTML namespace. -->
<xsl:template match="@* | node()">
  <xsl:copy>
    <xsl:apply-templates select="@* , node()"/>
  </xsl:copy>
</xsl:template>

<!-- XHTML elements are kept only when they lie on the path to the target
     fragment or inside it. They are written with xsl:copy and
     copy-namespaces="no": this keeps the element's own name and namespace
     whatever prefix the source uses, and still prevents the xmlns:pal
     declaration from being carried into the output. -->
<xsl:template match="html:*">
  <xsl:param name="doc-frag" as="element(pal:fragment)" tunnel="yes"/>
  <xsl:variable name="doc-anc" as="element()*" select="$doc-frag/ancestor-or-self::*"/>
  <xsl:choose>
    <!-- Ancestor of the target fragment: keep the attributes and descend
         only into the child element that contains (or is) the fragment. -->
    <xsl:when test="$doc-anc[. is current()]">
      <xsl:copy copy-namespaces="no">
        <xsl:apply-templates select="@* , *[descendant-or-self::*[. is $doc-frag]]"/>
      </xsl:copy>
    </xsl:when>
    <!-- Inside the target fragment: keep attributes and all children.
         Nested fragments are removed by the pal:fragment template. -->
    <xsl:when test="ancestor::pal:fragment[. is $doc-frag]">
      <xsl:copy copy-namespaces="no">
        <xsl:apply-templates select="@* , node()"/>
      </xsl:copy>
    </xsl:when>
    <!-- Any other XHTML element is unrelated to this fragment and
         produces no output. -->
  </xsl:choose>
</xsl:template>

<!-- A pal:fragment wrapper is never written itself. Its children are
     processed only when it is the target fragment or one of the target's
     ancestors; every other fragment is dropped together with its content. -->
<xsl:template match="pal:fragment">
  <xsl:param name="doc-frag" as="element(pal:fragment)" tunnel="yes"/>
  <xsl:if test="$doc-frag/ancestor-or-self::pal:fragment[. is current()]">
    <xsl:apply-templates/>
  </xsl:if>
</xsl:template>

</xsl:stylesheet>

When I apply that stylesheet with Saxon 9.4 HE to the input

<?xml version="1.0" encoding="UTF-8"?>
<div class="bill" id="DLM3988900" xmlns:pal="http://www.google.nz/rendition-info"
    xmlns="http://www.w3.org/1999/xhtml">
    <div class="billdetail">
        <pal:fragment fragment-id="DLM3988901" fragment-type="explnote">
            <div class="explnote" id="DLM3988901">
                <p>DLM3988901 dummy text</p>
            </div>
        </pal:fragment>
    </div>
    <pal:fragment fragment-id="DLM3988906" fragment-type="contents">
        <div class="cover" id="DLM3988906">
            <p>DLM3988906 dummy text</p>
        </div>
        <div class="body" id="DLM3988910">
            <pal:fragment fragment-id="DLM3988963" fragment-type="part">
                <div class="part" id="DLM3988963">
                    <p>DLM3988963 dummy text</p>
                    <pal:fragment fragment-id="DLM3988965" fragment-type="prov">
                        <div class="prov" id="DLM3988965">
                            <p>DLM3988965 dummy text</p>
                        </div>
                    </pal:fragment>
                </div>
            </pal:fragment>
            <pal:fragment fragment-id="DLM3989003" fragment-type="part">
                <div class="part" id="DLM3989003">
                    <p>DLM3989003 dummy text</p>
                    <pal:fragment fragment-id="DLM3989004" fragment-type="subpart">
                        <div class="subpart" id="DLM3989004">
                            <p>DLM3989004 dummy text</p>
                            <pal:fragment fragment-id="DLM3989005" fragment-type="prov">
                                <div class="prov" id="DLM3989005">
                                    <p>DLM3989005 dummy text</p>
                                </div>
                            </pal:fragment>
                        </div>
                    </pal:fragment>
                </div>
            </pal:fragment>
        </div>
    </pal:fragment>
    <div class="schedule-group">
        <pal:fragment fragment-id="DLM3989039" fragment-type="schedule">
            <div class="schedule" id="DLM3989039">
                <p>DLM3989039 dummy text </p>
                <div class="schedule-provisions">
                    <pal:fragment fragment-id="DLM3989044" fragment-type="part">
                        <div class="part" id="DLM3989044">
                            <p>DLM3989044 dummy content</p>
                            <pal:fragment fragment-id="DLM3989057" fragment-type="subpart">
                                <div class="subpart" id="DLM3989057">
                                    <p>DLM3989057 dummy content</p>
                                    <pal:fragment fragment-id="DLM3989059" fragment-type="prov">
                                        <div class="prov" id="DLM3989059">
                                            <p> DLM3989059 dummy coent</p>
                                        </div>
                                    </pal:fragment>
                                </div>
                            </pal:fragment>
                        </div>
                    </pal:fragment>
                </div>
            </div>
        </pal:fragment>
    </div>
</div>

I get 11 DLMxxx.html result files

Mode                LastWriteTime     Length Name
----                -------------     ------ ----
-a---        24.10.2012     11:38        387 DLM3988901.html
-a---        24.10.2012     11:38        375 DLM3988906.html
-a---        24.10.2012     11:38        393 DLM3988963.html
-a---        24.10.2012     11:38        468 DLM3988965.html
-a---        24.10.2012     11:38        393 DLM3989003.html
-a---        24.10.2012     11:38        471 DLM3989004.html
-a---        24.10.2012     11:38        552 DLM3989005.html
-a---        24.10.2012     11:38        447 DLM3989039.html
-a---        24.10.2012     11:38        549 DLM3989044.html
-a---        24.10.2012     11:38        639 DLM3989057.html
-a---        24.10.2012     11:38        731 DLM3989059.html

where for instance DLM3988901.html is

<html xmlns="http://www.w3.org/1999/xhtml">
   <head>
      <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
   </head>
   <body>
      <div class="bill" id="DLM3988900">
         <div class="billdetail">
            <div class="explnote" id="DLM3988901">
               <p>DLM3988901 dummy text</p>
            </div>
         </div>
      </div>
   </body>
</html>

and DLM3989044.html is

<html xmlns="http://www.w3.org/1999/xhtml">
   <head>
      <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
   </head>
   <body>
      <div class="bill" id="DLM3988900">
         <div class="schedule-group">
            <div class="schedule" id="DLM3989039">
               <div class="schedule-provisions">
                  <div class="part" id="DLM3989044">
                     <p>DLM3989044 dummy content</p>
                  </div>
               </div>
            </div>
         </div>
      </div>
   </body>
</html>
查看更多
登录 后发表回答