I have the following XML:
<?xml version="1.0" encoding="utf-8" ?>
<DOCUMENT>
<SECTION>
<PARAGRAPH TRACK="4">
<SENTENCE NAME="PRIMARY" COUNT="4">
<TOKEN BEGIN="9" END="11" SENTENCE_BEGIN="0" SENTENCE_END="156" />
<TOKEN BEGIN="32" END="37" SENTENCE_BEGIN="0" SENTENCE_END="156" />
<TOKEN BEGIN="167" END="169" SENTENCE_BEGIN="158" SENTENCE_END="316" />
<TOKEN BEGIN="210" END="215" SENTENCE_BEGIN="158" SENTENCE_END="316" />
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="2">
<TOKEN BEGIN="139" END="141" SENTENCE_BEGIN="0" SENTENCE_END="156" PROP="A" DOUBLE="YES" />
<TOKEN BEGIN="143" END="145" SENTENCE_BEGIN="0" SENTENCE_END="156" PROP="B" />
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="17" END="19" SENTENCE_BEGIN="0" SENTENCE_END="156" PROP="C" DOUBLE="YES" />
</SENTENCE>
</PARAGRAPH>
</SECTION>
</DOCUMENT>
And I need to obtain the following result, with the node TOKEN selected by the IF statement, updated with the attribute NEW:
<?xml version="1.0" encoding="utf-8" ?>
<DOCUMENT>
<SECTION>
<PARAGRAPH TRACK="4">
<SENTENCE NAME="PRIMARY" COUNT="4">
<TOKEN BEGIN="9" END="11" SENTENCE_BEGIN="0" SENTENCE_END="156" />
<TOKEN BEGIN="32" END="37" SENTENCE_BEGIN="0" SENTENCE_END="156" />
<TOKEN BEGIN="167" END="169" SENTENCE_BEGIN="158" SENTENCE_END="316" />
<TOKEN BEGIN="210" END="215" SENTENCE_BEGIN="158" SENTENCE_END="316" />
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="2">
<TOKEN BEGIN="139" END="141" SENTENCE_BEGIN="0" SENTENCE_END="156" PROP="A" DOUBLE="YES" />
<TOKEN BEGIN="143" END="145" SENTENCE_BEGIN="0" SENTENCE_END="156" PROP="B" />
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="17" END="19" SENTENCE_BEGIN="0" SENTENCE_END="156" PROP="C" DOUBLE="YES" NEW="YES" />
<TOKEN></TOKEN>
</SENTENCE>
</PARAGRAPH>
</SECTION>
</DOCUMENT>
Using the following XSLT transformation I obtain a wrong result, where the node TOKEN selected by the IF statement is copied at the beginning of the tree:
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- Identity template: copies every attribute and node through unchanged. -->
  <xsl:template match="@* | node()">
    <xsl:copy>
      <xsl:apply-templates select="@* | node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Indexes the TOKENs of PRIMARY sentences by "SENTENCE_BEGIN|SENTENCE_END". -->
  <xsl:key name="primary_tokens"
           match="SENTENCE[@NAME='PRIMARY']/TOKEN"
           use="concat(@SENTENCE_BEGIN,'|',@SENTENCE_END)"/>

  <!-- Matches the document element. The for-each is instantiated before the
       xsl:copy of the document element, so every TOKEN it emits is written
       ahead of the root element instead of at the TOKEN's own position. -->
  <xsl:template match="/*">
    <xsl:for-each select=".//TOKEN[@DOUBLE='YES'][key('primary_tokens',concat(@SENTENCE_BEGIN,'|',@SENTENCE_END))]">
      <!-- True when a PRIMARY token with the same sentence span has a larger BEGIN. -->
      <xsl:if test="key('primary_tokens',concat(@SENTENCE_BEGIN,'|',@SENTENCE_END))[@BEGIN > current()/@BEGIN]">
        <xsl:copy>
          <xsl:attribute name="NEW">YES</xsl:attribute>
          <xsl:apply-templates select="@*|node()"/>
        </xsl:copy>
      </xsl:if>
    </xsl:for-each>
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>
</xsl:stylesheet>
Unwanted result:
<?xml version="1.0" encoding="utf-16"?>
<TOKEN NEW="YES" BEGIN="17" END="19" SENTENCE_BEGIN="0" SENTENCE_END="156" PROP="C" DOUBLE="YES" />
<DOCUMENT>
<SECTION>
<PARAGRAPH TRACK="4">
<SENTENCE NAME="PRIMARY" COUNT="4">
<TOKEN BEGIN="9" END="11" SENTENCE_BEGIN="0" SENTENCE_END="156" />
<TOKEN BEGIN="32" END="37" SENTENCE_BEGIN="0" SENTENCE_END="156" />
<TOKEN BEGIN="167" END="169" SENTENCE_BEGIN="158" SENTENCE_END="316" />
<TOKEN BEGIN="210" END="215" SENTENCE_BEGIN="158" SENTENCE_END="316" />
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="2">
<TOKEN BEGIN="139" END="141" SENTENCE_BEGIN="0" SENTENCE_END="156" PROP="A" DOUBLE="YES" />
<TOKEN BEGIN="143" END="145" SENTENCE_BEGIN="0" SENTENCE_END="156" PROP="B" />
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="17" END="19" SENTENCE_BEGIN="0" SENTENCE_END="156" PROP="C" DOUBLE="YES" />
</SENTENCE>
</PARAGRAPH>
</SECTION>
</DOCUMENT>
I came up with a possible solution: giving the xsl:template the pattern match="//SECTION/PARAGRAPH/SENTENCE[@NAME='SECONDARY']/TOKEN". Unfortunately this solution is not correct either, because the content of the TOKEN node is simply copied into the SENTENCE node:
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- Identity template: copies every attribute and node through unchanged. -->
  <xsl:template match="@* | node()">
    <xsl:copy>
      <xsl:apply-templates select="@* | node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Indexes the TOKENs of PRIMARY sentences by "SENTENCE_BEGIN|SENTENCE_END". -->
  <xsl:key name="primary_tokens"
           match="SENTENCE[@NAME='PRIMARY']/TOKEN"
           use="concat(@SENTENCE_BEGIN,'|',@SENTENCE_END)"/>

  <!-- Matches TOKENs of SECONDARY sentences. The xsl:attribute and the
       attribute copies inside the xsl:if are instantiated outside the
       xsl:copy of the TOKEN, so they are attached to the element currently
       being built by the caller (the copied SENTENCE), not to the TOKEN. -->
  <xsl:template match="//SECTION/PARAGRAPH/SENTENCE[@NAME='SECONDARY']/TOKEN">
    <xsl:for-each select="current()[key('primary_tokens',concat(@SENTENCE_BEGIN,'|',@SENTENCE_END))]">
      <xsl:if test="key('primary_tokens',concat(@SENTENCE_BEGIN,'|',@SENTENCE_END))[@BEGIN > current()/@BEGIN]">
        <xsl:attribute name="NEW">YES</xsl:attribute>
        <xsl:apply-templates select="@*|node()"/>
      </xsl:if>
    </xsl:for-each>
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>
</xsl:stylesheet>
Wrong output with TOKEN in SENTENCE node:
<?xml version="1.0" encoding="utf-16"?>
<DOCUMENT>
<SECTION>
<PARAGRAPH TRACK="4">
<SENTENCE NAME="PRIMARY" COUNT="4">
<TOKEN BEGIN="9" END="11" SENTENCE_BEGIN="0" SENTENCE_END="156" />
<TOKEN BEGIN="32" END="37" SENTENCE_BEGIN="0" SENTENCE_END="156" />
<TOKEN BEGIN="167" END="169" SENTENCE_BEGIN="158" SENTENCE_END="316" />
<TOKEN BEGIN="210" END="215" SENTENCE_BEGIN="158" SENTENCE_END="316" />
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="2">
<TOKEN BEGIN="139" END="141" SENTENCE_BEGIN="0" SENTENCE_END="156" PROP="A" DOUBLE="YES" />
<TOKEN BEGIN="143" END="145" SENTENCE_BEGIN="0" SENTENCE_END="156" PROP="B" />
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1" NEW="YES" BEGIN="17" END="19" SENTENCE_BEGIN="0" SENTENCE_END="156" PROP="C" DOUBLE="YES">
<TOKEN BEGIN="17" END="19" SENTENCE_BEGIN="0" SENTENCE_END="156" PROP="C" DOUBLE="YES" />
<TOKEN />
</SENTENCE>
</PARAGRAPH>
</SECTION>
</DOCUMENT>
My question is: How do I obtain the wanted result reported above? Is my second try any close to a good solution?
I think your second try is close; you do want to match TOKEN if that's what you want to update.
Try this...
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- Indexes the TOKENs of PRIMARY sentences by "SENTENCE_BEGIN|SENTENCE_END". -->
  <xsl:key name="primary_tokens"
           match="SENTENCE[@NAME='PRIMARY']/TOKEN"
           use="concat(@SENTENCE_BEGIN,'|',@SENTENCE_END)"/>

  <!-- Identity template: copies every attribute and node through unchanged. -->
  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Copies a TOKEN flagged DOUBLE="YES" in place, adding NEW="YES" when a
       PRIMARY token with the same sentence span has a larger BEGIN.
       The attribute is created inside xsl:copy so it lands on the TOKEN. -->
  <xsl:template match="TOKEN[@DOUBLE='YES']">
    <xsl:copy>
      <xsl:if test="key('primary_tokens',concat(@SENTENCE_BEGIN,'|',@SENTENCE_END))[@BEGIN > current()/@BEGIN]">
        <xsl:attribute name="NEW">YES</xsl:attribute>
      </xsl:if>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>
</xsl:stylesheet>
Fiddle: http://xsltfiddle.liberty-development.net/bdxtpX
Related
I'm trying to transform the following XML file, to remove each <AGGREGATION> node if followed by a <MULTIPLE> node.
<?xml version="1.0" encoding="UTF-8"?>
<RECORD TEMPLATE="PRODUCTS" TRACK="1">
<FIELD NAME="PRODUCT" BASE="CT300" COUNT="2">
<AGGREGATION DOMAIN="4" />
<MULTIPLE TYPE="YES" />
<MULTIPLE TYPE="YES" />
<MULTIPLE TYPE="YES" />
<MULTIPLE TYPE="YES" />
<MULTIPLE TYPE="YES" />
<TOKEN TEXT="CT300" BEGIN="11379" END="11384"/>
<AGGREGATION DOMAIN="9" />
<AGGREGATION DOMAIN="4" />
<AGGREGATION DOMAIN="4" />
<MULTIPLE TYPE="YES" />
<MULTIPLE TYPE="YES" />
<MULTIPLE TYPE="YES" />
<MULTIPLE TYPE="YES" />
<MULTIPLE TYPE="YES" />
<TOKEN TEXT="CT300" BEGIN="11379" END="11384"/>
</FIELD>
</RECORD>
With the following xslt transformation, I was able to remove only the first occurrence of the <AGGREGATION> node:
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

  <!-- Identity template: copies every attribute and node through unchanged. -->
  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Matches only an AGGREGATION whose immediately following sibling element
       is a MULTIPLE. The xsl:when test is an absolute path, so it is evaluated
       against the whole document rather than the matched node; when any such
       AGGREGATION exists the empty branch is taken and nothing is output. -->
  <xsl:template match="//RECORD[@TEMPLATE='PRODUCTS']/FIELD[@NAME='PRODUCT']/AGGREGATION[following-sibling::*[1][self::MULTIPLE]]">
    <xsl:choose>
      <xsl:when test="//RECORD[@TEMPLATE='PRODUCTS']/FIELD[@NAME='PRODUCT']/AGGREGATION[following-sibling::*[1][self::MULTIPLE]]">
      </xsl:when>
      <xsl:otherwise>
        <xsl:copy>
          <xsl:apply-templates select="@*|node()" />
        </xsl:copy>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>
</xsl:stylesheet>
This is the output I receive:
<?xml version="1.0" encoding="UTF-8"?>
<RECORD TEMPLATE="PRODUCTS" TRACK="1">
<FIELD BASE="CT300" COUNT="2" NAME="PRODUCT">
<MULTIPLE TYPE="YES"/>
<MULTIPLE TYPE="YES"/>
<MULTIPLE TYPE="YES"/>
<MULTIPLE TYPE="YES"/>
<MULTIPLE TYPE="YES"/>
<TOKEN BEGIN="11379" END="11384" TEXT="CT300"/>
<AGGREGATION DOMAIN="9"/>
<AGGREGATION DOMAIN="4"/>
<MULTIPLE TYPE="YES"/>
<MULTIPLE TYPE="YES"/>
<MULTIPLE TYPE="YES"/>
<MULTIPLE TYPE="YES"/>
<MULTIPLE TYPE="YES"/>
<TOKEN BEGIN="11379" END="11384" TEXT="CT300"/>
</FIELD>
</RECORD>
While the desired output is the following:
<?xml version="1.0" encoding="UTF-8"?>
<RECORD TEMPLATE="PRODUCTS" TRACK="1">
<FIELD BASE="CT300" COUNT="2" NAME="PRODUCT">
<MULTIPLE TYPE="YES"/>
<MULTIPLE TYPE="YES"/>
<MULTIPLE TYPE="YES"/>
<MULTIPLE TYPE="YES"/>
<MULTIPLE TYPE="YES"/>
<TOKEN BEGIN="11379" END="11384" TEXT="CT300"/>
<MULTIPLE TYPE="YES"/>
<MULTIPLE TYPE="YES"/>
<MULTIPLE TYPE="YES"/>
<MULTIPLE TYPE="YES"/>
<MULTIPLE TYPE="YES"/>
<TOKEN BEGIN="11379" END="11384" TEXT="CT300"/>
</FIELD>
</RECORD>
How can I implement a recursive deletion of the <AGGREGATION> node?
My solution is as follows:
Write a template matching AGGREGATION.
The decision whether it should copy anything is made as follows:
Take the first following sibling with name != AGGREGATION.
Check whether its name is MULTIPLE.
If yes, do nothing (skip the current AGGREGATION element).
If not, apply templates for the current element.
You need also the identity template.
So the whole script looks like below:
<?xml version="1.0" encoding="UTF-8" ?>
<xsl:transform version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="xml" encoding="UTF-8" indent="yes" />
  <xsl:strip-space elements="*"/>

  <!-- Keeps an AGGREGATION only when the first following sibling that is not
       itself an AGGREGATION is something other than a MULTIPLE (or when no
       such sibling exists). Otherwise the element is dropped. -->
  <xsl:template match="AGGREGATION">
    <xsl:if test="not(following-sibling::*[name()!='AGGREGATION'][1]
                      [name()='MULTIPLE'])">
      <AGGREGATION>
        <xsl:apply-templates select="@*|node()"/>
      </AGGREGATION>
    </xsl:if>
  </xsl:template>

  <!-- Identity template: copies every attribute and node through unchanged. -->
  <xsl:template match="@*|node()">
    <xsl:copy><xsl:apply-templates select="@*|node()"/></xsl:copy>
  </xsl:template>
</xsl:transform>
For a working example see http://xsltransform.net/bEJaofQ/1
Just use this empty template plus the identity template to eradicate the unwanted <AGGREGATION> elements conditionally:
<!-- Drops an AGGREGATION whose immediately following sibling element is a
     MULTIPLE or another AGGREGATION. The node test must be a predicate on the
     sibling step: written as "... = (self::MULTIPLE or self::AGGREGATION)" the
     parenthesised part is evaluated against the AGGREGATION itself and is
     always true, so every AGGREGATION with any following sibling would match.
     Note: the last AGGREGATION of a run that is followed by some other element
     (for example TOKEN) is kept, while the earlier ones of that run are dropped. -->
<xsl:template match="AGGREGATION[following-sibling::*[1][self::MULTIPLE or self::AGGREGATION]]" />
I think you just need to ignore the following sibling AGGREGATION elements...
<!-- Drops an AGGREGATION when the first following sibling that is not itself
     an AGGREGATION is a MULTIPLE, so a whole run of AGGREGATIONs preceding a
     MULTIPLE is removed. -->
<xsl:template match="RECORD[@TEMPLATE='PRODUCTS']/FIELD[@NAME='PRODUCT']/AGGREGATION[following-sibling::*[not(self::AGGREGATION)][1][self::MULTIPLE]]"/>
I Have an XML file like the one below. What I need to do is to add a Flag to each <TOKEN> node with a <MULTIPLE TYPE='YES'> node when:
there exists more than one <TOKEN> with the same @BEGIN-@END
at least one of the <TOKEN> @TEXT values is different from the others with the same @BEGIN-@END
The xml code is the following:
<?xml version="1.0" encoding="UTF-8"?>
<ALL>
<RECORD TEMPLATE="PRODUCTS" DB="0">
<FIELD NAME="PRODUCT" BASE="AST" >
<TOKEN TEXT="AST" BEGIN="0" END="100"/>
</FIELD>
</RECORD>
<RECORD TEMPLATE="PRODUCTS" DB="1" >
<FIELD NAME="PRODUCT" BASE="BUC" >
<TOKEN TEXT="BUC" BEGIN="0" END="100"/>
<TOKEN TEXT="BUC" BEGIN="0" END="100"/>
<TOKEN TEXT="BUC" BEGIN="0" END="100"/>
<TOKEN TEXT="BUC" BEGIN="0" END="100"/>
<TOKEN TEXT="BUC" BEGIN="102" END="133"/>
<TOKEN TEXT="BUC" BEGIN="102" END="133"/>
</FIELD>
</RECORD>
<RECORD TEMPLATE="PRODUCTS" DB="1" >
<FIELD NAME="PRODUCT" BASE="BUC" >
<TOKEN TEXT="ART" BEGIN="300" END="450"/>
<TOKEN TEXT="ART" BEGIN="300" END="450"/>
</FIELD>
</RECORD>
</ALL>
The desired output is the following:
<ALL>
<RECORD DB="0" TEMPLATE="PRODUCTS">
<FIELD BASE="AST" NAME="PRODUCT">
<MULTIPLE TYPE="YES"/>
<TOKEN BEGIN="0" END="100" TEXT="AST"/>
</FIELD>
</RECORD>
<RECORD DB="1" TEMPLATE="PRODUCTS">
<FIELD BASE="BUC" NAME="PRODUCT">
<MULTIPLE TYPE="YES"/>
<TOKEN BEGIN="0" END="100" TEXT="BUC"/>
<MULTIPLE TYPE="YES"/>
<TOKEN BEGIN="0" END="100" TEXT="BUC"/>
<MULTIPLE TYPE="YES"/>
<TOKEN BEGIN="0" END="100" TEXT="BUC"/>
<MULTIPLE TYPE="YES"/>
<TOKEN BEGIN="0" END="100" TEXT="BUC"/>
<TOKEN BEGIN="102" END="133" TEXT="BUC"/>
<TOKEN BEGIN="102" END="133" TEXT="BUC"/>
</FIELD>
</RECORD>
<RECORD DB="1" TEMPLATE="PRODUCTS">
<FIELD BASE="BUC" NAME="PRODUCT">
<TOKEN BEGIN="300" END="450" TEXT="ART"/>
<TOKEN BEGIN="300" END="450" TEXT="ART"/>
</FIELD>
</RECORD>
</ALL>
I tried the following XSLT, attempting to match both the @BEGIN-@END pair and the @TEXT, but it didn't work.
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output encoding="UTF-8" cdata-section-elements="DOCUMENT" method="xml" indent="yes" omit-xml-declaration="no" />

  <!-- Product TOKENs indexed by their "BEGIN|END" span. -->
  <xsl:key name="token" match="//RECORD[@TEMPLATE='PRODUCTS']/FIELD[@NAME='PRODUCT']/TOKEN" use="concat(@BEGIN, '|', @END)"/>
  <!-- Product TOKENs indexed by their TEXT attribute. -->
  <xsl:key name="text" match="//RECORD[@TEMPLATE='PRODUCTS']/FIELD[@NAME='PRODUCT']/TOKEN" use="@TEXT"/>

  <!-- Identity template, also callable by name. -->
  <xsl:template match="@*|node()" name="identity">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Emits a MULTIPLE marker before a product TOKEN, then copies the TOKEN.
       NOTE: the second predicate calls key('word', ...), but no key named
       'word' is declared in this stylesheet (the declared keys are 'token'
       and 'text'). -->
  <xsl:template match="//RECORD[@TEMPLATE='PRODUCTS']/FIELD[@NAME='PRODUCT']/TOKEN
                       [key('token', concat(@BEGIN, '|', @END))[2]]
                       [(key('word', @TEXT)[2])]">
    <xsl:element name="MULTIPLE">
      <xsl:attribute name="TYPE">YES</xsl:attribute>
    </xsl:element>
    <xsl:call-template name="identity" />
  </xsl:template>
</xsl:stylesheet>
If I remove the [(key('word', @TEXT)[2])] predicate from the XSLT above, the <MULTIPLE TYPE='YES'> node is added, but to every <TOKEN> with the same @BEGIN-@END, without checking whether a @TEXT value different from the others exists.
Can you check this:
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output encoding="UTF-8" method="xml" indent="yes" omit-xml-declaration="no" />

  <!-- All TOKENs indexed by their "BEGIN|END" span. The separator keeps
       spans such as 1/23 and 12/3 from producing the same key value. -->
  <xsl:key name="tokenBySpan" match="TOKEN" use="concat(@BEGIN, '|', @END)"/>

  <!-- Identity template: copies every attribute and node through unchanged. -->
  <xsl:template match="@* | node()">
    <xsl:copy>
      <xsl:apply-templates select="@* | node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Copies each TOKEN, preceded by exactly one MULTIPLE marker when some
       TOKEN with the same span carries a different TEXT. A single existence
       test is used instead of a loop so the marker is never emitted more
       than once per TOKEN. -->
  <xsl:template match="TOKEN">
    <xsl:if test="key('tokenBySpan', concat(@BEGIN, '|', @END))[@TEXT != current()/@TEXT]">
      <MULTIPLE TYPE="YES"/>
    </xsl:if>
    <xsl:copy>
      <xsl:apply-templates select="@* | node()"/>
    </xsl:copy>
  </xsl:template>
</xsl:stylesheet>
I think I'm missing something obvious here but here goes. I have the below xml and I need to group the KEY nodes of the matched instances together. This is specified by the match attribute and it can contain more than one item number. There can be any number of ITEM nodes and any number of KEY nodes. Also, there is no limit to the depth of the ITEM nodes. And, the matched instances need not be under the same parent. I'm also limited to XSLT 1.0 and the Microsoft parser.
<?xml version="1.0" encoding="utf-8" ?>
<ITEM number='1'>
<ITEM number='2'>
<ITEM number='3' match='5,11'>
<KEY name='key1' value='x' />
<KEY name='key2' value='y' />
<KEY name='key3' value='z' />
<ITEM number ='4' />
</ITEM>
<ITEM number='5' match='3,11'>
<KEY name='key1' value='x' />
<KEY name='key2' value='y' />
<KEY name='key3' value='z' />
</ITEM>
<ITEM number='6' match='10'>
<KEY name='key1' value='x' />
<KEY name='key2' value='y' />
<KEY name='key4' value='a' />
</ITEM>
<ITEM number='7' />
<ITEM number='8'>
<KEY name='key1' value='x' />
</ITEM>
</ITEM>
<ITEM number='9'>
<ITEM number='10' match='6'>
<KEY name='key1' value='x' />
<KEY name='key3' value='z' />
<KEY name='key5' value='b' />
</ITEM>
</ITEM>
<ITEM number='11' match='3,5'>
<KEY name='key2' value='y' />
<KEY name='key3' value='z' />
</ITEM>
</ITEM>
My expected result would look something like this...
<?xml version="1.0" encoding="utf-8" ?>
<Result>
<Group number="1" />
<Group number="2" />
<Group number="3,5,11">
<KEY name='key1' value='x' />
<KEY name='key2' value='y' />
<KEY name='key3' value='z' />
</Group>
<Group number="4" />
<Group number="6,10">
<KEY name='key1' value='x' />
<KEY name='key2' value='y' />
<KEY name='key3' value='z' />
<KEY name='key4' value='a' />
<KEY name='key5' value='b' />
</Group>
<Group number="7" />
<Group number="8">
<KEY name='key1' value='x' />
</Group>
<Group number="9" />
</Result>
What I actually get is...
<?xml version="1.0" encoding="utf-8"?>
<Result>
<Group number="1" />
<Group number="2" />
<Group number="3,5,11">
<KEY name="key1" value="x" />
<KEY name="key2" value="y" />
<KEY name="key3" value="z" />
</Group>
<Group number="4" />
<Group number="6,10">
<KEY name="key4" value="a" />
<KEY name="key5" value="b" />
</Group>
<Group number="7" />
<Group number="8" />
<Group number="9" />
</Result>
I'm using a key and it looks like once I access that particular value from the key function, I cannot access it again. Group number 6,10 should contain all 5 keys but is missing the first 3 which are already present in group number 3,5. Similarly for group number 8, it should contain 1 key. I've used recursion to skip over the matched instances but I don't think there is any issue over there, it seems to be related to the key functionality. I've attached my xslt below, please take a look and tell me what I'm doing wrong. Any tips for performance improvements are also appreciated :)
<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:msxsl="urn:schemas-microsoft-com:xslt" exclude-result-prefixes="msxsl">
  <xsl:output method="xml" indent="yes"/>

  <!-- Document-wide index of KEY elements by their name attribute. -->
  <xsl:key name="kKeyByName" match="KEY" use="@name" />

  <!-- Recursively walks pItemsList, emitting one Group per item whose number
       is not yet recorded in pProcessedList. -->
  <xsl:template name="ProcessItem">
    <!--pItemsList - node set containing items that need to be processed-->
    <xsl:param name="pItemsList" />
    <!--pProcessedList - string containing processed item numbers in the format |1|2|3|-->
    <xsl:param name="pProcessedList" />
    <xsl:variable name="vCurrItem" select="$pItemsList[1]" />
    <!--Recursion exit condition - check if we have a valid Item-->
    <xsl:if test="$vCurrItem">
      <xsl:variable name="vNum" select="$vCurrItem/@number" />
      <!--Skip processed instances-->
      <xsl:if test="not(contains($pProcessedList, concat('|', $vNum, '|')))">
        <xsl:element name="Group">
          <!--If the item is matched with another item, only the distinct keys of the 2 should be displayed-->
          <xsl:choose>
            <xsl:when test="$vCurrItem/@match">
              <xsl:attribute name="number">
                <xsl:value-of select="concat($vNum, ',', $vCurrItem/@match)" />
              </xsl:attribute>
              <!-- NOTE: kKeyByName is document-wide, so this filter keeps a KEY
                   only when it is the first KEY with that name in the whole
                   document, not the first within this group of items. -->
              <xsl:for-each select="(//ITEM[@number=$vNum or @match=$vNum]/KEY)[generate-id(.)=generate-id(key('kKeyByName', @name)[1])]">
                <xsl:apply-templates select="." />
              </xsl:for-each>
            </xsl:when>
            <xsl:otherwise>
              <xsl:attribute name="number">
                <xsl:value-of select="$vNum" />
              </xsl:attribute>
              <!-- NOTE: this relative path is evaluated against the context
                   node of the caller (xsl:call-template does not change it),
                   not against $vCurrItem. -->
              <xsl:apply-templates select="KEY" />
            </xsl:otherwise>
          </xsl:choose>
        </xsl:element>
      </xsl:if>
      <!--Append processed instances to list to pass on in recursive function-->
      <xsl:variable name="vNewList">
        <xsl:value-of select="$pProcessedList" />
        <xsl:value-of select="concat($vNum, '|')" />
        <xsl:if test="$vCurrItem/@match">
          <xsl:value-of select="concat($vCurrItem/@match, '|')" />
        </xsl:if>
      </xsl:variable>
      <!--Call template recursively to process the rest of the instances-->
      <xsl:call-template name="ProcessItem">
        <xsl:with-param name="pItemsList" select="$pItemsList[position() > 1]" />
        <xsl:with-param name="pProcessedList" select="$vNewList" />
      </xsl:call-template>
    </xsl:if>
  </xsl:template>

  <!-- Copies a KEY element together with its attributes and children. -->
  <xsl:template match="KEY">
    <xsl:copy>
      <xsl:copy-of select="@*|node()" />
    </xsl:copy>
  </xsl:template>

  <!-- Entry point: wraps the groups of all ITEMs in a Result element. -->
  <xsl:template match="/">
    <xsl:element name="Result">
      <xsl:call-template name="ProcessItem">
        <xsl:with-param name="pItemsList" select="//ITEM" />
        <xsl:with-param name="pProcessedList" select="'|'" />
      </xsl:call-template>
    </xsl:element>
  </xsl:template>
</xsl:stylesheet>
If each item has at most one match, you can give the following XSLT a try:
<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" >
  <xsl:output method="xml" indent="yes" />
  <xsl:strip-space elements="*"/>

  <!-- ITEMs indexed by their number attribute. -->
  <xsl:key name="kItemNr" match="ITEM" use="@number" />
  <!-- KEYs indexed by "parent number|own name". -->
  <xsl:key name="kNumberKey" match="KEY" use="concat(../@number, '|', @name )" />

  <!-- Identity template: copies every attribute and node through unchanged. -->
  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Emits a Group for an ITEM unless the ITEM named by its match attribute
       occurs earlier in the document (that earlier ITEM emits the group). -->
  <xsl:template match="ITEM">
    <xsl:if test="not(preceding::ITEM[@number = current()/@match])" >
      <Group>
        <xsl:attribute name="number">
          <xsl:value-of select="@number"/>
          <xsl:if test="@match" >
            <xsl:text>,</xsl:text>
            <xsl:value-of select="@match"/>
          </xsl:if>
        </xsl:attribute>
        <xsl:variable name="itemNr" select="@number"/>
        <!-- Own KEYs plus those KEYs of the matched ITEM whose name this ITEM
             does not already have; sorted by name. -->
        <xsl:apply-templates select="KEY | key('kItemNr', @match)/KEY[
                                     not(key('kNumberKey', concat($itemNr, '|', @name)))]">
          <xsl:sort select="@name"/>
        </xsl:apply-templates>
      </Group>
    </xsl:if>
  </xsl:template>

  <!-- Entry point. The former predicate "count(. | key('kItemNr', number)) = 1"
       tested a child element named number, which selects nothing, so it was
       true for every ITEM; all ITEMs are therefore processed directly. -->
  <xsl:template match="/" >
    <Result>
      <xsl:apply-templates select="//ITEM" />
    </Result>
  </xsl:template>
</xsl:stylesheet>
Which will generate the following output:
<?xml version="1.0"?>
<Result>
<Group number="1"/>
<Group number="2"/>
<Group number="3,5">
<KEY name="key1" value="x"/>
<KEY name="key2" value="y"/>
<KEY name="key3" value="z"/>
</Group>
<Group number="4"/>
<Group number="6,10">
<KEY name="key1" value="x"/>
<KEY name="key2" value="y"/>
<KEY name="key3" value="z"/>
<KEY name="key4" value="a"/>
<KEY name="key5" value="b"/>
</Group>
<Group number="7"/>
<Group number="8">
<KEY name="key1" value="x"/>
</Group>
<Group number="9"/>
</Result>
Update because of changed request:
<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" >
  <xsl:output method="xml" indent="yes"/>

  <!-- ITEMs indexed by their number attribute. -->
  <xsl:key name="kItemNr" match="ITEM" use="@number" />

  <!-- Identity template: copies every attribute and node through unchanged. -->
  <xsl:template match="@*|node()">
    <xsl:copy >
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Emits a Group for an ITEM unless one of the ITEMs listed in its
       comma-separated match attribute occurs earlier in the document. -->
  <xsl:template match="ITEM">
    <!-- Wrapped in commas so that ",N," can be searched for without a number
         matching part of a longer one. -->
    <xsl:variable name="matchStr" select=" concat(',', current()/@match, ',')"/>
    <xsl:if test="not(preceding::ITEM[ contains($matchStr, concat(',', @number, ',') )])" >
      <Group>
        <xsl:attribute name="number">
          <xsl:value-of select="@number"/>
          <xsl:if test="@match" >
            <xsl:text>,</xsl:text>
            <xsl:value-of select="@match"/>
          </xsl:if>
        </xsl:attribute>
        <!-- Own KEYs plus the KEYs of every matched ITEM, skipping a KEY whose
             name already occurs on this ITEM or on a matched ITEM that
             precedes the KEY; sorted by name. -->
        <xsl:apply-templates select="(KEY |
                                      //ITEM[
                                        contains( $matchStr, concat(',', @number, ',') )
                                      ]/KEY[
                                        not((preceding::ITEM[
                                               contains( $matchStr, concat(',', @number, ',') )
                                             ] | current() )/KEY/@name = @name)
                                      ])">
          <xsl:sort select="@name"/>
        </xsl:apply-templates>
      </Group>
    </xsl:if>
  </xsl:template>

  <!-- Entry point. The former predicate "count(. | key('kItemNr', number)) = 1"
       tested a child element named number, which selects nothing, so it was
       true for every ITEM; all ITEMs are therefore processed directly. -->
  <xsl:template match="/" >
    <Result>
      <xsl:apply-templates select="//ITEM" />
    </Result>
  </xsl:template>
</xsl:stylesheet>
This may be quite slow for bigger input data but any way.
Say I have the following XML:
<root>
<tokens>
<token ID="t1">blah</token>
<token ID="t2">blabla</token>
<token ID="t3">shovel</token>
</tokens>
<relatedStuff>
<group gID="s1">
<references tokID="t1"/>
<references tokID="t2"/>
</group>
<group gID="s2">
<references tokID="t3"/>
</group>
</relatedStuff>
</root>
Now, considering that a for-each loop for every token would be pretty inefficient and a bad idea, how would one go about using template matching, to transform this xml into the following?
<s id="everything_merged">
<tok id="t1" gID="s1" >blah</tok>
<tok id="t2" gID="s1" >blabla</tok>
<tok id="t3" gID="s2" >shovel</tok>
</s>
All I want from <s> is the "gID", the gID corresponding to the token in the <tokens>.
<xsl:for-each select="b:root/a:tokens/a:token">
  <!-- and here some template matching -->
  <!-- The select below is a placeholder from the question, not a valid XPath expression. -->
  <xsl:attribute name="gID">
    <xsl:value-of select="--correspondingNode's--@gID"/>
  </xsl:attribute>
</xsl:for-each>
I'm pretty fuzzy on this sort of thing, so thank you very much for any help!
The following stylesheet:
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

  <!-- Entry point: wraps the transformed tokens in a single s element. -->
  <xsl:template match="/">
    <s id="everything_merged">
      <xsl:apply-templates select="/root/tokens/token" />
    </s>
  </xsl:template>

  <!-- Turns a token into a tok element. Its gID is taken from the group that
       contains a references element whose tokID equals this token's ID. -->
  <xsl:template match="token">
    <tok id="{@ID}" gID="{/root/relatedStuff/group[
                          references[@tokID=current()/@ID]]/@gID}">
      <xsl:apply-templates />
    </tok>
  </xsl:template>
</xsl:stylesheet>
Applied to this input (corrected for well-formedness):
<root>
<tokens>
<token ID="t1">blah</token>
<token ID="t2">blabla</token>
<token ID="t3">shovel</token>
</tokens>
<relatedStuff>
<group gID="s1">
<references tokID="t1" />
<references tokID="t2" />
</group>
<group gID="s2">
<references tokID="t3" />
</group>
</relatedStuff>
</root>
Produces:
<s id="everything_merged">
<tok id="t1" gID="s1">blah</tok>
<tok id="t2" gID="s1">blabla</tok>
<tok id="t3" gID="s2">shovel</tok>
</s>
A solution using keys and pure "push-style":
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output omit-xml-declaration="yes" indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- Indexes each gID attribute by the tokID values found on the child
       elements of the element that owns it. -->
  <xsl:key name="kgIDfromTokId" match="@gID"
           use="../*/@tokID"/>

  <!-- Replaces the tokens wrapper with a single s element. -->
  <xsl:template match="tokens">
    <s id="everything_merged">
      <xsl:apply-templates/>
    </s>
  </xsl:template>

  <!-- Turns a token into a tok element, looking its gID up through the key. -->
  <xsl:template match="token">
    <tok id="{@ID}" gID="{key('kgIDfromTokId', @ID)}">
      <xsl:apply-templates/>
    </tok>
  </xsl:template>
</xsl:stylesheet>
when applied on the provided XML document:
<root>
<tokens>
<token ID="t1">blah</token>
<token ID="t2">blabla</token>
<token ID="t3">shovel</token>
</tokens>
<relatedStuff>
<group gID="s1">
<references tokID="t1" />
<references tokID="t2" />
</group>
<group gID="s2">
<references tokID="t3" />
</group>
</relatedStuff>
</root>
the wanted, correct result is produced:
<s id="everything_merged">
<tok id="t1" gID="s1">blah</tok>
<tok id="t2" gID="s1">blabla</tok>
<tok id="t3" gID="s2">shovel</tok>
</s>
I have looked at Muenchian Grouping - group within a node, not within the entire document but it is not quite working for me. The Muenchian method alone does not do it either for me.
I have also looked at XSLT 1.0: grouping and removing duplicate but cannot follow it completely.
I have the following XML:
<?xml version="1.0" encoding="UTF-8"?>
<MT_MATERIALDATA>
<items item="475053">
<Recordset>
<CodeBusinessUnit>99</CodeBusinessUnit>
<PriceValue>250</PriceValue>
</Recordset>
<Recordset>
<CodeBusinessUnit>1</CodeBusinessUnit>
<PriceValue>250</PriceValue>
</Recordset>
</items>
<items item="475054">
<Recordset>
<CodeBusinessUnit>1</CodeBusinessUnit>
<PriceValue>255.34</PriceValue>
</Recordset>
<Recordset>
<CodeBusinessUnit>10</CodeBusinessUnit>
<PriceValue>299</PriceValue>
</Recordset>
</items>
</MT_MATERIALDATA>
The outcome should look like this:
<?xml version="1.0" encoding="UTF-8"?>
<MT_MATERIALDATA>
<Mi item="475053">
<PriceList>
<Prices>
<Price Value="250"/>
<PriceConfig>
<Stores>99,1</Stores>
</PriceConfig>
</Prices>
</PriceList>
</Mi>
<Mi item="475054">
<PriceList>
<Prices>
<Price Value="255.34"/>
<PriceConfig>
<Stores>1</Stores>
</PriceConfig>
</Prices>
<Prices>
<Price Value="299"/>
<PriceConfig>
<Stores>10</Stores>
</PriceConfig>
</Prices>
</PriceList>
</Mi>
</MT_MATERIALDATA>
So for matching <PriceValue> elements
in <Recordset>, all respective <CodeBusinessUnits> need to be listed in <Stores>.
If not, an extra <Prices> node needs to be created.
I have been trying for hours but either the Store-numbers are always duplicate or they are not aggregated even if the PriceValue is the same.
This transformation:
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output omit-xml-declaration="yes" indent="yes"/>

  <!-- Indexes each PriceValue by "item|price", where item is the item
       attribute two levels up. -->
  <xsl:key name="kPriceByValAndItem" match="PriceValue"
           use="concat(../../@item, '|', .)"/>

  <!-- Rebuilds the document element. -->
  <xsl:template match="/*">
    <MT_MATERIALDATA>
      <xsl:apply-templates/>
    </MT_MATERIALDATA>
  </xsl:template>

  <!-- One MI per items element, holding one Prices entry per distinct price. -->
  <xsl:template match="items">
    <MI item="{@item}">
      <PriceList>
        <!-- Muenchian grouping: keep only the first PriceValue of each
             item|price group. -->
        <xsl:for-each select="*/PriceValue
                              [generate-id()
                               = generate-id(key('kPriceByValAndItem',
                                                 concat(../../@item, '|', .))[1])]">
          <Prices>
            <Price Value="{.}"/>
            <PriceConfig>
              <Stores>
                <!-- Comma-separated business units sharing this item and price. -->
                <xsl:for-each select="key('kPriceByValAndItem',
                                          concat(../../@item, '|', .))">
                  <xsl:value-of select="../CodeBusinessUnit"/>
                  <xsl:if test="not(position()=last())">,</xsl:if>
                </xsl:for-each>
              </Stores>
            </PriceConfig>
          </Prices>
        </xsl:for-each>
      </PriceList>
    </MI>
  </xsl:template>
</xsl:stylesheet>
when applied on the provided XML document:
<MT_MATERIALDATA>
<items item="475053">
<Recordset>
<CodeBusinessUnit>99</CodeBusinessUnit>
<PriceValue>250</PriceValue>
</Recordset>
<Recordset>
<CodeBusinessUnit>1</CodeBusinessUnit>
<PriceValue>250</PriceValue>
</Recordset>
</items>
<items item="475054">
<Recordset>
<CodeBusinessUnit>1</CodeBusinessUnit>
<PriceValue>255.34</PriceValue>
</Recordset>
<Recordset>
<CodeBusinessUnit>10</CodeBusinessUnit>
<PriceValue>299</PriceValue>
</Recordset>
</items>
</MT_MATERIALDATA>
produces the wanted, correct result:
<MT_MATERIALDATA>
<MI item="475053">
<PriceList>
<Prices>
<Price Value="250"/>
<PriceConfig>
<Stores>99,1</Stores>
</PriceConfig>
</Prices>
</PriceList>
</MI>
<MI item="475054">
<PriceList>
<Prices>
<Price Value="255.34"/>
<PriceConfig>
<Stores>1</Stores>
</PriceConfig>
</Prices>
<Prices>
<Price Value="299"/>
<PriceConfig>
<Stores>10</Stores>
</PriceConfig>
</Prices>
</PriceList>
</MI>
</MT_MATERIALDATA>
I think the following solves the problem, at least for the grouping:
<xsl:stylesheet
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  version="1.0">
  <xsl:output indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- Indexes each Recordset by "id of its parent items element|PriceValue". -->
  <xsl:key name="k1" match="items/Recordset" use="concat(generate-id(..), '|', PriceValue)"/>

  <!-- Identity template: copies every attribute and node through unchanged. -->
  <xsl:template match="@* | node()">
    <xsl:copy>
      <xsl:apply-templates select="@* | node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- One Mi per items element; only the first Recordset of each price group
       is processed (Muenchian grouping). -->
  <xsl:template match="items">
    <Mi item="{@item}">
      <PriceList>
        <xsl:apply-templates select="Recordset[generate-id() = generate-id(key('k1', concat(generate-id(..), '|', PriceValue))[1])]"/>
      </PriceList>
    </Mi>
  </xsl:template>

  <!-- Emits the Prices entry for a price group, listing every business unit
       of the Recordsets in that group. -->
  <xsl:template match="Recordset">
    <Prices>
      <Price Value="{PriceValue}"/>
      <PriceConfig>
        <Stores>
          <xsl:apply-templates select="key('k1', concat(generate-id(..), '|', PriceValue))/CodeBusinessUnit"/>
        </Stores>
      </PriceConfig>
    </Prices>
  </xsl:template>

  <!-- Writes the unit value, preceded by a comma for all but the first. -->
  <xsl:template match="CodeBusinessUnit">
    <xsl:if test="position() > 1">,</xsl:if>
    <xsl:value-of select="."/>
  </xsl:template>
</xsl:stylesheet>
I'm also going to post a stylesheet, because everybody does it:
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

  <!-- Indexes each CodeBusinessUnit by "item++price". -->
  <xsl:key name="kBUnitByItem-Price"
           match="CodeBusinessUnit"
           use="concat(../../@item, '++', ../PriceValue)"/>

  <!-- Entry point: emits the result root element. -->
  <xsl:template match="/">
    <MT_MATERIALDATA>
      <xsl:apply-templates/>
    </MT_MATERIALDATA>
  </xsl:template>

  <!-- One MI per items element. -->
  <xsl:template match="items">
    <MI item="{@item}">
      <PriceList>
        <xsl:apply-templates/>
      </PriceList>
    </MI>
  </xsl:template>

  <!-- Matches only the first CodeBusinessUnit of each item++price group and
       emits the Prices entry for the whole group. -->
  <xsl:template match="CodeBusinessUnit[
                         count(.|key('kBUnitByItem-Price',
                                     concat(../../@item,'++',../PriceValue)
                                    )[1]
                              ) = 1
                       ]">
    <Prices>
      <Price Value="{../PriceValue}"/>
      <PriceConfig>
        <Stores>
          <xsl:apply-templates
            select="key('kBUnitByItem-Price',
                        concat(../../@item,'++',../PriceValue))"
            mode="sequence"/>
        </Stores>
      </PriceConfig>
    </Prices>
  </xsl:template>

  <!-- Suppresses all text that is not explicitly output elsewhere. -->
  <xsl:template match="text()"/>

  <!-- Writes the node value, preceded by a comma for all but the first. -->
  <xsl:template match="node()" mode="sequence">
    <xsl:if test="position()!=1">,</xsl:if>
    <xsl:value-of select="."/>
  </xsl:template>
</xsl:stylesheet>
Note: Grouping stores by item and price. A little more pull than push style (That's because there is no duplicate #item.)
Output:
<MT_MATERIALDATA>
<MI item="475053">
<PriceList>
<Prices>
<Price Value="250" />
<PriceConfig>
<Stores>99,1</Stores>
</PriceConfig>
</Prices>
</PriceList>
</MI>
<MI item="475054">
<PriceList>
<Prices>
<Price Value="255.34" />
<PriceConfig>
<Stores>1</Stores>
</PriceConfig>
</Prices>
<Prices>
<Price Value="299" />
<PriceConfig>
<Stores>10</Stores>
</PriceConfig>
</Prices>
</PriceList>
</MI>
</MT_MATERIALDATA>
I think we cover all the variations: key value, push-pull, sequence separator condition.