Example

Input data

$ nzsql -d mapreduce_db -c "select * from wordcount_input"
ID | TEXT
----+-----------------------------------
1 | Hello World Bye World
2 | Hello mapreduce Goodbye mapreduce
3 | Hello INZA
(3 rows)

Configuration (wordcount.xml)

<?xml version="1.0" encoding="UTF-8"?>
<configuration>
<property><name>mapreduce.job.run.dir</name><value>/nz/export/ae/workspace/mapreduce/myjob</value></property>
<property><name>mapreduce.job.jar</name><value>/nz/export/ae/products/netezza/mapreduce/current/mapreduce-examples.jar</value></property>
<property><name>mapreduce.job.map.class</name><value>org.netezza.inza.mr.examples.WordCount$Map</value></property>
<property><name>mapreduce.map.input.key.class</name><value>org.netezza.inza.mr.io.LongWritable</value></property>
<property><name>mapreduce.map.input.value.class</name><value>org.netezza.inza.mr.io.Text</value></property>
<property><name>mapreduce.map.output.key.class</name><value>org.netezza.inza.mr.io.Text</value></property>
<property><name>mapreduce.map.output.key.columns.sizes</name><value>100</value></property>
<property><name>mapreduce.map.output.value.class</name><value>org.netezza.inza.mr.io.IntWritable</value></property>
<property><name>mapreduce.map.output.value.columns.sizes</name><value>-1</value></property>
<property><name>mapreduce.job.reduce.class</name><value>org.netezza.inza.mr.examples.WordCount$Reduce</value></property>
<property><name>mapreduce.reduce.output.key.class</name><value>org.netezza.inza.mr.io.Text</value></property>
<property><name>mapreduce.reduce.output.key.columns.sizes</name><value>100</value></property>
<property><name>mapreduce.reduce.output.value.class</name><value>org.netezza.inza.mr.io.IntWritable</value></property>
<property><name>mapreduce.reduce.output.value.columns.sizes</name><value>-1</value></property>
</configuration>

SQL query (wordcount.sql)

-- Word-count example: builds wordcount_output from wordcount_input by calling
-- the INZA..MAPAE table function, then the INZA..REDUCEAE table function.
-- Both calls receive the same wordcount.xml job configuration path through
-- the NZAE_PARAMETER2 argument string.
CREATE TABLE wordcount_output AS
WITH
-- MAPOUT_RAW: passes each input row's id and text to INZA..MAPAE and exposes
-- the function's K0 / V0 output columns. DATASLICEID is selected here but is
-- not referenced by any later CTE in this statement.
MAPOUT_RAW AS (
SELECT DATASLICEID, M.K0 AS K0, M.V0 AS V0
FROM wordcount_input AS I, TABLE WITH FINAL (INZA..MAPAE(I.id,
I.text,
'NZAE_PARAMETER2="/nz/export/ae/workspace/mapreduce/myjob/wordcount.xml"'
)) AS M
)
,
-- INTERMEDIATEOUT_REDISTRIBUTED_GLOBALLY: carries K0 / V0 forward and adds RK,
-- a RANK() window partitioned by K0. ORDER BY -1 sorts on a constant, so it
-- imposes no real ordering inside a partition.
-- NOTE(review): judging by the CTE name, the window presumably exists to bring
-- rows with the same K0 together before the reduce call -- confirm.
INTERMEDIATEOUT_REDISTRIBUTED_GLOBALLY AS (
SELECT RIN.K0 AS K0, RIN.V0 AS V0,
RANK() OVER (PARTITION BY RIN.K0 ORDER BY -1) AS RK
FROM MAPOUT_RAW AS RIN
)
,
-- REDUCEOUT_RAW: passes K0 / V0 to INZA..REDUCEAE and exposes its K0 / V0
-- output columns.
REDUCEOUT_RAW AS (
SELECT
-- The condition below (RK IS NULL OR RK IS NOT NULL) is true for every row,
-- so this expression always evaluates to R.K0.
-- NOTE(review): presumably it is written this way only to reference IRG.RK so
-- that the window step above is not dropped as unused -- confirm.
CASE WHEN IRG.RK IS NULL OR IRG.RK IS NOT NULL THEN R.K0
END AS K0
,
R.V0 AS V0
FROM INTERMEDIATEOUT_REDISTRIBUTED_GLOBALLY AS IRG,
TABLE WITH FINAL(INZA..REDUCEAE(IRG.K0, IRG.V0,
'NZAE_PARAMETER2="/nz/export/ae/workspace/mapreduce/myjob/wordcount.xml"'
)) AS R
)
-- Final projection: K0 becomes the word column and V0 the count column of
-- wordcount_output.
SELECT JO.K0 AS word, JO.V0 AS count
FROM REDUCEOUT_RAW AS JO

Output

$ nzsql -d mapreduce_db -c "select * from wordcount_output"
WORD | COUNT
-----------+-------
Hello | 3
INZA | 1
World | 2
mapreduce | 2
Bye | 1
Goodbye | 1
(6 rows)