Package org.apfloat.aparapi
Class IntKernel
- java.lang.Object
-
- com.aparapi.Kernel
-
- org.apfloat.aparapi.IntKernel
-
- All Implemented Interfaces:
java.lang.Cloneable
class IntKernel extends com.aparapi.Kernel
Kernel for the int
element type. Contains everything needed for the NTT. The data is organized in columns, not rows, for efficient processing on the GPU. Due to the extreme parallelization requirements (global size should be at least 1024) this algorithm works efficiently only with 4 million decimal digit calculations or bigger. However, with 4 million digits, it's only approximately as fast as the pure-Java version (depending on the GPU and CPU hardware). On the other hand, the algorithm mathematically only works up to about 226 million digits. So the useful range is only somewhere around 10-200 million digits.
Some notes about the aparapi specific requirements for code that must be converted to OpenCL:
- assert() does not work
- Can't check for null
- Can't get array length
- Arrays referenced by the kernel can't be null even if they are not accessed
- Arrays referenced by the kernel can't be zero-length even if they are not accessed
- Can't invoke methods in other classes e.g. enclosing class of an inner class
- Early return statements do not work
- Variables used inside loops must be initialized before the loop
- Must compile the class with full debug information, i.e. with -g
- Since:
- 1.8.3
- Version:
- 1.9.0
-
-
Nested Class Summary
-
Nested classes/interfaces inherited from class com.aparapi.Kernel
com.aparapi.Kernel.Constant, com.aparapi.Kernel.Entry, com.aparapi.Kernel.EXECUTION_MODE, com.aparapi.Kernel.KernelState, com.aparapi.Kernel.Local, com.aparapi.Kernel.NoCL, com.aparapi.Kernel.OpenCLDelegate, com.aparapi.Kernel.OpenCLMapping, com.aparapi.Kernel.PrivateMemorySpace
-
-
Field Summary
Fields Modifier and Type Field Description private int
columns
private int[]
data
private int[]
index
private int
indexCount
static int
INVERSE_TRANSFORM_COLUMNS
static int
INVERSE_TRANSFORM_ROWS
private long
inverseModulus
private static java.lang.ThreadLocal<IntKernel>
kernel
private int
length
private int
modulus
static int
MULTIPLY_ELEMENTS
private int
n2
private int
offset
private int
op
private int[]
permutationTable
private int
permutationTableLength
static int
PERMUTE
private int
rows
private int
scaleFactor
private int
startColumn
private int
startRow
private int
stride
static int
TRANSFORM_COLUMNS
static int
TRANSFORM_ROWS
static int
TRANSPOSE
private int
w
private int
w1
private int
w2
private int[]
wTable
private int
ww
-
Constructor Summary
Constructors Modifier Constructor Description private
IntKernel()
-
Method Summary
All Methods Static Methods Instance Methods Concrete Methods Modifier and Type Method Description private void
columnScramble(int offset)
private void
columnTableFNT()
static IntKernel
getInstance()
int
getModulus()
private void
inverseColumnTableFNT()
private int
modAdd(int a, int b)
private int
modMultiply(int a, int b)
private int
modPow(int a, int n)
private int
modSubtract(int a, int b)
private void
multiplyElements()
private void
permute()
void
run()
void
setArrayAccess(ArrayAccess arrayAccess)
void
setColumns(int columns)
void
setIndex(int[] index)
void
setIndexCount(int indexCount)
void
setLength(int length)
void
setModulus(int modulus)
void
setN2(int n2)
void
setOp(int op)
void
setPermutationTable(int[] permutationTable)
void
setRows(int rows)
void
setScaleFactor(int scaleFactor)
void
setStartColumn(int startColumn)
void
setStartRow(int startRow)
void
setW(int w)
void
setW1(int w1)
void
setW2(int w2)
void
setWTable(int[] wTable)
void
setWw(int ww)
private void
transformColumns()
private void
transpose()
-
Methods inherited from class com.aparapi.Kernel
abs, abs, abs, abs, acos, acos, acospi, acospi, addExecutionModes, asin, asin, asinpi, asinpi, atan, atan, atan2, atan2, atan2pi, atan2pi, atanpi, atanpi, atomicAdd, atomicAdd, atomicAnd, atomicCmpXchg, atomicDec, atomicGet, atomicInc, atomicMax, atomicMin, atomicOr, atomicSet, atomicSub, atomicXchg, atomicXor, cancelMultiPass, cbrt, cbrt, ceil, ceil, cleanUpArrays, clone, clz, clz, compile, compile, cos, cos, cosh, cosh, cospi, cospi, createRange, dispose, execute, execute, execute, execute, execute, execute, executeFallbackAlgorithm, exp, exp, exp10, exp10, exp2, exp2, expm1, expm1, floor, floor, fma, fma, get, get, get, get, get, get, get, get, get, get, get, get, get, get, get, get, get, get, get, get, get, getAccumulatedExecutionTime, getAccumulatedExecutionTimeAllThreads, getAccumulatedExecutionTimeCurrentThread, getCancelState, getConversionTime, getCurrentPass, getExecutionMode, getExecutionTime, getGlobalId, getGlobalId, getGlobalSize, getGlobalSize, getGroupId, getGroupId, getKernelCompileWorkGroupSize, getKernelLocalMemSizeInUse, getKernelMaxWorkGroupSize, getKernelMinimumPrivateMemSizeInUsePerWorkItem, getKernelPreferredWorkGroupSizeMultiple, getKernelState, getLocalId, getLocalId, getLocalSize, getLocalSize, getMappedMethodName, getNumGroups, getNumGroups, getPassId, getProfileInfo, getProfileReportCurrentThread, getProfileReportLastThread, getTargetDevice, globalBarrier, hasFallbackAlgorithm, hasNextExecutionMode, hypot, hypot, IEEEremainder, IEEEremainder, invalidateCaches, isAllowDevice, isAutoCleanUpArrays, isExecuting, isExplicit, isMappedMethod, isOpenCLDelegateMethod, isRunningCL, localBarrier, localGlobalBarrier, log, log, log10, log10, log1p, log1p, log2, log2, mad, mad, max, max, max, max, min, min, min, min, nextAfter, nextAfter, popcount, popcount, pow, pow, put, put, put, put, put, put, put, put, put, put, put, put, put, put, put, put, put, put, put, put, put, registerProfileReportObserver, rint, rint, round, round, rsqrt, rsqrt, 
setAutoCleanUpArrays, setExecutionMode, setExecutionModeWithoutFallback, setExplicit, setFallbackExecutionMode, sin, sin, sinh, sinh, sinpi, sinpi, sqrt, sqrt, tan, tan, tanh, tanh, tanpi, tanpi, toDegrees, toDegrees, toRadians, toRadians, toString, tryNextExecutionMode, usesAtomic32, usesAtomic64
-
-
-
-
Field Detail
-
kernel
private static java.lang.ThreadLocal<IntKernel> kernel
-
TRANSFORM_ROWS
public static final int TRANSFORM_ROWS
- See Also:
- Constant Field Values
-
INVERSE_TRANSFORM_ROWS
public static final int INVERSE_TRANSFORM_ROWS
- See Also:
- Constant Field Values
-
stride
private int stride
-
length
private int length
-
data
private int[] data
-
offset
private int offset
-
wTable
private int[] wTable
-
permutationTable
private int[] permutationTable
-
permutationTableLength
private int permutationTableLength
-
modulus
private int modulus
-
inverseModulus
private long inverseModulus
-
TRANSPOSE
public static final int TRANSPOSE
- See Also:
- Constant Field Values
-
PERMUTE
public static final int PERMUTE
- See Also:
- Constant Field Values
-
n2
private int n2
-
index
private int[] index
-
indexCount
private int indexCount
-
MULTIPLY_ELEMENTS
public static final int MULTIPLY_ELEMENTS
- See Also:
- Constant Field Values
-
startRow
private int startRow
-
startColumn
private int startColumn
-
rows
private int rows
-
columns
private int columns
-
w
private int w
-
scaleFactor
private int scaleFactor
-
TRANSFORM_COLUMNS
public static final int TRANSFORM_COLUMNS
- See Also:
- Constant Field Values
-
INVERSE_TRANSFORM_COLUMNS
public static final int INVERSE_TRANSFORM_COLUMNS
- See Also:
- Constant Field Values
-
op
private int op
-
ww
private int ww
-
w1
private int w1
-
w2
private int w2
-
-
Method Detail
-
getInstance
public static IntKernel getInstance()
-
setLength
public void setLength(int length)
-
setArrayAccess
public void setArrayAccess(ArrayAccess arrayAccess) throws ApfloatRuntimeException
- Throws:
ApfloatRuntimeException
-
setWTable
public void setWTable(int[] wTable)
-
setPermutationTable
public void setPermutationTable(int[] permutationTable)
-
columnTableFNT
private void columnTableFNT()
-
inverseColumnTableFNT
private void inverseColumnTableFNT()
-
columnScramble
private void columnScramble(int offset)
-
modMultiply
private int modMultiply(int a, int b)
-
modAdd
private int modAdd(int a, int b)
-
modSubtract
private int modSubtract(int a, int b)
-
setModulus
public void setModulus(int modulus)
-
getModulus
public int getModulus()
-
setN2
public void setN2(int n2)
-
setIndex
public void setIndex(int[] index)
-
setIndexCount
public void setIndexCount(int indexCount)
-
transpose
private void transpose()
-
permute
private void permute()
-
setStartRow
public void setStartRow(int startRow)
-
setStartColumn
public void setStartColumn(int startColumn)
-
setRows
public void setRows(int rows)
-
setColumns
public void setColumns(int columns)
-
setW
public void setW(int w)
-
setScaleFactor
public void setScaleFactor(int scaleFactor)
-
multiplyElements
private void multiplyElements()
-
modPow
private int modPow(int a, int n)
-
setOp
public void setOp(int op)
-
setWw
public void setWw(int ww)
-
setW1
public void setW1(int w1)
-
setW2
public void setW2(int w2)
-
run
public void run()
- Specified by:
run
in class com.aparapi.Kernel
-
transformColumns
private void transformColumns()
-
-