以下是对Nvidia CUDA的rootbeer示例代码,我与熊蜂和optirun在笔记本电脑上运行与Ubuntu 12.04(精确)。 这款笔记本电脑采用了NVIDIA的Optimus,因此optirun。 该GPU恰好是了NVIDIA GeForce GT 540M其中Nvidia的网站上说,有96个内核。 我得到几乎没有吞吐量增益。 问题是什么?
package com.random.test;
import java.util.ArrayList;
import java.util.Formatter;
import java.util.List;
import edu.syr.pcpratts.rootbeer.runtime.Kernel;
import edu.syr.pcpratts.rootbeer.runtime.Rootbeer;
public class ArraySumApp {
final static int numberOfJobs = 1024; // 1024 in the original example
final static int sizeOfArray = 512; // 512 in the original example
final static int theAnswer = 130816;
public int[] sumArrays(List<int[]> arrays) {
List<Kernel> jobs = new ArrayList<Kernel>();
int[] ret = new int[arrays.size()];
for (int i = 0; i < arrays.size(); ++i) {
jobs.add(new ArraySum(arrays.get(i), ret, i));
}
Rootbeer rootbeer = new Rootbeer();
rootbeer.runAll(jobs);
return ret;
}
private static long measureOneJob() {
int[] source = new int[ArraySumApp.sizeOfArray];
int[] destination = new int[1];
for (int i = 0; i < ArraySumApp.sizeOfArray; i++)
source[i] = i;
Kernel job = new ArraySum(source, destination, 0);
ElapsedTimer et = new ElapsedTimer();
job.gpuMethod();
long timeInMs = et.stopInMilliseconds();
System.out.println("measureOneJob " + et.stringInMilliseconds());
assert destination[0] == ArraySumApp.theAnswer : "cosmic rays";
return timeInMs;
}
public static void main(String[] args) {
Helper.assertAssertionEnabled();
// measure the time to do one job
ArraySumApp.measureOneJob();
long oneJob = ArraySumApp.measureOneJob();
ArraySumApp app = new ArraySumApp();
List<int[]> arrays = new ArrayList<int[]>();
// you want 1000s of threads to run on the GPU all at once for speedups
for (int i = 0; i < ArraySumApp.numberOfJobs; ++i) {
int[] array = new int[ArraySumApp.sizeOfArray];
for (int j = 0; j < array.length; ++j) {
array[j] = j;
}
arrays.add(array);
}
ElapsedTimer et = new ElapsedTimer();
int[] sums = app.sumArrays(arrays);
long allJobs = et.stopInMilliseconds();
System.out.println("measureAllJobs " + et.stringInMilliseconds());
double gainFactor = ((double) ArraySumApp.numberOfJobs) * oneJob
/ allJobs;
System.out.println(String.format(
"throughput gain factor %.1f\nthroughput gain %.1f\n",
gainFactor, gainFactor - 1.0d));
// check the number of answers is correct
assert sums.length == ArraySumApp.numberOfJobs : "cosmic rays";
// check they all have the answer
for (int i = 0; i < ArraySumApp.numberOfJobs; i++)
assert sums[i] == ArraySumApp.theAnswer : "cosmic rays";
}
}
class ArraySum implements Kernel {
final static int repetitionFactor = 100000;
private int[] source;
private int[] ret;
private int index;
public ArraySum(int[] src, int[] dst, int i) {
source = src;
ret = dst;
index = i;
}
public void gpuMethod() {
for (int repetition = 0; repetition < ArraySum.repetitionFactor; repetition++) {
int sum = 0;
for (int i = 0; i < source.length; ++i) {
sum += source[i];
}
ret[index] = sum;
}
}
}
class Helper {
private Helper() {
}
static void assertAssertionEnabled() {
try {
assert false;
} catch (AssertionError e) {
return;
}
Helper.noteCosmicRays();
}
static void noteCosmicRays() // programmer design or logic error
{
throw new RuntimeException("cosmic rays");
}
}
class ElapsedTimer {
private org.joda.time.DateTime t0;
private long savedStopInMilliseconds;
public ElapsedTimer() {
this.t0 = new org.joda.time.DateTime();
}
public long stopInMilliseconds() {
return stop();
}
public String stringInMilliseconds() // relies on a saved stop
{
Formatter f = new Formatter();
f.format("%d ms", this.savedStopInMilliseconds);
String s = f.toString();
f.close();
return s;
}
public String stopStringInMilliseconds() {
stop();
return stringInMilliseconds();
}
public String stringInSecondsAndMilliseconds() // relies on a saved stop
{
Formatter f = new Formatter();
f.format("%5.3f s", this.savedStopInMilliseconds / 1000.0d);
String s = f.toString();
f.close();
return s;
}
public String stopStringInSecondsAndMilliseconds() {
stop();
return stringInSecondsAndMilliseconds();
}
public long stopInSeconds() {
return (stop() + 500L) / 1000L; // rounding
}
public String stringInSeconds() // relies on a saved stop
{
Formatter f = new Formatter();
long elapsed = (this.savedStopInMilliseconds + 500L) / 1000L; // rounding
f.format("%d s", elapsed);
String s = f.toString();
f.close();
return s;
}
public String stopStringInSeconds() {
stop();
return stringInSeconds();
}
/**
* This is private. Use the stopInMilliseconds method if this is what you
* need.
*/
private long stop() {
org.joda.time.DateTime t1 = new org.joda.time.DateTime();
savedStopInMilliseconds = t1.getMillis() - this.t0.getMillis();
return savedStopInMilliseconds;
}
}
这是输出:
measureOneJob 110 ms
measureOneJob 26 ms
CudaRuntime2 ctor: elapsedTimeMillis: 609
measureAllJobs 24341 ms
throughput gain factor 1.1
throughput gain 0.1