Prime

From Łukasz, 6 years ago, written in C++.

Embed

Download Paste or View Raw
Hits: 260

#include "cuda_runtime.h"

#include "device_launch_parameters.h"

#include <stdio.h>

#include <iostream>

#include <cstdio>

#include <ctime>

using namespace std;

cudaError_t addWithCuda(int *is_prime, unsigned long long int *number, unsigned long long int *sqrtt);

// Trial-division kernel: raises a flag when any candidate divisor in
// [2, *sqrtt] divides *number.
//
// Launch layout: 1-D grid of 1-D blocks of any size. Each thread starts at its
// flat global index and advances by the total number of launched threads
// (grid-stride loop), so every candidate is visited by exactly one thread.
// No shared memory is used and no barrier is required.
//
// Parameters (all device pointers):
//   is_prime - result flag. Despite the name, the kernel stores 1 when a
//              divisor is found (the number is NOT prime) and otherwise never
//              writes it, so the caller must zero the flag before the launch.
//   number   - the value under test.
//   sqrtt    - inclusive upper bound for candidate divisors; the caller is
//              expected to pass floor(sqrt(*number)).
__global__ void addKernel(int *is_prime, unsigned long long int *number, unsigned long long int *sqrtt)
{
    // Read the loop-invariant inputs from global memory once.
    const unsigned long long int n = *number;
    const unsigned long long int limit = *sqrtt;

    // Widen before multiplying so the index/stride math is done in 64 bits.
    const unsigned long long int stride =
        (unsigned long long int)gridDim.x * blockDim.x;
    unsigned long long int i =
        (unsigned long long int)blockIdx.x * blockDim.x + threadIdx.x;

    for (; i <= limit; i += stride)
    {
        // 0 and 1 are not meaningful divisors.
        if (i > 1 && n % i == 0)
        {
            // Several threads may store concurrently; they all store the same
            // value, so the outcome does not depend on ordering.
            *is_prime = 1;
            return; // this thread has nothing more to contribute
        }
    }
}

// CPU reference primality test by trial division. Prints the verdict to
// stdout and returns it.
//
// Parameters:
//   number - pointer to the value to test; must not be null.
// Returns:
//   1 when *number is prime, 0 otherwise (including for 0 and 1).
int is_prime_number(unsigned long long int *number)
{
    const unsigned long long int n = *number;

    // 0 and 1 are not prime by definition.
    bool prime = (n >= 2);

    // Start at 2 so even numbers are rejected. `i <= n / i` is `i * i <= n`
    // without overflow and without floating-point sqrt rounding; the bound is
    // inclusive so perfect squares are rejected as well.
    for (unsigned long long int i = 2; prime && i <= n / i; ++i)
    {
        if (n % i == 0)
        {
            prime = false;
        }
    }

    if (!prime)
    {
        std::cout << "(cpu)liczba nie jest pierwsza\n";
        return 0;
    }

    std::cout << "(cpu) liczba pierwsza\n";
    return 1;
}

// Entry point: tests 2^61 - 1 for primality on the CPU and then on the GPU,
// printing each verdict together with the elapsed time.
// Returns 0 on success and 1 when any CUDA step fails.
int main()
{
    // Plain locals instead of `new`: nothing to leak, nothing to free.
    unsigned long long int number = (((unsigned long long int)1) << 61) - 1; // 2^61 - 1
    int is_prime = 0;

    // Inclusive divisor bound floor(sqrt(number)). A double cannot represent
    // every 64-bit integer, so correct the truncated estimate with integer
    // arithmetic (division form avoids overflow of r * r).
    unsigned long long int sqrtt = (unsigned long long int)sqrt((double)number);
    while (sqrtt > 0 && sqrtt > number / sqrtt)
    {
        --sqrtt;
    }
    while (sqrtt + 1 <= number / (sqrtt + 1))
    {
        ++sqrtt;
    }

    // CPU reference run; clock() ticks are converted to milliseconds.
    clock_t start1 = clock();
    is_prime_number(&number);
    printf("Czas wykonywania: %.0f ms\n",
           1000.0 * (double)(clock() - start1) / CLOCKS_PER_SEC);

    // GPU run; addWithCuda prints its own verdict and timing.
    cudaError_t cudaStatus = addWithCuda(&is_prime, &number, &sqrtt);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "addWithCuda failed!");
        system("pause");
        return 1;
    }

    // cudaDeviceReset must be called before exiting in order for profiling and
    // tracing tools such as Nsight and Visual Profiler to show complete traces.
    cudaStatus = cudaDeviceReset();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceReset failed!");
        system("pause");
        return 1;
    }

    // NOTE(review): `pause` is a Windows shell command; on other platforms
    // this call only produces a shell error.
    system("pause");
    return 0;
}

// Host wrapper for the GPU primality test: copies the inputs to the device,
// launches addKernel, copies the result flag back, and prints the verdict and
// the elapsed time to stdout.
//
// Parameters (all host pointers):
//   is_prime - out: set to 1 when a divisor of *number was found (the number
//              is NOT prime), 0 otherwise.
//   number   - in: the value to test.
//   sqrtt    - in: inclusive upper bound for candidate divisors,
//              expected to be floor(sqrt(*number)).
// Returns:
//   cudaSuccess, or the first CUDA error encountered. Device buffers are
//   released on every path.
cudaError_t addWithCuda(int *is_prime, unsigned long long int *number, unsigned long long int *sqrtt)
{
    unsigned long long int *dev_a = 0; // device copy of *number
    unsigned long long int *dev_b = 0; // device copy of *sqrtt
    int *dev_c = 0;                    // device result flag
    // Declared before the first `goto Error` so that no jump skips an
    // initialization.
    clock_t start = 0;
    cudaError_t cudaStatus;

    // Choose which GPU to run on, change this on a multi-GPU system.
    cudaStatus = cudaSetDevice(0);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?");
        goto Error;
    }

    // Allocate the three single-element device buffers.
    cudaStatus = cudaMalloc((void**)&dev_c, sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    cudaStatus = cudaMalloc((void**)&dev_a, sizeof(unsigned long long int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    cudaStatus = cudaMalloc((void**)&dev_b, sizeof(unsigned long long int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    // The kernel only ever writes 1, so the flag must start at 0 on the device.
    cudaStatus = cudaMemset(dev_c, 0, sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemset failed!");
        goto Error;
    }

    // Copy the inputs from host memory to the device buffers.
    cudaStatus = cudaMemcpy(dev_a, number, sizeof(unsigned long long int), cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

    // Copy the full 64-bit bound; sizeof(int) would leave half of it unset.
    cudaStatus = cudaMemcpy(dev_b, sqrtt, sizeof(unsigned long long int), cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

    start = clock();

    // The kernel uses a grid-stride loop, so any launch shape covers the range.
    addKernel<<<12, 512>>>(dev_c, dev_a, dev_b);

    // Check for any errors launching the kernel
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }

    // cudaDeviceSynchronize waits for the kernel to finish, and returns
    // any errors encountered during its execution.
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
        goto Error;
    }

    // Copy the result flag from the device back to host memory.
    cudaStatus = cudaMemcpy(is_prime, dev_c, sizeof(int), cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

    // A raised flag means a divisor was found.
    if (*is_prime == 1) {
        std::cout << "(GPU)Liczba nie jest pierwsza \n";
    }
    else {
        std::cout << "(GPU)liczba pierwsza \n";
    }

    // clock() ticks converted to milliseconds.
    printf("Czas wykonywania: %.0f ms\n",
           1000.0 * (double)(clock() - start) / CLOCKS_PER_SEC);

Error:
    // cudaFree accepts null pointers, so this is safe on early-exit paths.
    cudaFree(dev_c);
    cudaFree(dev_a);
    cudaFree(dev_b);

    return cudaStatus;
}

Author

Title

Language

Your paste - Paste your paste here

#include &quot;cuda_runtime.h&quot;
#include &quot;device_launch_parameters.h&quot;
#include &lt;stdio.h&gt;
#include &lt;iostream&gt;
#include &lt;cstdio&gt;
#include &lt;ctime&gt;

using namespace std;

cudaError_t addWithCuda(int *is_prime, unsigned long long int *number, unsigned long long int *sqrtt);

// Trial-division kernel: raises a flag when any candidate divisor in
// [2, *sqrtt] divides *number.
//
// Launch layout: 1-D grid of 1-D blocks of any size. Each thread starts at its
// flat global index and advances by the total number of launched threads
// (grid-stride loop), so every candidate is visited by exactly one thread.
// No shared memory is used and no barrier is required.
//
// Parameters (all device pointers):
//   is_prime - result flag. Despite the name, the kernel stores 1 when a
//              divisor is found (the number is NOT prime) and otherwise never
//              writes it, so the caller must zero the flag before the launch.
//   number   - the value under test.
//   sqrtt    - inclusive upper bound for candidate divisors; the caller is
//              expected to pass floor(sqrt(*number)).
__global__ void addKernel(int *is_prime, unsigned long long int *number, unsigned long long int *sqrtt)
{
	// Read the loop-invariant inputs from global memory once.
	const unsigned long long int n = *number;
	const unsigned long long int limit = *sqrtt;

	// Widen before multiplying so the index/stride math is done in 64 bits.
	const unsigned long long int stride =
		(unsigned long long int)gridDim.x * blockDim.x;
	unsigned long long int i =
		(unsigned long long int)blockIdx.x * blockDim.x + threadIdx.x;

	for (; i <= limit; i += stride)
	{
		// 0 and 1 are not meaningful divisors.
		if (i > 1 && n % i == 0)
		{
			// Several threads may store concurrently; they all store the same
			// value, so the outcome does not depend on ordering.
			*is_prime = 1;
			return; // this thread has nothing more to contribute
		}
	}
}

// CPU reference primality test by trial division. Prints the verdict to
// stdout and returns it.
//
// Parameters:
//   number - pointer to the value to test; must not be null.
// Returns:
//   1 when *number is prime, 0 otherwise (including for 0 and 1).
int is_prime_number(unsigned long long int *number)
{
	const unsigned long long int n = *number;

	// 0 and 1 are not prime by definition.
	bool prime = (n >= 2);

	// Start at 2 so even numbers are rejected. `i <= n / i` is `i * i <= n`
	// without overflow and without floating-point sqrt rounding; the bound is
	// inclusive so perfect squares are rejected as well.
	for (unsigned long long int i = 2; prime && i <= n / i; ++i)
	{
		if (n % i == 0)
		{
			prime = false;
		}
	}

	if (!prime)
	{
		std::cout << "(cpu)liczba nie jest pierwsza\n";
		return 0;
	}

	std::cout << "(cpu) liczba pierwsza\n";
	return 1;
}

// Entry point: tests 2^61 - 1 for primality on the CPU and then on the GPU,
// printing each verdict together with the elapsed time.
// Returns 0 on success and 1 when any CUDA step fails.
int main()
{
	// Plain locals instead of `new`: nothing to leak, nothing to free.
	unsigned long long int number = (((unsigned long long int)1) << 61) - 1; // 2^61 - 1
	int is_prime = 0;

	// Inclusive divisor bound floor(sqrt(number)). A double cannot represent
	// every 64-bit integer, so correct the truncated estimate with integer
	// arithmetic (division form avoids overflow of r * r).
	unsigned long long int sqrtt = (unsigned long long int)sqrt((double)number);
	while (sqrtt > 0 && sqrtt > number / sqrtt)
	{
		--sqrtt;
	}
	while (sqrtt + 1 <= number / (sqrtt + 1))
	{
		++sqrtt;
	}

	// CPU reference run; clock() ticks are converted to milliseconds.
	clock_t start1 = clock();
	is_prime_number(&number);
	printf("Czas wykonywania: %.0f ms\n",
	       1000.0 * (double)(clock() - start1) / CLOCKS_PER_SEC);

	// GPU run; addWithCuda prints its own verdict and timing.
	cudaError_t cudaStatus = addWithCuda(&is_prime, &number, &sqrtt);
	if (cudaStatus != cudaSuccess) {
		fprintf(stderr, "addWithCuda failed!");
		system("pause");
		return 1;
	}

	// cudaDeviceReset must be called before exiting in order for profiling and
	// tracing tools such as Nsight and Visual Profiler to show complete traces.
	cudaStatus = cudaDeviceReset();
	if (cudaStatus != cudaSuccess) {
		fprintf(stderr, "cudaDeviceReset failed!");
		system("pause");
		return 1;
	}

	// NOTE(review): `pause` is a Windows shell command; on other platforms
	// this call only produces a shell error.
	system("pause");
	return 0;
}

// Host wrapper for the GPU primality test: copies the inputs to the device,
// launches addKernel, copies the result flag back, and prints the verdict and
// the elapsed time to stdout.
//
// Parameters (all host pointers):
//   is_prime - out: set to 1 when a divisor of *number was found (the number
//              is NOT prime), 0 otherwise.
//   number   - in: the value to test.
//   sqrtt    - in: inclusive upper bound for candidate divisors,
//              expected to be floor(sqrt(*number)).
// Returns:
//   cudaSuccess, or the first CUDA error encountered. Device buffers are
//   released on every path.
cudaError_t addWithCuda(int *is_prime, unsigned long long int *number, unsigned long long  int *sqrtt)
{
    unsigned long long int *dev_a = 0; // device copy of *number
    unsigned long long int *dev_b = 0; // device copy of *sqrtt
    int *dev_c = 0;                    // device result flag
    // Declared before the first `goto Error` so that no jump skips an
    // initialization.
    clock_t start = 0;
    cudaError_t cudaStatus;

    // Choose which GPU to run on, change this on a multi-GPU system.
    cudaStatus = cudaSetDevice(0);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");
        goto Error;
    }

    // Allocate the three single-element device buffers.
    cudaStatus = cudaMalloc((void**)&dev_c, sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    cudaStatus = cudaMalloc((void**)&dev_a, sizeof(unsigned long long int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    cudaStatus = cudaMalloc((void**)&dev_b, sizeof(unsigned long long int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed!");
        goto Error;
    }

    // The kernel only ever writes 1, so the flag must start at 0 on the device.
    cudaStatus = cudaMemset(dev_c, 0, sizeof(int));
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemset failed!");
        goto Error;
    }

    // Copy the inputs from host memory to the device buffers.
    cudaStatus = cudaMemcpy(dev_a, number, sizeof(unsigned long long int), cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

    // Copy the full 64-bit bound; sizeof(int) would leave half of it unset.
    cudaStatus = cudaMemcpy(dev_b, sqrtt, sizeof(unsigned long long int), cudaMemcpyHostToDevice);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

    start = clock();

    // The kernel uses a grid-stride loop, so any launch shape covers the range.
    addKernel<<<12, 512>>>(dev_c, dev_a, dev_b);

    // Check for any errors launching the kernel
    cudaStatus = cudaGetLastError();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
        goto Error;
    }

    // cudaDeviceSynchronize waits for the kernel to finish, and returns
    // any errors encountered during its execution.
    cudaStatus = cudaDeviceSynchronize();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus);
        goto Error;
    }

    // Copy the result flag from the device back to host memory.
    cudaStatus = cudaMemcpy(is_prime, dev_c, sizeof(int), cudaMemcpyDeviceToHost);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy failed!");
        goto Error;
    }

    // A raised flag means a divisor was found.
    if (*is_prime == 1) {
        std::cout << "(GPU)Liczba nie jest pierwsza \n";
    }
    else {
        std::cout << "(GPU)liczba pierwsza \n";
    }

    // clock() ticks converted to milliseconds.
    printf("Czas wykonywania: %.0f ms\n",
           1000.0 * (double)(clock() - start) / CLOCKS_PER_SEC);

Error:
    // cudaFree accepts null pointers, so this is safe on early-exit paths.
    cudaFree(dev_c);
    cudaFree(dev_a);
    cudaFree(dev_b);

    return cudaStatus;
}

Private - Private pastes aren't shown in recent listings.

Delete After - When should we delete your paste?

Spam protection -

Reply to "Prime"