Topic
  • No replies
rakuraku
rakuraku
4 Posts

Pinned topic How to load more than one SPU program into PPU?

‏2012-05-26T20:07:24Z |
Hello, I have the following problem: I want to load more than one SPU program from my PPU program, because I want several different functions to be computed on the SPUs.

I have function1(), which (for example) changes all values in my table to 5; it lives in the spu1 program.

I have function2(), which (for example) changes all values in my table to 12; it lives in the spu2 program.

The only way I know to use these two functions on the SPUs is to put them into separate SPU programs and load those into different contexts, but I'm not sure
whether there is another way to do it.

How can I rework the code below, which uses a single SPU program, so that it uses two or more SPU programs, or computes more than one function?

PPE CODE


/* --------------------------------------------------------------- */ 
/* Licensed Materials - Property of IBM                            */ 
/* 5724-S84                                                        */ 
/* (C) Copyright IBM Corp. 2008       All Rights Reserved          */ 
/* US Government Users Restricted Rights - Use, duplication or     */ 
/* disclosure restricted by GSA ADP Schedule Contract with         */ 
/* IBM Corp.                                                       */ 
/* --------------------------------------------------------------- */ 
/* PROLOG END TAG zYx                                              */
#include <dirent.h>
#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>

#include <libspe2.h>

#include <simple_dma.h>
/*
 * Control blocks handed to the SPE side.  Each one is aligned to 128 bytes.
 * Only `cb` is referenced by the code in this file.
 */
control_block cb __attribute__ ((aligned (128)));
control_block cb2 __attribute__ ((aligned (128)));
control_block cb3 __attribute__ ((aligned (128)));

/* Handle of the SPE program image; it is defined outside this file. */
extern spe_program_handle_t hello_spu;

/* SPE context handle, assigned from spe_context_create(). */
spe_context_ptr_t speid;

/* SPU entry point, initialised to the default entry. */
unsigned int entry = SPE_DEFAULT_ENTRY;

/* Receives the stop reason and exit code when the SPE context stops. */
spe_stop_info_t stop_info;

/* Address of the aligned data buffer obtained from posix_memalign(). */
float *data;

/*
 * SendToSpu - run the hello_spu program once over a copy of `tablica`.
 *
 * Copies DATA_BUFFER_ENTRIES floats from `tablica` into a freshly allocated,
 * 128-byte-aligned buffer, stores that buffer's address in the control block
 * `cb`, runs the SPE program synchronously in a new context, and prints the
 * buffer contents before and after the run.  The buffer is released before
 * returning.
 *
 * tablica: input values.  The caller must supply at least
 *          DATA_BUFFER_ENTRIES elements; the array itself is not modified.
 *
 * Returns 0 on success, or -1 if `tablica` is NULL or no physical SPE is
 * reported.  An allocation failure, any libspe2 failure, and any abnormal or
 * non-zero SPE exit terminate the process with exit(1).
 */
int SendToSpu(float *tablica)
{
    /*
     * spe_context_run() takes the entry point by address, so every run
     * starts from its own local copy of the default entry.
     */
    unsigned int entry = SPE_DEFAULT_ENTRY;
    int i, rc;

    if (tablica == NULL)
    {
        fprintf(stderr, "FAILED: SendToSpu called with a NULL table\n");
        return -1;
    }

    /* Probe for hardware before allocating, so this early return cannot leak. */
    if (spe_cpu_info_get(SPE_COUNT_PHYSICAL_SPES, -1) < 1)
    {
        fprintf(stderr, "System doesn't have a working SPE.  I'm leaving.\n");
        return -1;
    }

    rc = posix_memalign((void **)&data, 128, DATA_BUFFER_SIZE);
    if (rc != 0)
    {
        fprintf(stderr, "Failed allocating space for data array\n");
        exit(1);
    }

    /* Fill the transfer buffer from the caller's table and show what is sent. */
    for (i = 0; i < DATA_BUFFER_ENTRIES; i++)
    {
        data[i] = tablica[i];
        printf("tablica PPE: %f\n", data[i]);
    }

    printf("Address being sent in control block: %p\n", data);

    /* load the address into the control block */
    cb.addr = (unsigned long long)((uintptr_t)data);

    /* create the SPE context */
    if ((speid = spe_context_create(0, NULL)) == NULL)
    {
        fprintf(stderr, "FAILED: spe_context_create(errno=%d strerror=%s)\n", errno, strerror(errno));
        exit(1);
    }

    /* load the SPE program into the SPE context */
    if (spe_program_load(speid, &hello_spu) != 0)
    {
        fprintf(stderr, "FAILED: spe_program_load(errno=%d strerror=%s)\n", errno, strerror(errno));
        exit(1);
    }

    /* run the SPE context; the control block address is passed as argp */
    if (spe_context_run(speid, &entry, 0, &cb, NULL, &stop_info) < 0)
    {
        fprintf(stderr, "FAILED: spe_context_run(errno=%d strerror=%s)\n", errno, strerror(errno));
        exit(1);
    }

    /* destroy the SPE context */
    if (spe_context_destroy(speid) != 0)
    {
        fprintf(stderr, "FAILED: spe_context_destroy(errno=%d strerror=%s)\n", errno, strerror(errno));
        exit(1);
    }

    /* check the SPE status */
    if (stop_info.stop_reason != SPE_EXIT)
    {
        fprintf(stderr, "FAILED: SPE abnormally terminated\n");
        exit(1);
    }
    if (stop_info.result.spe_exit_code != 0)
    {
        fprintf(stderr, "FAILED: SPE returned a non-zero exit status\n");
        exit(1);
    }

    /* Show every element of the buffer as it stands after the SPE run. */
    for (i = 0; i < DATA_BUFFER_ENTRIES; i++)
    {
        printf("tablica SPE: %f\n", data[i]);
    }

    free(data);
    data = NULL; /* the global must not keep pointing at freed memory */
    printf("free mema");

    return 0;
}

/*
 * PPE entry point: prints the buffer geometry, fills a table with a constant
 * and hands it to SendToSpu().
 *
 * Returns 0 after printing "PASSED" when SendToSpu() reports success, and 1
 * otherwise.
 */
int main(void)
{
    /*
     * SendToSpu() allocates and walks DATA_BUFFER_ENTRIES elements, so the
     * table is sized to match instead of using a hard-coded 256.
     */
    float tablica[DATA_BUFFER_ENTRIES];
    int x;

    /*
     * The macros are wrapped in parentheses and cast because their exact
     * integer type is not visible here (DATA_BUFFER_SIZE may well be a
     * size_t expression), and %i requires an int argument.
     */
    printf("DATA BUFFER ENTRIES %i\n", (int)(DATA_BUFFER_ENTRIES));
    printf("DATA BUFFER SIZE %i\n", (int)(DATA_BUFFER_SIZE));

    for (x = 0; x < DATA_BUFFER_ENTRIES; x++)
    {
        tablica[x] = 5.59f;
    }

    if (SendToSpu(tablica) != 0)
    {
        fprintf(stderr, "FAILED: SendToSpu\n");
        return 1;
    }

    printf("PASSED\n");

    return 0;
}


SPE CODE


/* --------------------------------------------------------------- */ 
/* Licensed Materials - Property of IBM                            */ 
/* 5724-S84                                                        */ 
/* (C) Copyright IBM Corp. 2008       All Rights Reserved          */ 
/* US Government Users Restricted Rights - Use, duplication or     */ 
/* disclosure restricted by GSA ADP Schedule Contract with         */ 
/* IBM Corp.                                                       */ 
/* --------------------------------------------------------------- */ 
/* PROLOG END TAG zYx                                              */ #include <spu_mfcio.h> #include <stdio.h> #include <simple_dma.h>   
/* Local-store copy of the control block, filled by the first DMA in main(). */
volatile control_block cb __attribute__ ((aligned (128)));

/*
 * Local-store copy of the data array, filled by DMA from the address held in
 * cb.addr.  The PPE side allocates and prints that buffer as float, so the
 * element type here is float as well; with int elements a stored 5 would be
 * read back on the PPE as a meaningless float bit pattern.
 */
float data[DATA_BUFFER_ENTRIES] __attribute__ ((aligned (128)));

/*
 * SPE entry point.
 *
 * Fetches the control block from the effective address in `argp`, fetches the
 * data array from the address stored in that control block, overwrites every
 * element with 5, and writes the array back to the same address.
 *
 * speid: unused.
 * argp:  effective address of the control block.
 * envp:  unused.
 *
 * Returns 0 on success, or 1 if no MFC tag could be reserved.
 */
int main(unsigned long long speid __attribute__ ((unused)),
         unsigned long long argp,
         unsigned long long envp __attribute__ ((unused)))
{
    unsigned int tag_id;
    int p;

    /* Reserve a tag for application usage */
    if ((tag_id = mfc_tag_reserve()) == MFC_TAG_INVALID)
    {
        printf("ERROR: unable to reserve a tag\n");
        return 1;
    }

    /*
     * mfc_get arguments: local-store destination, main-memory address,
     * number of bytes, tag, and two cache-management parameters left at 0.
     */
    mfc_get(&cb, argp, sizeof(cb), tag_id, 0, 0);

    /*
     * Select our tag in the tag mask.  The mask bit is 1 shifted left by the
     * tag number; the unsigned constant keeps the shift well defined for
     * tag 31 as well.
     */
    mfc_write_tag_mask(1U << tag_id);

    /* Block until the control block has arrived. */
    mfc_read_tag_status_all();

    /* DMA the data from system memory to our local store buffer. */
    mfc_get(data, cb.addr, DATA_BUFFER_SIZE, tag_id, 0, 0);

    printf("Address received through control block = 0x%llx\n", cb.addr);

    /* Wait for the data array DMA to complete. */
    mfc_read_tag_status_all();

    /* Overwrite the whole array, starting at element 0. */
    for (p = 0; p < DATA_BUFFER_ENTRIES; p++)
    {
        data[p] = 5;
    }

    /* Write the modified array back to where it came from. */
    mfc_put(data, cb.addr, DATA_BUFFER_SIZE, tag_id, 0, 0);

    /*
     * Wait for the write-back as well: returning while the put is still in
     * flight gives no guarantee that the PPE sees the new values.
     */
    mfc_read_tag_status_all();

    /* Hand the tag back now that no transfer is using it. */
    (void)mfc_tag_release(tag_id);

    printf("done \n");

    return 0;
}