Hello everyone, I have some questions about DMA transfers. I want to do array addition using DMA transfers.
array_a and array_b are defined and initialized in the PPE program, and I want to do the addition on one SPE.
The program below runs correctly, but I have a question:
In my SPE program there are five DMA transfers: the first transfers the parameters (the control block); the second through the fourth transfer array_a, array_b and array_c from main memory to the local store (LS); and the last transfers the result from the LS back to main memory. Are the 2nd, 3rd and 4th DMA transfers indispensable? This seems needlessly roundabout to me. Is there a more efficient way to solve this problem? Thank you!
PPE program:
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <libspe2.h>
#include "simple_dma.h"
/*
 * Parameter block handed from the PPE program to the SPE program.
 * The PPE fills in the addresses of its arrays and passes the address of
 * this structure as the SPE program's argp.
 */
typedef struct _control_block {
    unsigned int a;         /* address of the first input array (array_a)  */
    unsigned int b;         /* address of the second input array (array_b) */
    unsigned int c;         /* address of the result array (array_c)       */
    unsigned int size;      /* presumably the element count; TODO confirm  */
    unsigned char pad[112]; /* pads the block to 128 bytes, assuming 4-byte unsigned int */
} control_block;
int array_a
ARRAYSIZE __attribute__((aligned(16)));
int array_b
ARRAYSIZE __attribute__((aligned(16)));
int array_c
ARRAYSIZE __attribute__((aligned(16)));
extern spe_program_handle_t DMA_SPU;
spe_context_ptr_t speid;
unsigned int entry = SPE_DEFAULT_ENTRY;
/*
 * Fill the three global arrays with their starting values:
 * array_a[k] = 2k, array_b[k] = 4k, array_c[k] = 0.
 */
void init_array()
{
    int idx = 0;

    while (idx < ARRAYSIZE) {
        const int doubled = idx * 2;

        array_a[idx] = doubled;
        array_b[idx] = doubled * 2;
        array_c[idx] = 0;
        ++idx;
    }
}
/*
 * PPE entry point.
 *
 * Initializes the arrays, publishes their addresses through the control
 * block, runs the SPE program once with the control block as its argp, and
 * finally prints the contents of array_c.
 *
 * Returns 0 on success; exits with status 1 if any libspe2 call fails.
 */
int main()
{
    int i;

    init_array();

    /*
     * The control block stores addresses in 32-bit fields. Converting through
     * uintptr_t makes the narrowing explicit.
     * NOTE(review): this is only correct when the array addresses fit in
     * 32 bits (e.g. a 32-bit PPE build) -- confirm for the target build.
     */
    cb.a = (unsigned int)(uintptr_t)array_a;
    cb.b = (unsigned int)(uintptr_t)array_b;
    cb.c = (unsigned int)(uintptr_t)array_c;
    /* Previously left unset; filled in so the block describes the arrays fully. */
    cb.size = ARRAYSIZE;

    /* Create context */
    if ((speid = spe_context_create(0, NULL)) == NULL) {
        perror("Failed creating context");
        exit(1);
    }

    /* Load program into context */
    if (spe_program_load(speid, &DMA_SPU)) {
        perror("Failed loading program");
        exit(1);
    }

    /* Run context; the call returns once the SPE program has stopped. */
    if (spe_context_run(speid, &entry, 0, &cb, NULL, NULL) < 0) {
        perror("Failed running context");
        exit(1);
    }

    /* Destroy context */
    if (spe_context_destroy(speid) != 0) {
        perror("Failed destroying context");
        exit(1);
    }

    /* Memory barrier before reading the results written by the SPE. */
    __asm__ __volatile__ ("sync" : : : "memory");

    printf("Array Addition completes. Verifying results...\n");
    for (i = 0; i < ARRAYSIZE; i++) {
        printf("%d \n", array_c[i]);
    }

    return 0;
}
SPE program:
#include <spu_intrinsics.h>
#include <spu_mfcio.h>
#include <stdio.h>
#define ARRAYSIZE 8
/*
 * SPE-side view of the parameter block fetched from the address in argp.
 * The layout must match the structure the PPE program fills in.
 */
typedef struct _control_block {
    unsigned int a;         /* effective address of the first input array  */
    unsigned int b;         /* effective address of the second input array */
    unsigned int c;         /* effective address of the result array       */
    unsigned int size;      /* presumably the element count; TODO confirm  */
    unsigned char pad[112]; /* pads the block to 128 bytes, assuming 4-byte unsigned int */
} control_block;
/* Here's the local copy of the control block, to be filled by the DMA */
control_block cb __attribute__((aligned (128)));
/* Here's the local copy of the data array, to be filled by the DMA */
int array_d
ARRAYSIZE __attribute__((aligned(16)));
int array_e
ARRAYSIZE __attribute__((aligned(16)));
int array_f
ARRAYSIZE __attribute__((aligned(16)));
/*
 * SPE entry point.
 *
 * speid - SPE identifier supplied by the runtime (unused here).
 * argp  - effective address of the control block to fetch.
 * envp  - environment pointer supplied by the runtime (unused here).
 *
 * Fetches the control block, DMAs the two input arrays into local store,
 * adds them element-wise into array_f, and DMAs array_f out to the address
 * in cb.c. Returns 0.
 */
int main(unsigned long long speid, unsigned long long argp, unsigned long long envp)
{
    const unsigned int tag = 31;
    /* Unsigned literal: shifting a signed 1 left by 31 overflows int. */
    const unsigned int tag_mask = 1u << tag;
    const unsigned int nbytes = ARRAYSIZE * sizeof(int);
    int i;

    (void)speid;
    (void)envp;

    /* The control block must arrive before its address fields can be read. */
    mfc_get(&cb, argp, sizeof(cb), tag, 0, 0);
    mfc_write_tag_mask(tag_mask);
    mfc_read_tag_status_all();

    /*
     * Queue both input transfers on the same tag and wait for them together
     * instead of stalling after each one. The result buffer is not fetched:
     * every element of array_f is overwritten by the loop below, so an
     * inbound copy of the destination array would be wasted work.
     */
    mfc_get(array_d, cb.a, nbytes, tag, 0, 0);
    mfc_get(array_e, cb.b, nbytes, tag, 0, 0);
    mfc_write_tag_mask(tag_mask);
    mfc_read_tag_status_all();

    /* Compute the sums in the local store buffers. */
    for (i = 0; i < ARRAYSIZE; i++) {
        array_f[i] = array_d[i] + array_e[i];
    }

    /* DMA the result from local store to system memory and wait for it. */
    mfc_put(array_f, cb.c, nbytes, tag, 0, 0);
    mfc_write_tag_mask(tag_mask);
    mfc_read_tag_status_all();

    return 0;
}