#include "DistributeVecLib.h" /* All comments only consider the distribution of the vector v, where all vector-length arrays are of length A.n. This corresponds to the direction dir = ROW, meaning that the vector is in the row direction. In case of the vector u = Av, one should read A.m instead. */ int AssignColumnToProc(long int *X, long *procstart, int *procindex, long *Ns, long *Nr, long *Nv, long *Sums, long j, int q) { /* This function assigns column j to processor q, updating the owner array X and the number of sends Ns, receives Nr, vector elements Nv of all processors. If Sums <> NULL, it also updates the sum of the number of sends and receives, assuming that inevitable communications have already been included (1 communication for every processor in a matrix column). Column j must not yet been owned. Input: 0 <= j < n, 0 <= q < P. This function can also be used to update only X and Nv. In that case, the parameters procstart, procindex, Ns, Nr, Sums must be NULL upon input. */ long r; int q_occurs_in_col, npj; if (!X || !Nv) { fprintf(stderr, "AssignColumnToProc(): Null arguments!\n"); return FALSE; } if (X[j] != -1) { fprintf(stderr, "AssignColumnToProc(): Column already owned!\n"); return FALSE; } /* Assign column to processor */ X[j] = q; Nv[q]++; /* Return if sends and receives need not be updated */ if (procstart == NULL && procindex == NULL && Ns == NULL && Nr == NULL && Sums == NULL) return TRUE; /* Check whether q occurs in column j and add receives to all receiving processors */ q_occurs_in_col = FALSE; for (r=procstart[j]; r<procstart[j+1]; r++) { if (procindex[r] == q) q_occurs_in_col = TRUE; else Nr[procindex[r]]++; } /* Add sends to sending processor and update sums */ npj = procstart[j+1] - procstart[j]; /* number of processors in column j */ if (q_occurs_in_col) { Ns[q] += npj - 1; /* This is >=0 */ if (Sums != NULL && npj >= 3) Sums[q] += npj - 2; } else { Ns[q] += npj; if (Sums != NULL) Sums[q] += npj; } return TRUE; } /* end AssignColumnToProc */ int RemoveColumnFromProc(long int *X, long *procstart, int *procindex, long *Ns, long *Nr, long *Nv, long *Sums, long j, int q) { /* This function removes column j from processor q, updating the owner array X and the number of sends Ns, receives Nr, vector elements Nv of all processors. If Sums <> NULL, it also updates the sum of the number of sends and receives, assuming that inevitable communications have already been included (1 communication for every processor in a matrix column). Column j must be owned by q. Input: 0 <= j < n, 0 <= q < P. This function can also be used to update only X and Nv. In that case, the parameters procstart, procindex, Ns, Nr, Sums must be NULL upon input. */ long r; int q_occurs_in_col, npj; if (!X || !Nv) { fprintf(stderr, "RemoveColumnFromProc(): Null arguments!\n"); return FALSE; } if (X[j] != q) { fprintf(stderr, "RemoveColumnFromProc(): Column removed from nonowner!\n"); return FALSE; } /* Remove column from processor */ X[j] = -1; Nv[q]--; /* Return if sends and receives need not be updated */ if (procstart == NULL && procindex == NULL && Ns == NULL && Nr == NULL && Sums == NULL) return TRUE; /* Check whether q occurs in column j and subtract receives from all receiving processors */ q_occurs_in_col = FALSE; for (r=procstart[j]; r<procstart[j+1]; r++) { if (procindex[r] == q) q_occurs_in_col = TRUE; else Nr[procindex[r]]--; } /* Subtract sends from sending processor and update sums */ npj = procstart[j+1] - procstart[j]; /* number of processors in column j */ if (q_occurs_in_col) { Ns[q] -= npj - 1; /* This is >=0 */ if (Sums != NULL && npj >= 3) Sums[q] -= npj - 2; } else { Ns[q] -= npj; if (Sums != NULL) Sums[q] -= npj; } return TRUE; } /* end RemoveColumnFromProc */ int AssignRemainingColumns(long l, int P, long int *X, long *Nv) { /* This function assigns all the remaining unowned vector components (i.e. columns) trying to balance the number of vector components among the processors. Input: l is the number of vector components. P is the number of processors. X is an array of length l. If X[j] = -1, then component j is still unowned. Otherwise, 0 <= X[j] < P and X[j] will not be changed. Nv is an array of length P. It need not be initialised; it will be overwritten. Output: 0 <= X[j] < P for all j. Nv[q] is the number of vector components j with X[j] = q, for 0 <= q < P. */ long max, j; int q; if (!X || !Nv) { fprintf(stderr, "AssignRemainingColumns(): Null arguments!\n"); return FALSE; } /* Count the number of vector components Nv[q] owned by processor q */ for (q=0; q < P; q++) Nv[q] = 0; for (j=0; j < l; j++) { if (X[j] != -1) { if (X[j] < 0 || X[j] > P) { fprintf(stderr, "AssignRemainingColumns(): Error in owner!\n"); return FALSE; } Nv[X[j]]++; } } /* Determine current maximum number of vector components per processor */ max= 0; for (q=0; q < P; q++) if (Nv[q] > max) max = Nv[q]; /* Assign columns until all processors have reached max */ q=0; for (j=0; j < l; j++) { if (X[j] == -1) { /* find first available processor */ while(q < P && Nv[q] == max) q++; if (q==P) { /* All processors have reached max */ break; } else { /* Nv[q] < max */ if (!AssignColumnToProc(X, NULL,NULL,NULL,NULL, Nv, NULL,j,q)) { fprintf(stderr, "AssignRemainingColumns(): Unable to assign column!\n"); return FALSE; } } } } /* Assign remaining columns cyclically */ q=0; for (j=0; j < l; j++) { if (X[j] == -1) { if (!AssignColumnToProc(X, NULL,NULL,NULL,NULL, Nv, NULL, j,q)) { fprintf(stderr, "AssignRemainingColumns(): Unable to assign column!\n"); return FALSE; } q++; if (q == P) q = 0; } } return TRUE; } /* end AssignRemainingColumns */ int AssignRemainingNonemptyColumns(long l, int P, long int *X, long *procstart, int *procindex, long *Ns, long *Nr, long *Nv) { /* This function greedily assigns all unowned columns with Nprocs >= 1 to the processor q with minimal cost, updating the owner array X and the number of sends Ns, receives Nr, vector elements Nv of all processors. Input: l is the number of vector components. P is the number of processors. X is an array of length l. If X[j] = -1, then component j is still unowned. Otherwise, 0 <= X[j] < P and X[j] will not be changed. Output: 0 <= X[j] < P for all j, except if Nprocs[j]=0. */ long j, s, min; int q, r, npj; if (!X || !Nv) { fprintf(stderr, "AssignRemainingNonemptyColumns(): Null arguments!\n"); return FALSE; } for (j=0; j<l; j++) { if (X[j] != -1) continue; npj = procstart[j+1] - procstart[j]; if (npj == 1) { q = procindex[procstart[j]]; if (!AssignColumnToProc(X, procstart, procindex, Ns, Nr, Nv, NULL, j, q)) { fprintf(stderr, "AssignRemainingNonemptyColumns(): Unable to assign column!\n"); return FALSE; } } else if (npj > 1) { /* Add receives temporarily to all processors in the column, so we need to consider only one processor at a time */ for (s=procstart[j]; s<procstart[j+1]; s++) { r = procindex[s]; Nr[r]++; } /* Find processor q with minimum cost */ q = procindex[procstart[j]]; min = MAX(Ns[q]+npj-1, Nr[q]-1); for (s=procstart[j]+1; s<procstart[j+1]; s++) { r = procindex[s]; if (MAX(Ns[r]+npj-1, Nr[r]-1) < min) { q = r; min = MAX(Ns[r]+npj-1,Nr[r]-1); } } /* Remove temporary receives */ for (s=procstart[j]; s<procstart[j+1]; s++) { r = procindex[s]; Nr[r]--; } if (!AssignColumnToProc(X, procstart, procindex, Ns, Nr, Nv, NULL, j, q)) { fprintf(stderr, "AssignRemainingNonemptyColumns(): Unable to assign column!\n"); return FALSE; } } } return TRUE; } /* end AssignRemainingNonemptyColumns */ int InitNprocs(const struct sparsematrix *pM, int dir, int *Nprocs) { /* This function initialises the array Nprocs such that Nprocs[j] is the number of processors that own nonzeros in matrix column j (in case dir = ROW), or matrix row j (in case dir = COL). The nonzeros of the matrix have been sorted by increasing processor number, as given by the array pM->Pstart[0..P], where P=pM->NrProcs and pM->Pstart[P] = pM->NrNzElts. */ int *occur; /* occur[j]= q means processor q occurs in column j and is its current highest-numbered processor. occur[j]= -1 means no processor occurs yet in column j */ int q; /* q is the current processor number */ long j, t, l=0, *Index=NULL; if (!pM || !Nprocs) { fprintf(stderr, "InitNprocs(): Null arguments!\n"); return FALSE; } if (dir == COL) { l = pM->m; Index = pM->i; } else if (dir == ROW) { l = pM->n; Index = pM->j; } else { fprintf(stderr, "InitNprocs(): Unknown direction!\n"); return FALSE; } occur = (int *)malloc(l*sizeof(int)); if (occur == NULL) { fprintf(stderr, "InitNprocs(): Not enough memory!\n"); return FALSE; } for (j=0; j<l; j++) { Nprocs[j] = 0; occur[j] = -1; } /* Initialise Nprocs to the number of processors in the column */ q=0; for (t=0; t<pM->NrNzElts; t++) { while(t == pM->Pstart[q+1]) q++; /* this terminates because t != pM->Pstart[P] = pM->NrNzElts */ j = Index[t]; if (occur[j] != q) { occur[j] = q; Nprocs[j]++; } } free(occur); return TRUE; } /* end InitNprocs */ int InitProcindex(const struct sparsematrix *pM, int dir, int *Nprocs, long *procstart, int *procindex) { /* This function initialises procstart and procindex using Nprocs. Input: Nprocs, where Nprocs[j] is the number of processors that own nonzeros in matrix column j (in case dir = ROW). Output: procindex, contains the processor numbers occurring in the matrix columns. The processor numbers for one column j are stored consecutively. procstart, where procstart[j] is the position in array procindex of the first processor number of column j, 0 <= j < pM->n. procstart[pM->n] = total number of processor numbers of all the columns together. Together, InitNprocs and InitProcindex compute the P x n sparse communication matrix C, stored in compressed column storage (CCS) format, and corresponding to the m x n distributed sparse matrix pM-> */ int *occur; /* occur[j]= q means processor q occurs in column j and is its current highest-numbered processor. occur[j]= -1 means no processor occurs yet in column j */ int q; /* q is the current processor number */ long j, t, l=0, total, *Index=NULL; if (!pM || !Nprocs || !procstart || !procindex) { fprintf(stderr, "InitProcindex(): Null arguments!\n"); return FALSE; } if (dir == COL) { l = pM->m; Index = pM->i; } else if (dir == ROW) { l = pM->n; Index = pM->j; } else { fprintf(stderr, "InitProcindex(): Unknown direction!\n"); return FALSE; } /* Initialise procstart */ total= 0; for (j=0; j<l; j++) { procstart[j] = total; total += Nprocs[j]; } procstart[l] = total; occur = (int *)malloc(l*sizeof(int)); if (occur == NULL) { fprintf(stderr, "InitProcindex(): Not enough memory!\n"); } for (j=0; j<l; j++) occur[j] = -1; /* Initialise procindex, modifying procstart */ q=0; for (t=0; t<pM->NrNzElts; t++) { while(t == pM->Pstart[q+1]) q++; /* this terminates because t != pM->Pstart[P] = pM->NrNzElts */ j = Index[t]; if (occur[j] != q) { occur[j] = q; procindex[procstart[j]] = q; procstart[j]++; } } /* Reinitialise procstart */ total= 0; for (j=0; j<l; j++) { procstart[j] = total; total += Nprocs[j]; } procstart[l] = total; free(occur); return TRUE; } /* end InitProcindex */ int GenerateHistogram(int *X, long l, int lo, int hi, long *Histogram) { /* This function computes a histogram for the array X of length l, for function values X[j] in the interval lo..hi. Output: Histogram[q-lo] = |{j: 0 <= j < l and X[j] = q }| for q=lo,..,hi. The array Histogram must be at least of length hi-lo+1. */ long j; int q; if (!X || !Histogram) { fprintf(stderr, "GenerateHistogram(): Null arguments!\n"); return FALSE; } /* Initialise histogram */ for (q = lo; q <= hi; q++) Histogram[q-lo]= 0; /* Fill histogram */ for (j = 0; j < l; j++) if (lo <= X[j] && X[j] <= hi) Histogram[X[j]-lo]++; return TRUE; } /* end GenerateHistogram */ int PrintHistogram(int lo, int hi, long *Histogram) { /* This function prints a histogram for values lo..hi but only up to the maximum value that occurs. The function prints q and Histogram[q-lo] for lo <= q <= qmax. Here, lo <= qmax <= hi. The array Histogram must be at least of length hi-lo+1. */ int q, qmax; if (!Histogram) { fprintf(stderr, "PrintHistogram(): Null argument!\n"); return FALSE; } /* Determine maximum occurring value qmax */ qmax = lo; /* qmax >= lo, to print at least one value */ for (q = hi; q >= lo; q--) if (Histogram[q-lo]>0) { qmax = q; break; } for (q = lo; q <= qmax; q++) printf(" p=%d: %ld \n", q, Histogram[q-lo]); return TRUE; } /* end PrintHistogram */ int CalcCom(const struct sparsematrix *pM, long int *X, int dir, long *ComVol, long *MaxOut, long *MaxIn, long *MaxCompnts, long *TotCompnts) { /* This function calculates the total and maximum communication volume for a given distributed sparse matrix A and a distributed vector X. Note that these numbers are for only one direction (vector). If X=NULL, the volume is only based on the matrix and no vector-based statistics are computed, i.e. no MaxOut, MaxIn, MaxCompnts, TotCompnts. The total volume may be higher than the matrix-based volume. This function can also be used if the vector is only partially distributed. Unowned components (with X[j] = -1) are ignored. Input: A distributed matrix, X distribution of vector, with 0 <= X[j] < P, for 0 <= j < pM->n. X[j] is the owner of vector component j. dir = ROW (for distribution of v) or dir = COL (for u). Output: ComVol is total communication volume for direction dir, MaxOut is maximum number of data words sent by a processor, MaxIn is maximum number of data words received by a processor, MaxCompnts is maximum number of vector components of a processor. TotCompnts is total number of owned vector components. */ int P, q, *Nprocs, *procindex; long total, maxs, maxr, maxv, tots, totr, totv, l=0, j, *procstart, *Ns, *Nr, *Nv; if (!pM || !ComVol || !MaxOut || !MaxIn || !MaxCompnts || !TotCompnts) { fprintf(stderr, "CalcCom(): Null arguments!\n"); return FALSE; } if (dir == COL) { l = pM->m; } else if (dir == ROW) { l = pM->n; } else { fprintf(stderr, "CalcCom(): Unknown direction!\n"); return FALSE; } P = pM->NrProcs; Nprocs = (int *)malloc(l*sizeof(int)); if (Nprocs == NULL) { fprintf(stderr, "CalcCom(): Not enough memory!\n"); return FALSE; } if (!InitNprocs(pM, dir, Nprocs)) { fprintf(stderr, "CalcCom(): Unable to initialise processor array!\n"); return FALSE; } if (X == NULL) { /* Compute matrix-based communication volume */ tots= 0; for (j=0; j<l; j++) tots += MAX(Nprocs[j] - 1,0); *ComVol = tots; *MaxOut = 0; *MaxIn = 0; *MaxCompnts = 0; *TotCompnts = 0; free(Nprocs); return TRUE; } total = 0; for (j=0; j<l; j++) total += Nprocs[j]; procindex = (int *)malloc(total*sizeof(int)); procstart = (long *)malloc((l+1)*sizeof(long)); if (procindex == NULL || procstart == NULL) { fprintf(stderr, "CalcCom(): Not enough memory!\n"); return FALSE; } if (!InitProcindex(pM, dir, Nprocs, procstart, procindex)) { fprintf(stderr, "CalcCom(): Cannot initialise processor index!\n"); return FALSE; } Ns = (long *)malloc(P*sizeof(long)); Nr = (long *)malloc(P*sizeof(long)); Nv = (long *)malloc(P*sizeof(long)); if (Ns == NULL || Nr == NULL || Nv == NULL) { fprintf(stderr, "CalcCom(): Not enough memory!\n"); return FALSE; } /* Compute the number of components sent, received, owned by the P processors */ for (q=0; q<P; q++) { Ns[q] = 0; Nr[q] = 0; Nv[q] = 0; } for (j=0; j<l; j++) { q = X[j]; if (q >= 0 && q < P) { /* Disassign for a moment and reassign to update the statistics */ X[j] = -1; if (!AssignColumnToProc(X, procstart, procindex, Ns, Nr, Nv, NULL, j, q)) { fprintf(stderr, "CalcCom(): Unable to assign column!\n"); return FALSE; } } } /* Compute the maximum and total number of components sent, received, owned */ maxs= 0; maxr= 0; maxv= 0; tots= 0; totr= 0; totv=0; for (q=0; q<P; q++) { if (Ns[q] > maxs) maxs = Ns[q]; if (Nr[q] > maxr) maxr= Nr[q]; if (Nv[q] > maxv) maxv= Nv[q]; tots += Ns[q]; totr += Nr[q]; totv += Nv[q]; } *MaxOut= maxs; *MaxIn= maxr; *MaxCompnts= maxv; if (tots != totr) { fprintf(stderr, "CalcCom(): Total sent != total received!\n"); return FALSE; } *ComVol= totr; *TotCompnts= totv; free(Nv); free(Nr); free(Ns); free(procstart); free(procindex); free(Nprocs); return TRUE; } /* end CalcCom */ int PrintCom(int P, long l, int dir, long ComVol, long MaxOut, long MaxIn, long MaxCompnts, long TotCompnts) { /* This function prints the total and maximum communication volume and total and maximum number of vector components for a given distributed sparse matrix A and a distributed vector X as computed by CalcCom. Note that these numbers are for only one direction (vector). The function also prints scaled numbers (between brackets), which have been normalised by the length l of the vector. Input: P is the number of processors, l is the length of the vector, dir = ROW (for distribution of v) or dir = COL (for u) ComVol is total communication volume for direction dir, MaxOut is maximum number of data words sent by a processor, MaxIn is maximum number of data words received by a processor, MaxCompnts is maximum number of vector components of a processor. TotCompnts is total number of owned vector components (can be < l). */ double ComVolSc, MaxOutSc, MaxInSc; /* Scaled values, normalised by length of vector */ ComVolSc = (double) ComVol / (double) l; MaxOutSc = (double) MaxOut / (double) l; MaxInSc = (double) MaxIn / (double) l; printf("Communication for vector" " %c (after vector distribution):\n", dir == ROW ? 'v' : 'u'); printf(" vol : %ld (%g)\n", ComVol, ComVolSc); printf(" avg = vol/P : %g (%g)\n", (double)ComVol / (double)P, ComVolSc / (double)P); if (dir == ROW) { printf(" max sent : %ld (%g)\n", MaxOut, MaxOutSc); printf(" max recd : %ld (%g)\n", MaxIn, MaxInSc); } else { /* roles of sends and receives are interchanged */ printf(" max sent : %ld (%g)\n", MaxIn, MaxInSc); printf(" max recd : %ld (%g)\n", MaxOut, MaxOutSc); } printf("Nr components for vector " "%c (after vector distribution):\n", dir == ROW ? 'v' : 'u'); printf(" avg : %g (%g) \n", (double) TotCompnts / (double) P, (double) TotCompnts / (double) (P*l)); printf(" max : %ld (%g)\n", MaxCompnts, (double) MaxCompnts /(double) l); return TRUE; } /* end PrintCom */ int CalcLocalLowerBound(const struct sparsematrix *pM, int dir, long *LB, int *Pact) { /* This function computes the local lower bound on the maximum communication volume per processor for a distributed sparse matrix pM-> This bound for a vector distribution is solely based on the matrix distribution. Input: distributed matrix A, dir = ROW (for distribution of v) or dir = COL (for u). Output: LB = local lower bound (max over all processors), Pact = number of active processors, i.e., processors that will have to communicate. 0 <= Pact <= pM->NrNprocs. */ int P, q, r, Pactive; long l=0, t, i, j, local, Ls, Lr, *Index = NULL; /* Arrays of length pM->n */ int *Nprocs; /* Nprocs[j] is the number of processors that occur in column j */ int *occur; /* occur[j]= q means processor q occurs in column j and is its current highest-numbered processor. occur[j]= -1 means no processor occurs yet in column j */ /* Array of length P+1 */ long *Available; /* Available[r] = the number of columns with r processors that contain the current processor, 0 <= r <= P. */ if (!pM || !LB || !Pact) { fprintf(stderr, "CalcLocalLowerBound(): Null arguments!\n"); return FALSE; } if (dir == COL) { l = pM->m; Index = pM->i; } else if (dir == ROW) { l = pM->n; Index = pM->j; } else { fprintf(stderr, "CalcLocalLowerBound(): Unknown direction!\n"); return FALSE; } P = pM->NrProcs; Nprocs = (int *)malloc(l*sizeof(int)); occur = (int *)malloc(l*sizeof(int)); Available = (long *)malloc((P+1)*sizeof(long)); if (Nprocs == NULL || occur == NULL || Available == NULL) { fprintf(stderr, "CalcLocalLowerBound(): Not enough memory!\n"); return FALSE; } if (!InitNprocs(pM, dir, Nprocs)) { fprintf(stderr, "CalcLocalLowerBound(): Unable to initialise processor array!\n"); return FALSE; } /* Initialise occur */ for (j=0; j<l; j++) occur[j] = -1; #ifdef INFO2 printf("Local bounds for vector %c:\n", dir == ROW ? 'v' : 'u'); #endif local = 0; Pactive = 0; for (q=0; q<P; q++) { /*### Determine bound for processor q ###*/ /* Initialise Available[r] at number of columns with r processors that contain processor q */ for (r=0; r<=P; r++) Available[r] = 0; for (t=pM->Pstart[q]; t<pM->Pstart[q+1]; t++) { j= Index[t]; if (occur[j] != q) { occur[j] = q; Available[Nprocs[j]]++; } } /* Compute egoistic lower bound for processor q */ Ls= 0; /* number of sends for processor q */ Lr= 0; /* number of receives */ for (r=2; r<=P; r++) Lr += Available[r]; /* Initialise Lr at total number of columns with Nprocs >= 2 that contain processor q */ if (Lr > 0) Pactive++; for (r=2; r<=P; r++) { if (Ls + (r-1)*Available[r] <= Lr - Available[r]) { /* Grab all available columns with r processors */ Ls += (r-1)*Available[r]; Lr -= Available[r]; } else { /* Grab i columns with i as large as possible, such that new Ls <= Lr */ i = (Lr-Ls)/r; /* 0 <= i <= Available[r] */ Ls += (r-1)*i; Lr -= i; break; } } if (Ls > Lr) { fprintf(stderr, "CalcLocalLowerBound(): Ls > Lr!\n"); return FALSE; } #ifdef INFO2 printf(" Proc[%d]: %ld\n", q, Lr); /* lower bound = Lr */ #endif if (Lr > local) local = Lr; } *LB = local; *Pact = Pactive; free(Available); free(occur); free(Nprocs); return TRUE; } /* end CalcLocalLowerBound */ void PrintVecStatistics(int P, long *Ns, long *Nr, long *Nv) { /* This function prints for each processor the send, receive values and the number of components. Ns, Nr = number of sends/recvs; Nv = number of components. */ int q; if (!Ns || !Nr || !Nv) return; for (q=0; q<P; q++) printf(" Proc[%d]: Ns = %ld, Nr = %ld, Nv = %ld\n", q, Ns[q], Nr[q], Nv[q]); } /* end PrintVecStatistics */ int WriteVector(const long int *X, const char base, const char *name, long l, int P, FILE *fp, const struct opts *pOptions) { /* Base vector-writing function. Automatically adapts base of array to write. Assumes that we are writing a distribution vector when P>0. P=0 enables simple writing of a vector. l is the length of the vector to be written (X). base is the base value of X (usually 0). name is used only when EMM output is enabled. name is NULL if vector is the main object of the EMM file. */ long t; if (!X || !fp) { fprintf(stderr, "WriteVectorDistribution(): Null arguments!\n"); return FALSE; } if (ferror(fp)) { fprintf(stderr, "WriteVectorDistribution(): Unable to write to stream!\n"); return FALSE; } if (pOptions->OutputFormat == OutputEMM) { if( P == 0 ) { if (name==NULL) fprintf(fp, "%%%%Extended-MatrixMarket vector array integer general original\n"); else fprintf(fp, "%%%%%s vector array integer general original\n", name); } else { if (name==NULL) fprintf(fp, "%%%%Extended-MatrixMarket distributed-vector array integer general global\n"); else fprintf(fp, "%%%%%s distributed-vector array integer general global\n", name); } } if( P > 0 ) { fprintf(fp, "%ld %d\n", l, P); for (t = 0; t < l; t++) fprintf(fp, "%ld %ld\n", t+1, X[t]+(1-base)); } else { if( pOptions->OutputFormat == OutputEMM ) fprintf(fp, "%ld\n", l); for (t = 0; t < l; t++) fprintf(fp, "%ld\n", X[t]+(1-base)); } return TRUE; } /* end WriteVector */ int WriteVectorDistribution(const long int *X, const char *name, long l, int P, FILE *fp, const struct opts *pOptions) { /* This function writes the owners of the vector components to a file. Input: l is the number of vector components. P is the number of processors. X is an array of length l, containing the owners of the vector components. The input numbering is 0-based: 0 <= X[i] < P, for 0 <= i < l. The output file format is: 1 line with l , P 1 line per vector component, giving the index i and the owner X[i]. The output numbering is 1-based: 1 <= X[i] <= P, for 1 <= i <= l. This numbering is consistent with the Matrix Market format. */ return WriteVector( X, 0, name, l, P, fp, pOptions ); } /* end WriteVectorDistribution */ int WriteVectorCollection(long int **X, const char *name, const long i, const long *j, FILE *fp) { int l, k; if(name == NULL) fprintf(fp, "%%%%Extended-MatrixMarket vector-collection array integer general original\n"); else fprintf(fp, "%%%%%s vector-collection array integer general original\n", name); fprintf(fp, "%ld\n", i); for(l=0; l<i; l++) { fprintf(fp, "%ld\n", j[l]); for(k=0; k<j[l]; k++) fprintf(fp, "%ld\n", (X[l][k]+1)); /* base 1 */ } return TRUE; } /* end WriteVectorCollection */ long * ReadVector(const char base, long *l, int *P, FILE *fp) { /* Base vector-reading function. Automatically adapts base of array to read. Assumes that we are writing a distribution vector with P>0. base is the base value of X (usually 0). l is the length of the vector read (X). P is the number of processors. The return value is the array of vector values (X), or NULL if an error occurs. */ long t; if (!fp) { fprintf(stderr, "ReadVector(): Null arguments!\n"); return NULL; } if (ferror(fp)) { fprintf(stderr, "ReadVector(): Unable to write to stream!\n"); return NULL; } int SizeRead=FALSE, count=0; char *line, linebuffer[MAX_LINE_LENGTH]; while (SizeRead == FALSE && (line = fgets(linebuffer, MAX_LINE_LENGTH, fp)) != NULL) { /* a new line has been read */ if (strlen(line) > 0) { if (linebuffer[0] != '%') { /* The size line */ /* Read l and P from the line */ count = sscanf(line, "%ld %d\n", l, P); if (count < 2 || *l < 1 || *P < 0) { fprintf(stderr, "ReadVector(): Error in vector size!\n"); return NULL; } else { SizeRead = TRUE; } } } } if(*P == 0) { fprintf(stderr, "ReadVector(): This function has not been implemented for undistributed vectors!\n"); return NULL; } long *X = (long *)malloc((*l)*sizeof(long)); long tmp; if (X == NULL) { fprintf(stderr, "ReadVector(): Not enough memory!\n"); return NULL; } for (t = 0; t < *l; t++) { if(fscanf(fp, "%ld %ld\n", &tmp, &(X[t])) != 2) { fprintf(stderr, "ReadVector(): Read Error!\n"); return NULL; } X[t] -= (1-base); } return X; } /* end ReadVector */