I've been working on a program for a research project in physics. The program is written in C but uses a fortran function (it's called "zgesv" and it's from the LAPACK and BLAS libraries).
The idea is to solve a system of equations. LHS.X = RHS for the vector "X". int INFO is passed to zgesv. If the equations can't be solved (i.e. LHS is singular), info is supposed to return a value != 0;
Trying to run my program as "normal" by passing my double * to the solve function (solution 1, in the following code), INFO is returned as 0 -- even if LHS is singular. Not only that, but if I print out solution, it's a disaster of numbers - some small, some zero, some huge.
If I create LHS and RHS manually by doing LHS[] = {value 1, value 2, ...}; RHS[] = {value 1, value 2, ...}; Then INFO is returned as 3 as expected, and solution is equal to RHS (which is also what I would expect.)
If I declare arrays LHS2[] = {value 1, value 2, ...}; RHS2[] = {value 1, value 2, ...}; and copy them into LHS and RHS element by element, then INFO is returned as 8 (it's weird to me that it's different than the previous case.), and solution is equal to RHS.
I feel like this must be some fundamental difference between the two ways of declaring an array. I don't really have access to muck with the function zgesv to make it take the types I want since A) it's a standard in the scientific community, and B) it's written in fortran - which I haven't ever learned.
Can anyone explain what's going on here? Is there a simple (and preferably computationally cheap) fix? Should I just copy my double * into an array[]?
Here is my program (modified for testing):
include
#include <stdlib.h>
#include <math.h>
#define PI 3.1415926535897932384626433832795029L
#define ERROR_VALUE 911.911
int* getA(int N, char* argv[])
{
int i;
int* AMatrix;
AMatrix = malloc(N * N * sizeof(int));
if (AMatrix == NULL)
{
printf("Failed to allocate memory for AMatrix. Exiting.");
exit (EXIT_FAILURE);
}
for (i = 0; i < N * N; i++)
{
AMatrix[i] = atoi(argv[i + 1]);
}
return AMatrix;
}
double* generateLHS(int N, int* AMatrix, int TAPs[], long double kal)
{
double S, C;
S = sinl(kal);
C = cosl(kal);
printf("According to C, Sin(Pi/2) = %.25lf and Cos(Pi/2) = %.25lf", S, C);
// S = 1;
// C = 0;
double* LHS;
LHS = malloc(N * N * 2 * sizeof(double));
if (LHS == NULL)
{
printf("Failed to allocate memory for LHS. Exiting.");
exit (EXIT_FAILURE);
}
int i;
for (i = 0; i < N * N; i++)
{
LHS[2 * i] = -1 * AMatrix[i];
LHS[(2 * i) + 1] = 0;
}
for (i = 0; i <= 2 * N * N - 2; i = i + (2 * N) + 2)
{
LHS[i] = LHS[i] + (2 * C);
}
int j;
for (i = 0; i <= 3; i++)
{
j = 2 * N * TAPs[i] + 2 * TAPs[i];
LHS[j] = LHS[j] - C;
LHS[j + 1] = LHS[j + 1] - S;
}
return LHS;
}
double* generateRHS(int N, int inputTailAttachmentPoint, long double kal)
{
double* RHS;
RHS = malloc(2 * N * siz开发者_Go百科eof(double));
int i;
for (i = 0; i < 2 * N; i++)
{
RHS[i] = 0.0;
}
RHS[2 * inputTailAttachmentPoint + 1] = - 2 * sin(kal);
return RHS;
}
double* solveUsingLUD(int N, double* LHS, double* RHS)
{
int INFO; /*Info is changed by ZGELSD to 0 if the computation was carried out successfully. Else it changes to some none-zero integer. */
int ione = 1;
int LDA = N;
int LDB = N;
int n = N;
int* IPV = malloc(N * sizeof(int));
if (IPV == NULL)
{
printf("Failed to allocate memory for IPV. Exiting.");
exit (EXIT_FAILURE);
}
zgesv_(&n, &ione, LHS, &LDA, IPV, RHS, &LDB, &INFO);
free(IPV);
if (INFO != 0)
{
printf("\n ERROR: info = %d\n", INFO);
}
return RHS;
}
void printComplexVectors(int numberOfRows, double* matrix)
{
int i;
for (i = 0; i < 2 * numberOfRows - 1; i = i + 2)
{
printf("%f + %f*i \n", matrix[i], matrix[i + 1]);
}
printf("\n");
}
int main(int argc, char* argv[])
{
int N = 8;
int* AMatrix;
AMatrix = getA(N, argv);
int TAPs[]={4,4,4,3};
long double kal = PI/2;
double *LHS, *RHS;
LHS = generateLHS(N, AMatrix, TAPs,kal);
int i;
RHS = generateRHS(N, TAPs[0],kal);
printf("\n LHS = \n{{");
for (i = 0; i < 2 * N * N - 1;)
{
printf("%lf + ", LHS[i]);
i = i + 1;
printf("%lfI", LHS[i]);
i = i + 1;
if ((int)(.5 * i) % N == 0)
{
printf("},\n{");
}
else
{
printf(",");
}
}
printf("}");
double LHS2[] = {0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1.000000,0.000000,-1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1.000000,0.000000,-1.000000,0.000000,-1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1.000000,0.000000,-1.000000,0.000000,-1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1.000000,0.000000,0.000000,0.000000,0.000000,-1.000000,0.000000,-1.000000,0.000000,-1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-0.000000,-3.000000,0.000000,0.000000,-1.000000,0.000000,-1.000000,0.000000,-1.000000,0.000000,-1.000000,0.000000,-1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1.000000,0.000000,-1.000000,0.000000,-1.000000,0.000000,-1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-1.000000,0.000000,-1.000000,0.000000,-1.000000,0.000000,-1.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000};
double RHS2[] ={0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,-2.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000};
printf("comparing LHS and LHS2\n");
for (i = 0; i < 2 * N * N;)
{
if (LHS[i] != LHS2[i]) {
printf( "LHS difference at index %d\n", i);
printf("LHS[%d] = %.16lf\n", i, LHS[i]);
printf("LHS2[%d] = %.16lf\n", i, LHS2[i]);
printf("The difference is %.16lf\n", LHS[i] - LHS2[i]);
}
i = i + 1;
}
printf("\n");
printf("comparing RHS and RHS2\n");
for (i = 0; i < 2 * N;)
{
if (RHS[i] != RHS2[i]) {
printf( "RHS difference at index %d\n", i);
printf("RHS[%d] = %.16lf\n", i, RHS[i]);
printf("RHS2[%d] = %.16lf\n", i, RHS2[i]);
printf("The difference is %.16lf", RHS[i] - RHS2[i]);
}
i = i + 1;
}
printf("\n");
double *solution;
solution = solveUsingLUD(N,LHS,RHS);
printf("\n Solution = \n{");
for (i = 0; i < 2 * N - 1;)
{
printf("{%.16lf + ", solution[i]);
i = i + 1;
printf("%.16lfI},", solution[i]);
i = i + 1;
printf("\n");
}
solution = solveUsingLUD(N,LHS2,RHS2);
printf("Solution2 = \n{");
for (i = 0; i < 2 * N - 1;)
{
printf("{%lf + ", solution[i]);
i = i + 1;
printf("%lfI},", solution[i]);
i = i + 1;
printf("\n");
}
for (i = 0; i < 2 * N * N;)
{
LHS[i] = LHS2[i];
i = i + 1;
}
for (i = 0; i < 2 * N;)
{
RHS[i] = RHS2[i];
i = i + 1;
}
solution = solveUsingLUD(N,LHS,RHS);
printf("Solution3 = \n{");
for (i = 0; i < 2 * N - 1;)
{
printf("{%lf + ", solution[i]);
i = i + 1;
printf("%lfI},", solution[i]);
i = i + 1;
printf("\n");
}
return 0;
}
I use the compile line
gcc -lm -llapack -lblas PiecesOfCprogarm.c -Wall -g
and I execute with:
./a.out 0 0 0 0 1 1 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1 1 1 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1 0 0 0
Which gives the output
According to C, Sin(Pi/2) = 1.0000000000000000000000000 and Cos(Pi/2) = -0.0000000000000000000271051
LHS =
{{-0.000000 + 0.000000I,0.000000 + 0.000000I,0.000000 + 0.000000I,0.000000 + 0.000000I,-1.000000 + 0.000000I,-1.000000 + 0.000000I,0.000000 + 0.000000I,0.000000 + 0.000000I},
{0.000000 + 0.000000I,-0.000000 + 0.000000I,0.000000 + 0.000000I,0.000000 + 0.000000I,0.000000 + 0.000000I,-1.000000 + 0.000000I,-1.000000 + 0.000000I,-1.000000 + 0.000000I},
{0.000000 + 0.000000I,0.000000 + 0.000000I,-0.000000 + 0.000000I,0.000000 + 0.000000I,0.000000 + 0.000000I,-1.000000 + 0.000000I,-1.000000 + 0.000000I,-1.000000 + 0.000000I},
{0.000000 + 0.000000I,0.000000 + 0.000000I,0.000000 + 0.000000I,-0.000000 + -1.000000I,0.000000 + 0.000000I,0.000000 + 0.000000I,-1.000000 + 0.000000I,-1.000000 + 0.000000I},
{-1.000000 + 0.000000I,0.000000 + 0.000000I,0.000000 + 0.000000I,0.000000 + 0.000000I,0.000000 + -3.000000I,0.000000 + 0.000000I,-1.000000 + 0.000000I,-1.000000 + 0.000000I},
{-1.000000 + 0.000000I,-1.000000 + 0.000000I,-1.000000 + 0.000000I,0.000000 + 0.000000I,0.000000 + 0.000000I,-0.000000 + 0.000000I,0.000000 + 0.000000I,0.000000 + 0.000000I},
{0.000000 + 0.000000I,-1.000000 + 0.000000I,-1.000000 + 0.000000I,-1.000000 + 0.000000I,-1.000000 + 0.000000I,0.000000 + 0.000000I,-0.000000 + 0.000000I,0.000000 + 0.000000I},
{0.000000 + 0.000000I,-1.000000 + 0.000000I,-1.000000 + 0.000000I,-1.000000 + 0.000000I,-1.000000 + 0.000000I,0.000000 + 0.000000I,0.000000 + 0.000000I,-0.000000 + 0.000000I},
{}comparing LHS and LHS2
LHS difference at index 0
LHS[0] = -0.0000000000000000
LHS2[0] = 0.0000000000000000
The difference is -0.0000000000000000
LHS difference at index 18
LHS[18] = -0.0000000000000000
LHS2[18] = 0.0000000000000000
The difference is -0.0000000000000000
LHS difference at index 36
LHS[36] = -0.0000000000000000
LHS2[36] = 0.0000000000000000
The difference is -0.0000000000000000
LHS difference at index 54
LHS[54] = -0.0000000000000000
LHS2[54] = 0.0000000000000000
The difference is -0.0000000000000000
LHS difference at index 72
LHS[72] = 0.0000000000000000
LHS2[72] = -0.0000000000000000
The difference is 0.0000000000000000
LHS difference at index 90
LHS[90] = -0.0000000000000000
LHS2[90] = 0.0000000000000000
The difference is -0.0000000000000000
LHS difference at index 108
LHS[108] = -0.0000000000000000
LHS2[108] = 0.0000000000000000
The difference is -0.0000000000000000
LHS difference at index 126
LHS[126] = -0.0000000000000000
LHS2[126] = 0.0000000000000000
The difference is -0.0000000000000000
comparing RHS and RHS2
Solution =
{{1.0000000000000000 + -0.0000000000000000I},
{-1.0000000000000000 + -0.0000000000000000I},
{-0.0000000000000000 + 0.0000000000000000I},
{-0.0000000000000000 + 0.0000000000000000I},
{0.6000000000000000 + 0.2000000000000000I},
{-0.0000000000000000 + -0.0000000000000000I},
{-6854258945071195.0000000000000000 + 4042255275298396.0000000000000000I},
{6854258945071195.0000000000000000 + -4042255275298396.0000000000000000I},
ERROR: info = 3
Solution2 =
{{0.000000 + 0.000000I},
{0.000000 + 0.000000I},
{0.000000 + 0.000000I},
{0.000000 + 0.000000I},
{0.000000 + -2.000000I},
{0.000000 + 0.000000I},
{0.000000 + 0.000000I},
{0.000000 + 0.000000I},
ERROR: info = 8
Solution3 =
{{0.000000 + 0.000000I},
{0.000000 + 0.000000I},
{0.000000 + 0.000000I},
{0.000000 + 0.000000I},
{0.000000 + -2.000000I},
{0.000000 + 0.000000I},
{0.000000 + 0.000000I},
{0.000000 + 0.000000I},
You have some syntactical errors in your code (unmatched parentheses and quotes) but I suppose those slipped in when you copied the code here.
My guess is that the following could cause the problem:
solveSystemByLUD(int N, double *LHS, double* RHS, ...)
{
...
}
This declares a function that returns int
but you are returning double *
. Add the return type and see if anything changes.
EDIT:
After your clarification there are still some flaws - the code still cannot compile because of this line:
* SUBROUTINE ZGESV( N, Nrhs, A, LDA, IPIV, B, LDB, INFO ) Solves A * X = B and stores the answer in B. If the solver worked, INFO is set to 0.
*/
There is an opening comment missing.
Also you are assigning results of sin() and cos() to integer variables (C and S are integers by default):
S = sin(kal);
C = cos(kal);
Judging from your code this is not what you want.
The last thing to notice is that in LHS2
the last element is missing (sizeof(LHS2)/sizeof(double)
is 127 instead of 2 * 8 * 8
= 128). This means you are reading and writing beyond the end of an array which causes undefined behavior and might cause the problems you see.
EDIT 2:
One more thing: you are reading argv[0]
in the function getA() which is always a path to the executable. You should start reading at argv[1]
. And to be safe you should check if the user supplied enough arguments (argc - 1 >= N * N
).
First off: I come from the Fortran side of it and I have no C experience, hence what I say may or may not be relevant.
BLAS/LAPACK routines expect to receive Fortran arrays--- essentially an address of a first element of a contiguous chunk of memory. The array layout is assumed to be column-major and the size is controlled by LDA. Essentially no checks are made, so if what you're sending in does not conform to these expectations, all hell breaks loose.
What I've found from experience (or rather --- saw somebody doing it and copied it) calling LAPACK routines from C++ is that the following works: if you use an std::vector<double>
to store your matrices, and invoke the LAPACK calls using &A[0]
(where A is an std::vector<double>
), it works. My (maybe all too naive) understanding is that the standard library vector is "a sort of like" C array, so this, I guess, can be directly translated into C.
Also, you are using complex routines, which may or may not change something in subtle ways. I would guess that Fortran's COMPLEX*16 is equivalent to struct{double real_part; double imag_part;}
Finally, the reference implementation of LAPACK routines is readily available from netlib, here http://www.netlib.org/lapack/complex16/zgesv.f
PI
is not exactly representable as a double
. Therefore neither is PI/2
.
Therefore sin(kal)
and cos(kal)
are not necessarily equal to 1 or 0, respectively.
(If you are trying to fix this by assigning them to "int", remember that C typically rounds down when doing such a conversion.)
[edit]
Actually I have a better suggestion...
When compiling Fortran code with GCC, you probably want to use -ffloat-store
. Fortran code is often written with careful attention to the quantization of double-precision numbers... But by default, intermediate values on x86 can have extra precision (because the floating-point unit uses 80 bits internally). Normally, the extra precision only helps, but for some numerically-sensitive code (like, say, sin(PI/2)), it can cause unexpected results.
-ffloat-store
avoids this extra precision.
[edit 2, in response to comments]
To get better precision from sin and cos, I suggest the following.
First, declare kal
as long double
, both in main
and in the argument lists to the other functions.
Second, invoke sinl()
and cosl()
instead of sin()
and cos()
.
Third, define PI like this:
#define PI 3.1415926535897932384626433832795029L
Leave everything else (especially S
and C
) as double
. See if it helps...
Here's another guess:
I wonder if inside of generateLHS()
and/or generateRHS()
you're generating double
values with very small errors that show as zero (or at least zero in the fractional part) when you display them with printf()
, but the differences affect the calcuations in zgesv_()
.
Can you try adding these loops to your test run just after the declarations of LHS2
and RHS2
:
printf("comparing LHS and LHS2\n");
for (i = 0; i < 2 * N * N;)
{
if (LHS[i] != LHS2[i]) {
printf( "LHS difference at index %d\n", i);
}
i = i + 1;
}
printf("\n");
printf("comparing RHS and RHS2\n");
for (i = 0; i < 2 * N;)
{
if (RHS[i] != RHS2[i]) {
printf( "RHS difference at index %d\n", i);
}
i = i + 1;
}
printf("\n");
精彩评论