On Sat, 24 Mar 2007, Jeff Squyres wrote:
> FWIW, I do see one possible problem area: is there a reason you're
> looping over calling MPI_WAIT instead of calling MPI_WAITALL? This
> is a potential area for deadlock if requests are being completed out
> of order. It seems unlikely given the code above (that you're just
> blocking on WAITs and not initiating any other messages), but I
> figured I'd raise the point anyway. ...
RLN: This is simply my ignorance. When I took an existing blocking
subroutine and modified it to form the nonblocking routine, I didn't
realize that an MPI_WAITALL call existed. The code has been modified.
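In outline, the change replaces the loop of single waits with one
call that completes all six requests (a minimal sketch; note that
MPI_WAITALL requires an array of statuses, one per request):
*******************************************************************
      integer :: statuses(MPI_STATUS_SIZE, 6)   ! one status per request
! ... before: complete the six sends one at a time
!     do i=1, 6
!        call MPI_WAIT(req_send(i), status, ierr)
!     enddo
! ... after: hand all six requests to the library at once
      call MPI_WAITALL(6, req_send, statuses, ierr)
*******************************************************************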
> FWIW, I usually tell users to avoid MPI_PROBE. It almost always
> forces the MPI to perform an additional copy. It looks like you're
> deciding what to do based on the tag, so you could simply post some
> non-blocking receives on the tags and/or peers that you want.
> ...
RLN: I will look into this; I was unaware of the additional copy
associated with MPI_PROBE when I was considering my alternatives for
building the receive routine.
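(A rough sketch of the probe-free alternative I will investigate:
since each process already knows its neighbors through p_adj and the
section sizes through CC_size, the receives can be pre-posted. Here
R_buf, recv_size and max_face are placeholders, not names from the
current code.)
*******************************************************************
! ... sketch only: pre-post one nonblocking receive per active face,
! ... then complete them all at once -- no MPI_PROBE, no extra copy.
! ... R_buf, recv_size and max_face are placeholders.
      integer :: req_recv(6), statuses(MPI_STATUS_SIZE, 6)
      integer :: recv_size(6)             ! per-face lengths (via CC_size)
      real(kind=kv) :: R_buf(max_face, 6) ! one column per face
      req_recv = MPI_REQUEST_NULL
      do i = start, 6, step
        process_no = p_adj(i)
        if (process_no < 1) cycle
        call MPI_IRECV(R_buf(1,i), recv_size(i), MPI_DOUBLE_PRECISION, &
                       process_no-lag, recv_size(i), communicator, &
                       req_recv(i), ierr)
      enddo
      call MPI_WAITALL(6, req_recv, statuses, ierr)
*******************************************************************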
> This is your problem. You are deallocating the X_value buffer before
> the send has [potentially] completed. So if MPI hasn't finished
> sending before MPI_ISEND returns, when MPI goes to complete the
> send in MPI_WAIT, the buffer is now potentially pointing to garbage
> in memory (which MPI will dutifully send -- if it doesn't seg fault!).
>
> Once you have started a non-blocking MPI action, you must wait for
> that action to complete before you release any resources associated
> with that action.
>
> I'm guessing that having proper deallocation will fix your problem.
>
RLN: My assumption here had been that MPI_ISEND created its own buffer
to hold the send information, and that I was free to deallocate this
array after the call to MPI_ISEND; I can see now that this wasn't an
appropriate assumption. I have modified the vec_isend routine so that
there is an X_value_i array associated with each MPI_ISEND as
follows:
*******************************************************************
subroutine vec_isend(X, communicator, lag, start, step)
! ... Send segment of X array, corresponding to C_{i,j} connectors,
! ... to surrounding partitions.
! ... nonblocking version
! ...
! ... argument list
! ...
integer, intent(in) :: communicator, lag, start, step
real(kind=kv), dimension(:), intent(in) :: X
! ...
! ... local variables
! ...
integer :: ierr, error
integer :: i, process_no
integer :: status(MPI_STATUS_SIZE)
character(len=64) :: err_loc_message
! ...
! ...................................................................
! ...
ierr=0; req_send=MPI_REQUEST_NULL
! ... X_value_1..6 and req_send are shared (module) variables;
! ... AC_multiply deallocates the buffers after MPI_WAITALL
nullify (X_value_1, X_value_2, X_value_3, X_value_4, X_value_5, X_value_6)
do i=start,6,step
process_no=p_adj(i)
! ... i indicates partition face
if (process_no<1) cycle
! ...
! ... Array X_value filled in subroutine load_send.
! ...
Select case(i)
case(1)
allocate(X_value_1(1:CC_size(1)), stat=error)
call load_send(X_value_1, index_1, CC_size(1))
call MPI_ISEND(X_value_1, CC_size(1), MPI_DOUBLE_PRECISION, &
process_no-lag, CC_size(1), communicator, req_send(1), ierr)
err_loc_message='SR_utilities_child vec_isend MPI_ISEND 1'
call error_class(communicator, ierr, err_loc_message)
case(2)
allocate(X_value_2(1:CC_size(1)), stat=error)
call load_send(X_value_2, index_2, CC_size(1))
call MPI_ISEND(X_value_2, CC_size(1), MPI_DOUBLE_PRECISION, &
process_no-lag, CC_size(1), communicator, req_send(2), ierr)
err_loc_message='SR_utilities_child vec_isend MPI_ISEND 2'
call error_class(communicator, ierr, err_loc_message)
case(3)
allocate(X_value_3(1:CC_size(2)), stat=error)
call load_send(X_value_3, index_3, CC_size(2))
call MPI_ISEND(X_value_3, CC_size(2), MPI_DOUBLE_PRECISION, &
process_no-lag, CC_size(2), communicator, req_send(3), ierr)
err_loc_message='SR_utilities_child vec_isend MPI_ISEND 3'
call error_class(communicator, ierr, err_loc_message)
case(4)
allocate(X_value_4(1:CC_size(2)), stat=error)
call load_send(X_value_4, index_4, CC_size(2))
call MPI_ISEND(X_value_4, CC_size(2), MPI_DOUBLE_PRECISION, &
process_no-lag, CC_size(2), communicator, req_send(4), ierr)
err_loc_message='SR_utilities_child vec_isend MPI_ISEND 4'
call error_class(communicator, ierr, err_loc_message)
case(5)
allocate(X_value_5(1:CC_size(3)), stat=error)
call load_send(X_value_5, index_5, CC_size(3))
call MPI_ISEND(X_value_5, CC_size(3), MPI_DOUBLE_PRECISION, &
process_no-lag, CC_size(3), communicator, req_send(5), ierr)
err_loc_message='SR_utilities_child vec_isend MPI_ISEND 5'
call error_class(communicator, ierr, err_loc_message)
case(6)
allocate(X_value_6(1:CC_size(3)), stat=error)
call load_send(X_value_6, index_6, CC_size(3))
call MPI_ISEND(X_value_6, CC_size(3), MPI_DOUBLE_PRECISION, &
process_no-lag, CC_size(3), communicator, req_send(6), ierr)
err_loc_message='SR_utilities_child vec_isend MPI_ISEND 6'
call error_class(communicator, ierr, err_loc_message)
end select
! ...
enddo
! ...
contains
subroutine load_send(X_value, index_fn, loop_size)
! ... Extract segment from X array; store in X_value array.
! ... index_fn: function for cell face ordering indicator
real(kind=kv), dimension(:), intent(out) :: X_value
integer, intent(in) :: loop_size
integer, external :: index_fn
integer :: i
! ...
do i=1, loop_size
X_value(i)=X(index_fn(i))
enddo
end subroutine load_send
end subroutine vec_isend
************************************************************************
These arrays are now deallocated in the AC_multiply subroutine after
the call to MPI_WAITALL as follows:
***********************************************************************
subroutine AC_multiply(X, rhs)
! ... matrix-vector multiply: A*X=rhs
! ... Find rhs=A*X, where A is the partitioned matrix.
! ... Each process corresponds to a partition of matrix A.
! ...
! ... argument list
! ...
real(kind=kv), dimension(:), intent(in) :: X
real(kind=kv), dimension(:), intent(out) :: rhs
! ...
! ... local variables
! ...
integer :: i, ierr
integer :: statuses(MPI_STATUS_SIZE, 6)  ! MPI_WAITALL needs one status per request
! .....................................................................
stride=nx; reach=ny; top=nz
! ...
! ... Nonblocking sends; does not function with LAM 7.1.3 ?
! ...
! xxx if (no_partitions>1) call vec_isend(X, part_intracomm, 1, 1, 1)
if (no_partitions>1) call vec_isend(X, cp_intracomm, 0, 1, 1)
! ...
! ... Multiply partition by vector X corresponding to partition nodes.
! ...
call a_multiply(X, rhs)
! ...
! ... Receive X vector info from black partitions
! ...
if (no_partitions>1) then
! xxx call vec_fnc_recv(recv_prod_1, rhs, part_intracomm, 1, 1, 1)
call vec_fnc_recv(recv_prod_1, rhs, cp_intracomm, 0, 1, 1)
! ...
call MPI_WAITALL(6, req_send, statuses, ierr)
if (associated(X_value_1)) deallocate (X_value_1)
if (associated(X_value_2)) deallocate (X_value_2)
if (associated(X_value_3)) deallocate (X_value_3)
if (associated(X_value_4)) deallocate (X_value_4)
if (associated(X_value_5)) deallocate (X_value_5)
if (associated(X_value_6)) deallocate (X_value_6)
endif
! ...
end subroutine AC_multiply
*************************************************************************
(Here, I have chosen to use my other communicator (cp_intracomm) so
that I don't have to deal with any offsets (lag) in the process
numbers.)
Unfortunately, the problem persists; with LAM 7.1.3 the sent vector
section does not correspond to the received vector section. A sample
printout for a simple 4-processor problem (plus a master process)
follows:
***************************************************************************
A print statement in process 1 indicated the following vector section
was sent to process 4:
isend to process: 4 X= 9.819079043969328 20.03739327293852 2.937165521825665 9.672045129594361 32.51248029517219 3.703790279916702 26.91231674939189 4.919285744412965 34.53091848255578
However, print statements in process 4 indicate that the following vector section was received from process 1:
Probe from sender: 1 process_no= 1
Recv from sender: 1 p_adj(j)= 1 X= 23.92983122355254 24.90961832683145 11.47213807665227 34.14693740318928 18.33479365003256 3.294915109537070 26.07733221657863 29.58106493204133 1.731450359495497
****************************************************************************
The same print statements in an OpenMPI run indicate that the correct
vector section is received. If you folks have any other suggestions, I
would be appreciative.
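(One further check I intend to add while chasing this, sketched
below: sum each send buffer right after MPI_ISEND and again after
MPI_WAITALL; if the two values differ, the buffer was disturbed while
the send was pending. Only the face-1 case is shown, and chk1/chk2
are scratch variables, not names from the current code.)
*******************************************************************
      real(kind=kv) :: chk1, chk2           ! debugging scratch sums
      chk1 = sum(X_value_1(1:CC_size(1)))   ! just after MPI_ISEND
! ... later, after the MPI_WAITALL in AC_multiply:
      chk2 = sum(X_value_1(1:CC_size(1)))
      if (chk1 /= chk2) write(*,*) 'process', process_id, &
           'face-1 buffer changed while send pending:', chk1, chk2
*******************************************************************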
-- Rich Naff
> On Mar 23, 2007, at 6:43 PM, Rich Naff wrote:
>
>> I have a problem which runs nicely with OpenMPI, LAM 7.0.5 and
>> Sun's LAM 6.3-b3, but fails on LAM 7.1.3. This doesn't rule out the
>> possibility that a programming error is involved, but does make LAM
>> 7.1.3 somewhat more suspect. In any case, I have been chasing this
>> problem for about a week now and would appreciate some help. The
>> problem concerns my subroutine AC_multiply which, in its present
>> configuration, has the form:
>>
>> **********************************************************************
>> subroutine AC_multiply(X, rhs)
>> ! ... matrix-vector multiply: A*X=rhs
>> ! ... Find rhs=A*X, where A is the partitioned matrix.
>> ! ... Each process corresponds to a partition of matrix A.
>> ! ...
>> ! ... argument list
>> ! ...
>> real(kind=kv), dimension(:), intent(in) :: X
>> real(kind=kv), dimension(:), intent(out) :: rhs
>> ! ...
>> ! ... local variables
>> ! ...
>> integer :: i, ierr
>> integer :: status(MPI_STATUS_SIZE)
>> ! ..............................................................
>> stride=nx; reach=ny; top=nz
>> ! ...
>> ! ... Nonblocking sends; does not function with LAM 7.1.3 ?
>> ! ...
>> if (no_partitions>1) call vec_isend(X, part_intracomm, 1, 1, 1)
>> ! ...
>> ! ... Multiply partition by vector X corresponding to partition nodes.
>> ! ...
>> call a_multiply(X, rhs)
>> ! ...
>> ! ... Receive X vector info from black partitions
>> ! ...
>> if (no_partitions>1) then
>> call vec_fnc_recv(recv_prod_1, rhs, part_intracomm, 1, 1, 1)
>> ! ...
>> do i=1, 6
>> call MPI_WAIT(request(i), status, ierr)
>> enddo
>> endif
>> ! ...
>> end subroutine AC_multiply
>> **********************************************************************
>
> FWIW, I do see one possible problem area: is there a reason you're
> looping over calling MPI_WAIT instead of calling MPI_WAITALL? This
> is a potential area for deadlock if requests are being completed out
> of order. It seems unlikely given the code above (that you're just
> blocking on WAITs and not initiating any other messages), but I
> figured I'd raise the point anyway.
>
> It's been a long time since I looked at the LAM progression engine
> code, but it *may* be blocking on a single request and not trying to
> progress any other messages when you use the singular MPI_WAIT
> function (vs. the vector MPI_WAITALL function). I have a dim
> recollection that there are cases where this *could* happen -- I
> don't think it always happens that way. I could also be wrong. :-)
> (btw, if LAM does this, it is perfectly legal to do so -- there is
> nothing in the MPI spec that says that MPI_WAIT has to make progress
> on other pending requests)
>
> Regardless, it's probably more efficient to simply call MPI_WAITALL
> and give it all 6 requests and let the MPI library sort out the
> completion order.
>
>> As the name implies, this is a vector/matrix multiply subroutine where
>> the matrix has been partitioned into blocks based on a CCFD
>> representation. The interior of the blocks, where the bulk of the
>> work is done, uses a general-purpose vector/matrix multiply scheme for
>> compressed diagonal storage: subroutine a_multiply(X, rhs). The
>> various block vector/matrix multiplies obtained from subroutine
>> a_multiply are tied together by the passing of appropriate sections of
>> the X vector to processes representing surrounding blocks; this is
>> done with the call to subroutine vec_isend, where up to six X-vector
>> sections can be passed to processes representing surrounding blocks.
>> The current process also looks to receive up to six X-vector sections
>> from surrounding blocks (processes); this is done in subroutine
>> vec_fnc_recv, whereupon these sections are multiplied by appropriate
>> coefficients and added to the RHS vector. In this manner, a complete
>> block-by-block vector/matrix multiply is carried out. I have chosen
>> to use non-blocking sends (MPI_ISEND) to send the small X-vector
>> sections in subroutine vec_isend; this seems logical, as blocking sends
>> could lead to a deadlock. Integer request flags associated with the
>> non-blocking sends are cleared subsequent to the call to subroutine
>> vec_fnc_recv. When an X-vector section is received in subroutine
>> vec_fnc_recv, the sending process is first identified; knowing the
>> sending process allows the receiving process to form the appropriate
>> addition to the RHS vector.
>>
>> My problem arises in that X-vector sections received are not always
>> the
>> correct X-vector sections. This could be a problem of the receive
>> construct:
>>
>> **********************************************************************
>> subroutine vec_fnc_recv(load_fnc, r_vec, communicator, lag, start, step)
>> ! ... Receive R_value array corresponding to entries in C_{i,j};
>> ! ... apply load_fnc to R_value and add to R.
>> ! ... step=1: accept R_value array from all adjoining partitions.
>> ! ... step=2: accept R_value array from alternate partitions
>> ! ... start=1: lower faces or all.
>> ! ... start=2: upper faces, only step=2
>> ! ... if (communicator==cp_intracomm) lag=0
>> ! ... if (communicator==part_intracomm) lag=1
>> ! ... R, R_value and part_con passed via module
>> ! ... i_val: upon return, has value of 1; debugging purposes only
>> ! ...
>> ! ... load_fnc specified by calling program;
>> ! ... see RECEIVE FUNCTIONS below.
>> ! ...
>> ! ... argument list
>> ! ...
>> integer, external :: load_fnc
>> integer, intent(in) :: communicator, lag, start, step
>> real(kind=kv), dimension(:), intent(inout), target :: r_vec
>> ! ...
>> ! ... local variables
>> ! ...
>> integer :: ierr, error
>> integer :: i, j, tag, sender, process_no, i_val
>> integer :: status(MPI_STATUS_SIZE)
>> character(len=64) :: err_loc_message
>> ! ...
>> ! ...................................................................
>> ! ...
>> nullify (part_con, R_value)
>> R=>r_vec
>> ierr=0; i_val=-1
>> err_loc_message='SR_utilities_child vec_fnc_recv 1'
>> do i=start,6,step
>> process_no=p_adj(i)
>> ! ... i indicates partition face
>> if (process_no<1) cycle
>> CALL MPI_PROBE(MPI_ANY_SOURCE, MPI_ANY_TAG, communicator, &
>> status, ierr)
>> sender=status(MPI_SOURCE)
>> tag=status(MPI_TAG)
>
> FWIW, I usually tell users to avoid MPI_PROBE. It almost always
> forces the MPI to perform an additional copy. It looks like you're
> deciding what to do based on the tag, so you could simply post some
> non-blocking receives on the tags and/or peers that you want.
>
> For zero length / short messages, the performance difference is
> probably negligible, but I typically avoid PROBEs like the plague
> simply because I view them as ugly. :-)
>
> All that being said, I don't see a deadlock problem with what you've
> done here.
>
>> if (tag==0) then
>> ! ... no content in array
>> call MPI_RECV(MPI_BOTTOM, 0, MPI_INTEGER, sender, &
>> tag, communicator, status, ierr)
>> write(unit=interim_print,fmt=*) "child: SR_utilities p=", &
>> process_id," LOOP i=", i," sender=",sender," empty"
>> else
>> ! ... Size of array R_value returned in tag.
>> allocate(R_value(1:tag), stat=error)
>> call MPI_RECV(R_value, tag, MPI_DOUBLE_PRECISION, sender, &
>> tag, communicator, status, ierr)
>> call error_class(communicator, ierr, err_loc_message)
>> ! ...
>> ! ... Add R_value to R using load_fnc
>> ! ...
>> do j=start,6,step
>> if (p_adj(j)/=sender+lag) cycle
>> ! ... j indicates partition face
>> Select case(j)
>> case(1)
>> part_con=>part_con_1l
>> i_val=load_fnc(index_1, tag)
>> case(2)
>> part_con=>part_con_1u
>> i_val=load_fnc(index_2, tag)
>> case(3)
>> part_con=>part_con_2l
>> i_val=load_fnc(index_3, tag)
>> case(4)
>> part_con=>part_con_2u
>> i_val=load_fnc(index_4, tag)
>> case(5)
>> part_con=>part_con_3l
>> i_val=load_fnc(index_5, tag)
>> case(6)
>> part_con=>part_con_3u
>> i_val=load_fnc(index_6, tag)
>> end select
>> nullify (part_con)
>> exit
>> enddo
>> endif
>> ! ...
>> deallocate (R_value)
>> ! ...
>> enddo
>> nullify (R)
>> ! ...
>> end subroutine vec_fnc_recv
>>
>> function recv_prod_1(index_fn, loop_size)
>> ! ... index_fn: function for cell face ordering indicator
>> integer :: recv_prod_1
>> integer, intent(in) :: loop_size
>> integer, external :: index_fn
>> integer :: i, ii
>> ! ...
>> recv_prod_1=0
>> do i=1,loop_size
>> ii=index_fn(i)
>> R(ii)=R(ii)+part_con(i)*R_value(i)
>> enddo
>> recv_prod_1=1
>> end function recv_prod_1
>> **********************************************************************
>>
>> Through the array p_adj(i), each process knows the identification of
>> its neighboring processes (blocks). By checking the
>> sender=status(MPI_SOURCE) versus the values entered in p_adj(i) (give
>> or take an adjustment for communicator differences), the originating
>> process of the entering X-array section is identified. What has
>> occurred to me is that there is some incompatibility between using a
>> non-blocking send and the use of MPI_PROBE to identify the source
>> process on the receiving end.
>
> Nope -- this should be fine. You can mix any flavor of send with any
> flavor of receive or probe.
>
>> For it is evident, from my debugging,
>> that the array being received with the call to MPI_RECV(R_value ...)
>> is not identically the array being sent by that process, even though
>> status(MPI_SOURCE) has identified the source process.
>
> Hmm; that's odd. Since you're pulling the source and length from the
> status (it looks like you are doubling the use of the tag as the
> length of the message as well -- clever), I don't see how that could
> happen.
>
> So if you print out the array on the sender, you're saying that the
> array received on the target doesn't match what was sent?
>
> Aaahhh.... looking further in your message I think I see the
> problem. Look below.
>
>> So either I
>> have a logic error of some sort at this point, or LAM 7.1.3 has a
>> problem. It should be noted that I also have a blocking version of
>> the subroutine vec_isend (vec_send), which uses standard MPI_SENDs,
>> and works perfectly in this case. I could use this version to send
>> the X-array sections in the AC_multiply subroutine above, but that
>> puts me in the uncomfortable position of starting all processes with
>> blocking sends, which could lead to a deadlock. In this particular
>> case, I haven't actually seen a deadlock resulting from the use of
>> blocking sends, but I am newbie enough to be wary. The subroutine
>> vec_isend follows; I am attaching, in separate files, information
>> concerning the LAM installation and running the code. The entire code
>> is tarred (tar czf ../vec_mat_test.gz *) and attached to this message.
>>
>> -- Rich Naff
>>
>> **********************************************************************
>> subroutine vec_isend(X, communicator, lag, start, step)
>> ! ... Send segment of X array, corresponding to C_{i,j} connectors,
>> ! ... to surrounding partitions.
>> ! ... nonblocking version
>> ! ...
>> ! ... argument list
>> ! ...
>> integer, intent(in) :: communicator, lag, start, step
>> real(kind=kv), dimension(:), intent(in) :: X
>> ! ...
>> ! ... local variables
>> ! ...
>> integer :: ierr, error
>> integer :: i, process_no
>> integer :: status(MPI_STATUS_SIZE)
>> real(kind=kv), dimension(:), pointer :: X_value
>> character(len=64) :: err_loc_message
>> ! ...
>> ! ...................................................................
>> ! ...
>> ierr=0; request=MPI_REQUEST_NULL
>> do i=start,6,step
>> process_no=p_adj(i)
>> ! ... i indicates partition face
>> if (process_no<1) cycle
>> ! ...
>> ! ... Array X_value filled in subroutine load_send.
>> ! ...
>> Select case(i)
>> case(1)
>> allocate(X_value(1:CC_size(1)), stat=error)
>> call load_send(index_1, CC_size(1))
>> call MPI_ISEND(X_value, CC_size(1), MPI_DOUBLE_PRECISION, &
>> process_no-lag, CC_size(1), communicator, request(1), ierr)
>> err_loc_message='SR_utilities_child vec_isend MPI_ISEND 1'
>> call error_class(communicator, ierr, err_loc_message)
>> case(2)
>> allocate(X_value(1:CC_size(1)), stat=error)
>> call load_send(index_2, CC_size(1))
>> call MPI_ISEND(X_value, CC_size(1), MPI_DOUBLE_PRECISION, &
>> process_no-lag, CC_size(1), communicator, request(2), ierr)
>> err_loc_message='SR_utilities_child vec_isend MPI_ISEND 2'
>> call error_class(communicator, ierr, err_loc_message)
>> case(3)
>> allocate(X_value(1:CC_size(2)), stat=error)
>> call load_send(index_3, CC_size(2))
>> call MPI_ISEND(X_value, CC_size(2), MPI_DOUBLE_PRECISION, &
>> process_no-lag, CC_size(2), communicator, request(3), ierr)
>> err_loc_message='SR_utilities_child vec_isend MPI_ISEND 3'
>> call error_class(communicator, ierr, err_loc_message)
>> case(4)
>> allocate(X_value(1:CC_size(2)), stat=error)
>> call load_send(index_4, CC_size(2))
>> call MPI_ISEND(X_value, CC_size(2), MPI_DOUBLE_PRECISION, &
>> process_no-lag, CC_size(2), communicator, request(4), ierr)
>> err_loc_message='SR_utilities_child vec_isend MPI_ISEND 4'
>> call error_class(communicator, ierr, err_loc_message)
>> case(5)
>> allocate(X_value(1:CC_size(3)), stat=error)
>> call load_send(index_5, CC_size(3))
>> call MPI_ISEND(X_value, CC_size(3), MPI_DOUBLE_PRECISION, &
>> process_no-lag, CC_size(3), communicator, request(5), ierr)
>> err_loc_message='SR_utilities_child vec_isend MPI_ISEND 5'
>> call error_class(communicator, ierr, err_loc_message)
>> case(6)
>> allocate(X_value(1:CC_size(3)), stat=error)
>> call load_send(index_6, CC_size(3))
>> call MPI_ISEND(X_value, CC_size(3), MPI_DOUBLE_PRECISION, &
>> process_no-lag, CC_size(3), communicator, request(6), ierr)
>> err_loc_message='SR_utilities_child vec_isend MPI_ISEND 6'
>> call error_class(communicator, ierr, err_loc_message)
>> end select
>> ! ...
>> deallocate (X_value)
>
> This is your problem. You are deallocating the X_value buffer before
> the send has [potentially] completed. So if MPI hasn't finished
> sending before MPI_ISEND returns, when MPI goes to complete the
> send in MPI_WAIT, the buffer is now potentially pointing to garbage
> in memory (which MPI will dutifully send -- if it doesn't seg fault!).
>
> Once you have started a non-blocking MPI action, you must wait for
> that action to complete before you release any resources associated
> with that action.
>
> I'm guessing that having proper deallocation will fix your problem.
>
>> ! ...
>> enddo
>> ! ...
>> contains
>>
>> subroutine load_send(index_fn, loop_size)
>> ! ... Extract segment from X array; store in X_value array.
>> ! ... index_fn: function for cell face ordering indicator
>> integer, intent(in) :: loop_size
>> integer, external :: index_fn
>> integer :: i
>> ! ...
>> do i=1, loop_size
>> X_value(i)=X(index_fn(i))
>> enddo
>> end subroutine load_send
>>
>> end subroutine vec_isend
>> **********************************************************************
>>
>>
>> --
>> Rich Naff
>> U.S. Geological Survey
>> MS 413 Box 25046
>> Denver, CO 80225 USA
>> telephone: (303) 236-4986
>> Email: rlnaff_at_[hidden]
>> <vec_mat_test.gz>
>> <lam_rept.txt>
>> _______________________________________________
>> This list is archived at http://www.lam-mpi.org/MailArchives/lam/
>
>
>
--
Rich Naff
U.S. Geological Survey
MS 413 Box 25046
Denver, CO 80225 USA
telephone: (303) 236-4986
Email: rlnaff_at_[hidden]