/***************************************************************************
 *
 * Copyright (C) 2001 International Business Machines
 * All rights reserved.
 *
 * This file is part of the GPFS mmfslinux kernel module.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *  3. The name of the author may not be used to endorse or promote products
 *     derived from this software without specific prior written
 *     permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ***************************************************************************/
/* @(#)98 1.20 src/avs/fs/mmfs/ts/kernext/ibm-linux/cxiAtomic-plat.h, mmfs, avs_rgpfs24, rgpfs240610b 6/24/05 15:57:45 */

/*
 * Platform-specific synchronization/atomic operations for Linux
 *
 * Note that these should not be directly invoked; instead use the
 * ATOMIC_XXX and ATOMIC_XXXLP macros from cxiAtomic.h.
 *
 * Definitions for baseline atomic operations (long/pointer variants)
 *   comp_and_swap(lp)
 *
 * Definitions for atomic operations
 *   fetch_and_add(lp)
 *   fetch_and_and(lp)
 *   fetch_and_or(lp)
 *   compare_and_swap(lp)
 *   _check_lock
 *
 */

#ifndef _h_cxiAtomic_plat
#define _h_cxiAtomic_plat

#ifndef _h_cxiAtomic
#error Platform header (XXX-plat.h) should not be included directly
#endif

/* NOTE: need to further split this file into architecture-specific
   headers. */

#include

/* Memory fencing operations for various architectures */

#if defined(GPFS_ARCH_POWER) || defined(GPFS_ARCH_PPC64)
#ifndef CONFIG_UP
#define IMPORT_FENCE __asm__ __volatile__ ("isync" : : )
#define EXPORT_FENCE __asm__ __volatile__ ("sync" : : )
#else
#define IMPORT_FENCE ((void)0)
#define EXPORT_FENCE ((void)0)
#endif

/* A complete fence is defined as ensuring that the most recently preceding
   store is visible to all processors before any subsequent access completes
   in storage.  For PowerPC MP, the implementations of COMPLETE_FENCE and
   EXPORT_FENCE are the same. */
#define COMPLETE_FENCE EXPORT_FENCE

#endif /* GPFS_ARCH_POWER */
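/* Illustrative sketch of how the fence macros are meant to pair across
   processors.  The publishData/consumeData helpers and the shared variables
   are hypothetical, shown only to clarify the intent of IMPORT_FENCE and
   EXPORT_FENCE:

     static int sharedData;
     static volatile int sharedFlag = 0;

     static inline void publishData(int v)
     {
       sharedData = v;
       EXPORT_FENCE;       // make the store to sharedData visible first
       sharedFlag = 1;     // then publish the flag
     }

     static inline int consumeData(void)
     {
       while (sharedFlag == 0)
         ;                 // wait for the flag to be published
       IMPORT_FENCE;       // order the flag read before the data read
       return sharedData;
     }
*/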
#ifdef GPFS_ARCH_I386
/* Memory in the I386 architecture is always consistent from all processors,
   so explicit fence instructions are not needed. */
#define IMPORT_FENCE   ((void)0)
#define EXPORT_FENCE   ((void)0)
#define COMPLETE_FENCE ((void)0)
#endif /* GPFS_ARCH_I386 */

#ifdef GPFS_ARCH_IA64
/* Only full/complete memory fence available */
#define IMPORT_FENCE   __asm__ __volatile__ ("mf" : : )
#define EXPORT_FENCE   __asm__ __volatile__ ("mf" : : )
#define COMPLETE_FENCE __asm__ __volatile__ ("mf" : : )
#endif

#ifdef GPFS_ARCH_X86_64
#define IMPORT_FENCE   __asm__ __volatile__ ("sfence":::"memory")
#define EXPORT_FENCE   __asm__ __volatile__ ("mfence":::"memory")
#define COMPLETE_FENCE EXPORT_FENCE
#endif


/* Baseline atomic operation for i386: comp_and_swap */

#if defined(GPFS_ARCH_I386)

/* Compare the contents of word_addr with the contents of old_val_addr.
   If the values are equal, store new_val in word_addr and return 1.
   Otherwise, set old_val_addr to the current value of word_addr and
   return 0.  See ppc64 comp_and_swaplp for details on exception table
   code. */
static inline int
comp_and_swap(volatile int *word_addr, int *old_val_addr, int new_val)
{
  unsigned char result;

  __asm__ __volatile__(
      "1: lock; cmpxchg %3,%0        \n\
       2: setz %2                    \n\
       .section .fixup, \"ax\"       \n\
       3: jmp 2b                     \n\
       .previous                     \n\
       .section __ex_table, \"a\"    \n\
       .align 4                      \n\
       .long 1b, 3b                  \n\
       .previous"
       :"=m" (*word_addr), "=a" (*old_val_addr), "=&b" (result)
       :"r" (new_val), "a" (*old_val_addr)
       :"cc", "memory");

  return result;
}
#endif /* GPFS_ARCH_I386 */


/* Baseline atomic operations for x86_64: comp_and_swap and comp_and_swaplp.
   See ppc64 comp_and_swaplp for details on exception table code. */
#ifdef GPFS_ARCH_X86_64

/* Compare the contents of word_addr with the contents of old_val_addr.
   If the values are equal, store new_val in word_addr and return 1.
   Otherwise, set old_val_addr to the current value of word_addr and
   return 0.  See ppc64 comp_and_swaplp for details on exception table
   code. */
static inline int
comp_and_swap(volatile int *word_addr, int *old_val_addr, int new_val)
{
  unsigned char result;

  __asm__ __volatile__(
      "lock; cmpxchg %3,%0 \n\
       setz %2"
       :"=m" (*word_addr), "=a" (*old_val_addr), "=&b" (result)
       :"r" (new_val), "a" (*old_val_addr)
       :"cc", "memory");

  return result;
}

static inline int
comp_and_swaplp(volatile long *word_addr, long *old_val_addr, long new_val)
{
  char result;

  __asm__ __volatile__(
      "1: lock; cmpxchgq %3,%0       \n\
       2: setz %2                    \n\
       .section .fixup, \"ax\"       \n\
       3: jmp 2b                     \n\
       .previous                     \n\
       .section __ex_table, \"a\"    \n\
       .align 8                      \n\
       .quad 1b, 3b                  \n\
       .previous"
       :"=m" (*word_addr), "=a" (*old_val_addr), "=q" (result)
       :"r" (new_val), "a" (*old_val_addr)
       :"cc", "memory");

  return result;
}
#endif /* GPFS_ARCH_X86_64 */
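/* Illustrative sketch of the retry-loop idiom that the fetch_and_XXX
   routines further down are built on (the atomicStoreMax helper is
   hypothetical).  Because comp_and_swap refreshes *old_val_addr with the
   current contents of the word when the swap fails, the loop never has to
   re-read the word itself:

     static inline int atomicStoreMax(volatile int *word, int candidate)
     {
       int observed = *word;

       while (candidate > observed)
       {
         // If *word still equals observed, replace it with candidate and
         // stop; otherwise observed now holds the fresher value and the
         // comparison is retried.
         if (comp_and_swap(word, &observed, candidate))
           return candidate;
       }
       return observed;   // an equal or larger value was already stored
     }
*/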
/* Baseline atomic operation for power: comp_and_swap */

#if defined(GPFS_ARCH_POWER) || defined(GPFS_ARCH_PPC64)

/* Compare the contents of word_addr with the contents of old_val_addr.
   If the values are equal, store new_val in word_addr and return 1.
   Otherwise, set old_val_addr to the current value of word_addr and
   return 0. */
static inline int
comp_and_swap(volatile int *word_addr, int *old_val_addr, int new_val)
{
  int result;

  __asm__ __volatile__(
"1: lwarx  %0,0,%4     # result = *word_addr              \n\
    cmplw  cr0,%0,%3   # compare result to *old_val_addr  \n\
    bne-   2f          # skip to 2: if mismatch           \n\
    stwcx. %2,0,%4     # *word_addr = new_val             \n\
    bne-   1b          # repeat if reservation lost       \n\
    li     %0,1        # result = 1                       \n\
    b      3f                                             \n\
 2: stw    %0,%1       # *old_val_addr = result           \n\
    li     %0,0        # result = 0                       \n\
 3:"
   // output values
   : "=&r" (result),        // %0 return value + temp variable
     "=m" (*old_val_addr)   // %1 changed if mismatch
   // input values
   : "r" (new_val),         // %2
     "r" (*old_val_addr),   // %3
     "r" (word_addr)        // %4
   : "cr0", "memory");      // "memory" because we modify *word_addr

  return result;
}
#endif /* GPFS_ARCH_POWER */


#ifdef GPFS_ARCH_PPC64
/* This is a regular comp_and_swap function, but with an added twist.
 * In SLES9 SP1 ppc64, a patch has been added that modifies the page
 * fault handler to search the exception table _before_ an actual
 * exception happens, in the course of handling a minor page fault
 * triggered by a store to a userspace address.  If the offending
 * instruction is not found in the module exception table, the page
 * fault will result in an Oops, even though the dereferenced address
 * is actually OK, and would have resulted in a successful store had
 * it been given a chance to proceed.  This problem occurs in
 * internalAcquire, where we have to do some atomic store operations on
 * the lockWord that may be located in userspace.  To work around
 * this check, we add exception handling code to all ppc64 atomic ops.
 * This exception code does absolutely nothing (it transfers control
 * back to the instruction following the one that triggered the fault),
 * but that doesn't really matter: we do not expect the exception
 * handling code ever to be invoked, we only want search_exception_tables()
 * not to return false.  If a bad address is passed to internalAcquire,
 * we'll get an Oops or assert before getting a chance to run any atomic
 * ops.  See LTC bugzilla 14533 for more details.
 */
static inline int
comp_and_swaplp(volatile long *word_addr, long *old_val_addr, long new_val)
{
  long result;

  __asm__ __volatile__(
"1: ldarx  %0,0,%4     # result = *word_addr              \n\
 8: cmpld  cr0,%0,%3   # compare result to *old_val_addr  \n\
    bne-   2f          # skip to 2: if mismatch           \n\
 4: stdcx. %2,0,%4     # *word_addr = new_val             \n\
    .section .fixup, \"ax\"                               \n\
 5: b 6f                                                  \n\
 7: b 8b                                                  \n\
    .previous                                             \n\
    .section __ex_table, \"a\"                            \n\
    .align 3                                              \n\
    .llong 4b, 5b                                         \n\
    .llong 1b, 7b                                         \n\
    .previous                                             \n\
 6: bne-   1b          # repeat if reservation lost       \n\
    li     %0,1        # result = 1                       \n\
    b      3f                                             \n\
 2: std    %0,%1       # *old_val_addr = result           \n\
    li     %0,0        # result = 0                       \n\
 3:"
   // output values
   : "=&r" (result),        // %0 return value + temp variable
     "=m" (*old_val_addr)   // %1 changed if mismatch
   // input values
   : "r" (new_val),         // %2
     "r" (*old_val_addr),   // %3
     "r" (word_addr)        // %4
   : "cr0", "memory");      // "memory" because we modify *word_addr

  return (int)result;
}
#endif
/* Baseline atomic operations for ia64: comp_and_swap and comp_and_swaplp */
/* Found the HP IA64 ISA guide very useful here:
   http://devresource.hp.com/devresource/Docs/Refs/IA64ISA/ */
#ifdef GPFS_ARCH_IA64

#define MASK_LOWER32 0x00000000FFFFFFFFULL

/* Compare the contents of word_addr with the contents of old_val_addr.
   If the values are equal, store new_val in word_addr and return 1.
   Otherwise, set old_val_addr to the current value of word_addr and
   return 0. */

/* compare and swap 4-byte halfword */
static inline int
comp_and_swap(volatile int *word_addr, int *old_val_addr, int new_val)
{
  UInt64 old_val = ((UInt64)*old_val_addr) & MASK_LOWER32;
  UInt64 ret_val;

  /* Ensure the mov to AR[CCV] is in a separate instruction group/bundle
     from the cmpxchg to handle the RAW dependency */
  __asm__ __volatile__ ("mov ar.ccv=%0\n\
                         ;;"
                        : : "rO"(old_val));

  /* Use acquire consistency semantics with cmpxchg (memory write visible
     to all subsequent data memory accesses) */
  __asm__ __volatile__ ("cmpxchg4.acq %0=[%1],%2,ar.ccv"
                        : "=r"(ret_val)
                        : "r"(word_addr), "r"(new_val)
                        : "memory");

  if (ret_val == old_val)
    return 1;
  else
  {
    *old_val_addr = (int)ret_val;
    return 0;
  }
}

/* compare and swap natural 8-byte word */
static inline int
comp_and_swaplp(volatile long *word_addr, long *old_val_addr, long new_val)
{
  long ret;

  /* Ensure the mov to AR[CCV] is in a separate instruction group/bundle
     from the cmpxchg to handle the RAW dependency */
  __asm__ __volatile__ ("mov ar.ccv=%0\n\
                         ;;"
                        : : "rO"(*old_val_addr));

  /* Use acquire consistency semantics with cmpxchg (memory write visible
     to all subsequent data memory accesses) */
  __asm__ __volatile__ ("cmpxchg8.acq %0=[%1],%2,ar.ccv"
                        : "=r"(ret)
                        : "r"(word_addr), "r"(new_val)
                        : "memory");

  if (ret == *old_val_addr)
    return 1;
  else
  {
    *old_val_addr = ret;
    return 0;
  }
}
#endif /* GPFS_ARCH_IA64 */


/* fetch_and_XXX and fetch_and_XXXlp operations */

/* With inlined functions we cannot use the standard trace statements, so
   for the atomic operations USE_LOCK_TRACE must be toggled on to debug
   these operations (which fortunately shouldn't happen often). */
#undef USE_LOCK_TRACE

#ifdef USE_LOCK_TRACE
#ifdef _KERNEL
#define LOCK_TRACE printk
#else
#define LOCK_TRACE printf
#endif /* _KERNEL */
#else
#define LOCK_TRACE(X1,X2,X3,X4,X5,X6)
#endif /* USE_LOCK_TRACE */

static inline int
fetch_and_add(atomic_p wd, int i)
{
  int ret, oldVal, newVal;

  oldVal = cxiSafeGetInt(wd);
  do
  {
    newVal = oldVal + i;
    ret = comp_and_swap((volatile int *)wd, &oldVal, newVal);

    LOCK_TRACE(
       "fetch_and_add: wd 0x%lX *wd 0x%lX old 0x%lX new 0x%lX ret %d\n",
       wd, *wd, oldVal, newVal, ret);

  } while (ret == 0);

  return oldVal;
}

#ifdef __64BIT__
static inline long
fetch_and_addlp(atomic_l wd, long i)
{
  long oldVal, newVal;
  int ret;

  oldVal = cxiSafeGetLong(wd);
  do
  {
    newVal = oldVal + i;
    ret = comp_and_swaplp((volatile long *)wd, &oldVal, newVal);

    LOCK_TRACE(
       "fetch_and_addlp: wd 0x%lX *wd 0x%lX old 0x%lX new 0x%lX ret %d\n",
       wd, *wd, oldVal, newVal, ret);

  } while (ret == 0);

  return oldVal;
}
#endif /* __64BIT__ */

static inline int
fetch_and_and(atomic_p wd, uint mask)
{
  int ret, oldVal, newVal;

  oldVal = cxiSafeGetInt(wd);
  do
  {
    newVal = oldVal & mask;
    ret = comp_and_swap((volatile int *)wd, &oldVal, newVal);

    LOCK_TRACE(
       "fetch_and_and: wd 0x%lX *wd 0x%lX old 0x%lX new 0x%lX ret %d\n",
       wd, *wd, oldVal, newVal, ret);

  } while (ret == 0);

  return oldVal;
}

#ifdef __64BIT__
static inline long
fetch_and_andlp(atomic_l wd, ulong mask)
{
  long oldVal, newVal;
  int ret;

  oldVal = cxiSafeGetLong(wd);
  do
  {
    newVal = oldVal & mask;
    ret = comp_and_swaplp((volatile long *)wd, &oldVal, newVal);

    LOCK_TRACE(
       "fetch_and_andlp: wd 0x%lX *wd 0x%lX old 0x%lX new 0x%lX ret %d\n",
       wd, *wd, oldVal, newVal, ret);

  } while (ret == 0);

  return oldVal;
}
#endif /* __64BIT__ */
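/* Illustrative sketch of how the fetch_and_XXX routines are typically used
   (the FLAG_BUSY bit and the markBusy/bumpRefCount helpers are
   hypothetical).  Each routine returns the value the word held *before*
   the update, so a caller can both modify the word and learn its prior
   state in one atomic step:

     #define FLAG_BUSY 0x0001

     static inline Boolean markBusy(atomic_p flagsWord)
     {
       int prev = fetch_and_or(flagsWord, FLAG_BUSY);
       return (prev & FLAG_BUSY) != 0;       // true if the bit was already set
     }

     static inline int bumpRefCount(atomic_p counter)
     {
       return fetch_and_add(counter, 1) + 1; // value after the increment
     }
*/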
static inline int
fetch_and_or(atomic_p wd, uint mask)
{
  int ret, oldVal, newVal;

  oldVal = cxiSafeGetInt(wd);
  do
  {
    newVal = oldVal | mask;
    ret = comp_and_swap((volatile int *)wd, &oldVal, newVal);

    LOCK_TRACE(
       "fetch_and_or: wd 0x%lX *wd 0x%lX old 0x%lX new 0x%lX ret %d\n",
       wd, *wd, oldVal, newVal, ret);

  } while (ret == 0);

  return oldVal;
}

#ifdef __64BIT__
static inline long
fetch_and_orlp(atomic_l wd, ulong mask)
{
  long oldVal, newVal;
  int ret;

  oldVal = cxiSafeGetLong(wd);
  do
  {
    newVal = oldVal | mask;
    ret = comp_and_swaplp((volatile long *)wd, &oldVal, newVal);

    LOCK_TRACE(
       "fetch_and_orlp: wd 0x%lX *wd 0x%lX old 0x%lX new 0x%lX ret %d\n",
       wd, *wd, oldVal, newVal, ret);

  } while (ret == 0);

  return oldVal;
}
#endif /* __64BIT__ */

static inline Boolean
compare_and_swap(atomic_p wd, int *oldVal, int newVal)
{
  Boolean ret;

  ret = comp_and_swap((volatile int *)wd, oldVal, newVal);

  LOCK_TRACE(
     "compare_and_swap out: wd 0x%lX *wd 0x%lX old 0x%lX "
     "new 0x%lX ret %d\n",
     wd, *wd, *oldVal, newVal, ret);

  return ret;
}

#ifdef __64BIT__
static inline Boolean
compare_and_swaplp(atomic_l wd, long *oldVal, long newVal)
{
  Boolean ret;

  ret = comp_and_swaplp((volatile long *)wd, oldVal, newVal);

  LOCK_TRACE(
     "compare_and_swaplp out: wd 0x%lX *wd 0x%lX old 0x%lX "
     "new 0x%lX ret %d\n",
     wd, *wd, *oldVal, newVal, ret);

  return ret;
}
#endif /* __64BIT__ */

static inline Boolean
_check_lock(atomic_p wd, int oldVal, int newVal)
{
  int old_val_addr = oldVal;
  Boolean ret;

  ret = comp_and_swap((volatile int *)wd, &old_val_addr, newVal);

  LOCK_TRACE(
     "_check_lock: wd 0x%X *wd 0x%X old 0x%X new 0x%X ret %d\n",
     wd, *wd, old_val_addr, newVal, ret);

  if (ret)
  {
    IMPORT_FENCE;
    return 0;
  }
  else
    return 1;
}

#ifdef __64BIT__
static inline Boolean
_check_locklp(atomic_l wd, long oldVal, long newVal)
{
  long old_val_addr = oldVal;
  Boolean ret;

  ret = comp_and_swaplp((volatile long *)wd, &old_val_addr, newVal);

  LOCK_TRACE(
     "_check_locklp: wd 0x%lX *wd 0x%lX old 0x%lX new 0x%lX ret %d\n",
     wd, *wd, old_val_addr, newVal, ret);

  if (ret)
  {
    IMPORT_FENCE;
    return 0;
  }
  else
    return 1;
}
#endif /* __64BIT__ */

#endif /* _h_cxiAtomic_plat */
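/* Illustrative sketch of the lock-word protocol _check_lock supports (the
   acquireSimpleLock/releaseSimpleLock helpers and the 0 = free, 1 = held
   encoding are hypothetical).  _check_lock returns 0 when it atomically
   swapped oldVal for newVal, having already issued IMPORT_FENCE so that
   accesses inside the critical section are ordered after the lock word
   changes hands; it returns 1 when the word held something other than
   oldVal:

     static inline void acquireSimpleLock(atomic_p lockWord)
     {
       // Spin until the word is atomically changed from 0 (free) to 1 (held)
       while (_check_lock(lockWord, 0, 1))
         ;
     }

     static inline void releaseSimpleLock(atomic_p lockWord)
     {
       EXPORT_FENCE;                      // flush critical-section stores first
       (void)fetch_and_and(lockWord, 0);  // then mark the lock free again
     }
*/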