From: Linus Torvalds Date: Sun, 20 Sep 2015 03:41:31 +0000 (-0700) Subject: Merge tag 'pm+acpi-4.3-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael... X-Git-Tag: v4.3-rc2~3 X-Git-Url: http://git.cascardo.eti.br/?a=commitdiff_plain;h=009884f384dcc71909f2b861c317da7242979f31;hp=0f40314b81b765c26202cde33523e35809adbe9b;p=cascardo%2Flinux.git Merge tag 'pm+acpi-4.3-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm Pull power management and ACPI updates from Rafael Wysocki: "Included are: a somewhat late devfreq update which however is mostly fixes and cleanups with one new thing only (the PPMUv2 support on Exynos5433), an ACPI cpufreq driver fixup and two ACPI core cleanups related to preprocessor directives. Specifics: - Fix a memory allocation size in the devfreq core (Xiaolong Ye). - Fix a mistake in the exynos-ppmu DT binding (Javier Martinez Canillas). - Add support for PPMUv2 (Platform Performance Monitoring Unit version 2.0) on the Exynos5433 SoCs (Chanwoo Choi). - Fix a type casting bug in the Exynos PPMU code (MyungJoo Ham). - Assorted devfreq code cleanups and optimizations (Javi Merino, MyungJoo Ham, Viresh Kumar). - Fix up the ACPI cpufreq driver to use a more lightweight way to get to its private data in the ->get() callback (Rafael J Wysocki). - Fix a CONFIG_ prefix bug in one of the ACPI drivers and make the ACPI subsystem use IS_ENABLED() instead of #ifdefs in function bodies (Sudeep Holla)" * tag 'pm+acpi-4.3-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: cpufreq: acpi-cpufreq: Use cpufreq_cpu_get_raw() in ->get() ACPI: Eliminate CONFIG_.*{, _MODULE} #ifdef in favor of IS_ENABLED() ACPI: int340x_thermal: add missing CONFIG_ prefix PM / devfreq: Fix incorrect type issue. 
PM / devfreq: tegra: Update governor to use devfreq_update_stats() PM / devfreq: comments for get_dev_status usage updated PM / devfreq: drop comment about thermal setting max_freq PM / devfreq: cache the last call to get_dev_status() PM / devfreq: Drop unlikely before IS_ERR(_OR_NULL) PM / devfreq: exynos-ppmu: bit-wise operation bugfix. PM / devfreq: exynos-ppmu: Update documentation to support PPMUv2 PM / devfreq: exynos-ppmu: Add the support of PPMUv2 for Exynos5433 PM / devfreq: event: Remove incorrect property in exynos-ppmu DT binding --- diff --git a/Documentation/gpio/board.txt b/Documentation/gpio/board.txt index b80606de545a..f59c43b6411b 100644 --- a/Documentation/gpio/board.txt +++ b/Documentation/gpio/board.txt @@ -21,8 +21,8 @@ exact way to do it depends on the GPIO controller providing the GPIOs, see the device tree bindings for your controller. GPIOs mappings are defined in the consumer device's node, in a property named -<function>-gpios, where <function> is the function the driver will request -through gpiod_get(). For example: +either <function>-gpios or <function>-gpio, where <function> is the function +the driver will request through gpiod_get(). For example: foo_device { compatible = "acme,foo"; @@ -31,7 +31,7 @@ through gpiod_get(). 
For example: <&gpio 16 GPIO_ACTIVE_HIGH>, /* green */ <&gpio 17 GPIO_ACTIVE_HIGH>; /* blue */ - power-gpios = <&gpio 1 GPIO_ACTIVE_LOW>; + power-gpio = <&gpio 1 GPIO_ACTIVE_LOW>; }; This property will make GPIOs 15, 16 and 17 available to the driver under the @@ -39,15 +39,24 @@ This property will make GPIOs 15, 16 and 17 available to the driver under the struct gpio_desc *red, *green, *blue, *power; - red = gpiod_get_index(dev, "led", 0); - green = gpiod_get_index(dev, "led", 1); - blue = gpiod_get_index(dev, "led", 2); + red = gpiod_get_index(dev, "led", 0, GPIOD_OUT_HIGH); + green = gpiod_get_index(dev, "led", 1, GPIOD_OUT_HIGH); + blue = gpiod_get_index(dev, "led", 2, GPIOD_OUT_HIGH); - power = gpiod_get(dev, "power"); + power = gpiod_get(dev, "power", GPIOD_OUT_HIGH); The led GPIOs will be active-high, while the power GPIO will be active-low (i.e. gpiod_is_active_low(power) will be true). +The second parameter of the gpiod_get() functions, the con_id string, has to be +the <function>-prefix of the GPIO suffixes ("gpios" or "gpio", automatically +looked up by the gpiod functions internally) used in the device tree. With above +"led-gpios" example, use the prefix without the "-" as con_id parameter: "led". + +Internally, the GPIO subsystem prefixes the GPIO suffix ("gpios" or "gpio") +with the string passed in con_id to get the resulting string +(snprintf(... "%s-%s", con_id, gpio_suffixes[])). + ACPI ---- ACPI also supports function names for GPIOs in a similar fashion to DT. 
@@ -142,13 +151,14 @@ The driver controlling "foo.0" will then be able to obtain its GPIOs as follows: struct gpio_desc *red, *green, *blue, *power; - red = gpiod_get_index(dev, "led", 0); - green = gpiod_get_index(dev, "led", 1); - blue = gpiod_get_index(dev, "led", 2); + red = gpiod_get_index(dev, "led", 0, GPIOD_OUT_HIGH); + green = gpiod_get_index(dev, "led", 1, GPIOD_OUT_HIGH); + blue = gpiod_get_index(dev, "led", 2, GPIOD_OUT_HIGH); - power = gpiod_get(dev, "power"); - gpiod_direction_output(power, 1); + power = gpiod_get(dev, "power", GPIOD_OUT_HIGH); -Since the "power" GPIO is mapped as active-low, its actual signal will be 0 -after this code. Contrary to the legacy integer GPIO interface, the active-low -property is handled during mapping and is thus transparent to GPIO consumers. +Since the "led" GPIOs are mapped as active-high, this example will switch their +signals to 1, i.e. enabling the LEDs. And for the "power" GPIO, which is mapped +as active-low, its actual signal will be 0 after this code. Contrary to the legacy +integer GPIO interface, the active-low property is handled during mapping and is +thus transparent to GPIO consumers. diff --git a/Documentation/gpio/consumer.txt b/Documentation/gpio/consumer.txt index a206639454ab..e000502fde20 100644 --- a/Documentation/gpio/consumer.txt +++ b/Documentation/gpio/consumer.txt @@ -39,6 +39,9 @@ device that displays digits), an additional index argument can be specified: const char *con_id, unsigned int idx, enum gpiod_flags flags) +For a more detailed description of the con_id parameter in the DeviceTree case +see Documentation/gpio/board.txt + The flags parameter is used to optionally specify a direction and initial value for the GPIO. 
Values can be: diff --git a/Documentation/hwmon/nct6775 b/Documentation/hwmon/nct6775 index f0dd3d2fec96..76add4c9cd68 100644 --- a/Documentation/hwmon/nct6775 +++ b/Documentation/hwmon/nct6775 @@ -32,6 +32,10 @@ Supported chips: Prefix: 'nct6792' Addresses scanned: ISA address retrieved from Super I/O registers Datasheet: Available from Nuvoton upon request + * Nuvoton NCT6793D + Prefix: 'nct6793' + Addresses scanned: ISA address retrieved from Super I/O registers + Datasheet: Available from Nuvoton upon request Authors: Guenter Roeck diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt index f4cb0b2d5cd7..477927becacb 100644 --- a/Documentation/static-keys.txt +++ b/Documentation/static-keys.txt @@ -15,8 +15,8 @@ The updated API replacements are: DEFINE_STATIC_KEY_TRUE(key); DEFINE_STATIC_KEY_FALSE(key); -static_key_likely() -statick_key_unlikely() +static_branch_likely() +static_branch_unlikely() 0) Abstract diff --git a/MAINTAINERS b/MAINTAINERS index 7ba7ab749c85..274f85405584 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6452,11 +6452,11 @@ F: drivers/hwmon/ltc4261.c LTP (Linux Test Project) M: Mike Frysinger M: Cyril Hrubis -M: Wanlong Gao +M: Wanlong Gao M: Jan Stancek M: Stanislav Kholmanskikh M: Alexey Kodanev -L: ltp-list@lists.sourceforge.net (subscribers-only) +L: ltp@lists.linux.it (subscribers-only) W: http://linux-test-project.github.io/ T: git git://github.com/linux-test-project/ltp.git S: Maintained diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index f05bdb4b1cb9..ff4049155c84 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -297,7 +297,9 @@ static inline void __iomem * ioremap_nocache(unsigned long offset, unsigned long size) { return ioremap(offset, size); -} +} + +#define ioremap_uc ioremap_nocache static inline void iounmap(volatile void __iomem *addr) { diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index 2804648c8ff4..2d6efcff3bf3 100644 --- 
a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -117,6 +117,6 @@ handle_irq(int irq) } irq_enter(); - generic_handle_irq_desc(irq, desc); + generic_handle_irq_desc(desc); irq_exit(); } diff --git a/arch/alpha/lib/udelay.c b/arch/alpha/lib/udelay.c index 69d52aa37bae..f2d81ff38aa6 100644 --- a/arch/alpha/lib/udelay.c +++ b/arch/alpha/lib/udelay.c @@ -30,6 +30,7 @@ __delay(int loops) " bgt %0,1b" : "=&r" (tmp), "=r" (loops) : "1"(loops)); } +EXPORT_SYMBOL(__delay); #ifdef CONFIG_SMP #define LPJ cpu_data[smp_processor_id()].loops_per_jiffy diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index d9e44b62df05..4ffd1855f1bd 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -252,7 +252,7 @@ static struct irq_chip idu_irq_chip = { static int idu_first_irq; -static void idu_cascade_isr(unsigned int __core_irq, struct irq_desc *desc) +static void idu_cascade_isr(struct irq_desc *desc) { struct irq_domain *domain = irq_desc_get_handler_data(desc); unsigned int core_irq = irq_desc_get_irq(desc); diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 7451b447cc2d..2c2b28ee4811 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -54,6 +54,14 @@ AS += -EL LD += -EL endif +# +# The Scalar Replacement of Aggregates (SRA) optimization pass in GCC 4.9 and +# later may result in code being generated that handles signed short and signed +# char struct members incorrectly. So disable it. +# (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65932) +# +KBUILD_CFLAGS += $(call cc-option,-fno-ipa-sra) + # This selects which instruction set is used. 
# Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes diff --git a/arch/arm/common/it8152.c b/arch/arm/common/it8152.c index 96dabcb6c621..996aed3b4eee 100644 --- a/arch/arm/common/it8152.c +++ b/arch/arm/common/it8152.c @@ -95,7 +95,7 @@ void it8152_init_irq(void) } } -void it8152_irq_demux(unsigned int irq, struct irq_desc *desc) +void it8152_irq_demux(struct irq_desc *desc) { int bits_pd, bits_lp, bits_ld; int i; diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c index 304adea4bc52..0e97b4b871f9 100644 --- a/arch/arm/common/locomo.c +++ b/arch/arm/common/locomo.c @@ -138,7 +138,7 @@ static struct locomo_dev_info locomo_devices[] = { }, }; -static void locomo_handler(unsigned int __irq, struct irq_desc *desc) +static void locomo_handler(struct irq_desc *desc) { struct locomo *lchip = irq_desc_get_chip_data(desc); int req, i; diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index 4f290250fa93..3d224941b541 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -196,10 +196,8 @@ static struct sa1111_dev_info sa1111_devices[] = { * active IRQs causes the interrupt output to pulse, the upper levels * will call us again if there are more interrupts to process. 
*/ -static void -sa1111_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void sa1111_irq_handler(struct irq_desc *desc) { - unsigned int irq = irq_desc_get_irq(desc); unsigned int stat0, stat1, i; struct sa1111 *sachip = irq_desc_get_handler_data(desc); void __iomem *mapbase = sachip->base + SA1111_INTC; @@ -214,7 +212,7 @@ sa1111_irq_handler(unsigned int __irq, struct irq_desc *desc) sa1111_writel(stat1, mapbase + SA1111_INTSTATCLR1); if (stat0 == 0 && stat1 == 0) { - do_bad_IRQ(irq, desc); + do_bad_IRQ(desc); return; } diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 7bbf325a4f31..b2bc8e11471d 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -491,11 +491,6 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) #endif .endm - .macro uaccess_save_and_disable, tmp - uaccess_save \tmp - uaccess_disable \tmp - .endm - .irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo .macro ret\c, reg #if __LINUX_ARM_ARCH__ < 6 diff --git a/arch/arm/include/asm/bug.h b/arch/arm/include/asm/bug.h index b274bde24905..e7335a92144e 100644 --- a/arch/arm/include/asm/bug.h +++ b/arch/arm/include/asm/bug.h @@ -40,6 +40,7 @@ do { \ "2:\t.asciz " #__file "\n" \ ".popsection\n" \ ".pushsection __bug_table,\"a\"\n" \ + ".align 2\n" \ "3:\t.word 1b, 2b\n" \ "\t.hword " #__line ", 0\n" \ ".popsection"); \ diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index e878129f2fee..fc8ba1663601 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -12,6 +12,7 @@ #ifndef __ASSEMBLY__ #include +#include #endif /* @@ -89,7 +90,8 @@ static inline unsigned int get_domain(void) asm( "mrc p15, 0, %0, c3, c0 @ get domain" - : "=r" (domain)); + : "=r" (domain) + : "m" (current_thread_info()->cpu_domain)); return domain; } @@ -98,7 +100,7 @@ static inline void set_domain(unsigned val) { asm volatile( "mcr p15, 0, %0, c3, c0 @ set domain" - : : "r" (val)); + : : "r" (val) : 
"memory"); isb(); } diff --git a/arch/arm/include/asm/hardware/it8152.h b/arch/arm/include/asm/hardware/it8152.h index d36a73d7c0e8..076777ff3daa 100644 --- a/arch/arm/include/asm/hardware/it8152.h +++ b/arch/arm/include/asm/hardware/it8152.h @@ -106,7 +106,7 @@ extern void __iomem *it8152_base_address; struct pci_dev; struct pci_sys_data; -extern void it8152_irq_demux(unsigned int irq, struct irq_desc *desc); +extern void it8152_irq_demux(struct irq_desc *desc); extern void it8152_init_irq(void); extern int it8152_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin); extern int it8152_pci_setup(int nr, struct pci_sys_data *sys); diff --git a/arch/arm/include/asm/hw_irq.h b/arch/arm/include/asm/hw_irq.h index af79da40af2a..9beb92914f4d 100644 --- a/arch/arm/include/asm/hw_irq.h +++ b/arch/arm/include/asm/hw_irq.h @@ -11,12 +11,6 @@ static inline void ack_bad_irq(int irq) pr_crit("unexpected IRQ trap at vector %02x\n", irq); } -void set_irq_flags(unsigned int irq, unsigned int flags); - -#define IRQF_VALID (1 << 0) -#define IRQF_PROBE (1 << 1) -#define IRQF_NOAUTOEN (1 << 2) - #define ARCH_IRQ_INIT_FLAGS (IRQ_NOREQUEST | IRQ_NOPROBE) #endif diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index dcba0fa5176e..3df1e975f72a 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -29,12 +29,6 @@ #define __KVM_HAVE_ARCH_INTC_INITIALIZED -#if defined(CONFIG_KVM_ARM_MAX_VCPUS) -#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS -#else -#define KVM_MAX_VCPUS 0 -#endif - #define KVM_USER_MEM_SLOTS 32 #define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -44,6 +38,8 @@ #include +#define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS + u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); @@ -148,6 +144,7 @@ struct kvm_vm_stat { struct kvm_vcpu_stat { u32 halt_successful_poll; + u32 halt_attempted_poll; u32 
halt_wakeup; }; diff --git a/arch/arm/include/asm/mach/irq.h b/arch/arm/include/asm/mach/irq.h index 2092ee1e1300..de4634b51456 100644 --- a/arch/arm/include/asm/mach/irq.h +++ b/arch/arm/include/asm/mach/irq.h @@ -23,10 +23,10 @@ extern int show_fiq_list(struct seq_file *, int); /* * This is for easy migration, but should be changed in the source */ -#define do_bad_IRQ(irq,desc) \ +#define do_bad_IRQ(desc) \ do { \ raw_spin_lock(&desc->lock); \ - handle_bad_irq(irq, desc); \ + handle_bad_irq(desc); \ raw_spin_unlock(&desc->lock); \ } while(0) diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index d0a1119dcaf3..776757d1604a 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -25,7 +25,6 @@ struct task_struct; #include -#include typedef unsigned long mm_segment_t; diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 5ff4826cb154..2766183e69df 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -79,26 +79,6 @@ asm_do_IRQ(unsigned int irq, struct pt_regs *regs) handle_IRQ(irq, regs); } -void set_irq_flags(unsigned int irq, unsigned int iflags) -{ - unsigned long clr = 0, set = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; - - if (irq >= nr_irqs) { - pr_err("Trying to set irq flags for IRQ%d\n", irq); - return; - } - - if (iflags & IRQF_VALID) - clr |= IRQ_NOREQUEST; - if (iflags & IRQF_PROBE) - clr |= IRQ_NOPROBE; - if (!(iflags & IRQF_NOAUTOEN)) - clr |= IRQ_NOAUTOEN; - /* Order is clear bits in "clr" then set bits in "set" */ - irq_modify_status(irq, clr, set & ~clr); -} -EXPORT_SYMBOL_GPL(set_irq_flags); - void __init init_IRQ(void) { int ret; diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index a3089bacb8d8..7a7c4cea5523 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -226,6 +226,7 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start, memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save)); 
+#ifdef CONFIG_CPU_USE_DOMAINS /* * Copy the initial value of the domain access control register * from the current thread: thread->addr_limit will have been @@ -233,6 +234,7 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start, * kernel/fork.c */ thread->cpu_domain = get_domain(); +#endif if (likely(!(p->flags & PF_KTHREAD))) { *childregs = *current_pt_regs(); diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index bfb915d05665..210eccadb69a 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -45,15 +45,4 @@ config KVM_ARM_HOST ---help--- Provides host support for ARM processors. -config KVM_ARM_MAX_VCPUS - int "Number maximum supported virtual CPUs per VM" - depends on KVM_ARM_HOST - default 4 - help - Static number of max supported virtual CPUs per VM. - - If you choose a high number, the vcpu structures will be quite - large, so only choose a reasonable number that you expect to - actually use. - endif # VIRTUALIZATION diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index ce404a5c3062..dc017adfddc8 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -446,7 +446,7 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) * Map the VGIC hardware resources before running a vcpu the first * time on this VM. 
*/ - if (unlikely(!vgic_ready(kvm))) { + if (unlikely(irqchip_in_kernel(kvm) && !vgic_ready(kvm))) { ret = kvm_vgic_map_resources(kvm); if (ret) return ret; diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 702740d37465..51a59504bef4 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -515,8 +515,7 @@ ARM_BE8(rev r6, r6 ) mrc p15, 0, r2, c14, c3, 1 @ CNTV_CTL str r2, [vcpu, #VCPU_TIMER_CNTV_CTL] - bic r2, #1 @ Clear ENABLE - mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL + isb mrrc p15, 3, rr_lo_hi(r2, r3), c14 @ CNTV_CVAL @@ -529,6 +528,9 @@ ARM_BE8(rev r6, r6 ) mcrr p15, 4, r2, r2, c14 @ CNTVOFF 1: + mov r2, #0 @ Clear ENABLE + mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL + @ Allow physical timer/counter access for the host mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL orr r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 7b4201294187..6984342da13d 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1792,8 +1792,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (vma->vm_flags & VM_PFNMAP) { gpa_t gpa = mem->guest_phys_addr + (vm_start - mem->userspace_addr); - phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) + - vm_start - vma->vm_start; + phys_addr_t pa; + + pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; + pa += vm_start - vma->vm_start; /* IO region dirty page logging not allowed */ if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 4b94b513168d..ad6f6424f1d1 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -126,7 +126,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) { - int i; + int i, matching_cpus = 0; unsigned long mpidr; unsigned long target_affinity; unsigned long target_affinity_mask; @@ -151,12 +151,16 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) */ 
kvm_for_each_vcpu(i, tmp, kvm) { mpidr = kvm_vcpu_get_mpidr_aff(tmp); - if (((mpidr & target_affinity_mask) == target_affinity) && - !tmp->arch.pause) { - return PSCI_0_2_AFFINITY_LEVEL_ON; + if ((mpidr & target_affinity_mask) == target_affinity) { + matching_cpus++; + if (!tmp->arch.pause) + return PSCI_0_2_AFFINITY_LEVEL_ON; } } + if (!matching_cpus) + return PSCI_RET_INVALID_PARAMS; + return PSCI_0_2_AFFINITY_LEVEL_OFF; } diff --git a/arch/arm/mach-dove/irq.c b/arch/arm/mach-dove/irq.c index 305d7c6242bb..bfb3703357c5 100644 --- a/arch/arm/mach-dove/irq.c +++ b/arch/arm/mach-dove/irq.c @@ -69,14 +69,14 @@ static struct irq_chip pmu_irq_chip = { .irq_ack = pmu_irq_ack, }; -static void pmu_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void pmu_irq_handler(struct irq_desc *desc) { - unsigned int irq = irq_desc_get_irq(desc); unsigned long cause = readl(PMU_INTERRUPT_CAUSE); + unsigned int irq; cause &= readl(PMU_INTERRUPT_MASK); if (cause == 0) { - do_bad_IRQ(irq, desc); + do_bad_IRQ(desc); return; } diff --git a/arch/arm/mach-footbridge/isa-irq.c b/arch/arm/mach-footbridge/isa-irq.c index fcd79bc3a3e1..c01fca11b224 100644 --- a/arch/arm/mach-footbridge/isa-irq.c +++ b/arch/arm/mach-footbridge/isa-irq.c @@ -87,13 +87,12 @@ static struct irq_chip isa_hi_chip = { .irq_unmask = isa_unmask_pic_hi_irq, }; -static void -isa_irq_handler(unsigned int irq, struct irq_desc *desc) +static void isa_irq_handler(struct irq_desc *desc) { unsigned int isa_irq = *(unsigned char *)PCIIACK_BASE; if (isa_irq < _ISA_IRQ(0) || isa_irq >= _ISA_IRQ(16)) { - do_bad_IRQ(isa_irq, desc); + do_bad_IRQ(desc); return; } diff --git a/arch/arm/mach-gemini/gpio.c b/arch/arm/mach-gemini/gpio.c index 220333ed741d..2478d9f4d92d 100644 --- a/arch/arm/mach-gemini/gpio.c +++ b/arch/arm/mach-gemini/gpio.c @@ -126,7 +126,7 @@ static int gpio_set_irq_type(struct irq_data *d, unsigned int type) return 0; } -static void gpio_irq_handler(unsigned int irq, struct irq_desc *desc) +static void 
gpio_irq_handler(struct irq_desc *desc) { unsigned int port = (unsigned int)irq_desc_get_handler_data(desc); unsigned int gpio_irq_no, irq_stat; diff --git a/arch/arm/mach-imx/3ds_debugboard.c b/arch/arm/mach-imx/3ds_debugboard.c index 45903be6e7b3..16496a071ecb 100644 --- a/arch/arm/mach-imx/3ds_debugboard.c +++ b/arch/arm/mach-imx/3ds_debugboard.c @@ -85,7 +85,7 @@ static struct platform_device smsc_lan9217_device = { .resource = smsc911x_resources, }; -static void mxc_expio_irq_handler(u32 irq, struct irq_desc *desc) +static void mxc_expio_irq_handler(struct irq_desc *desc) { u32 imr_val; u32 int_valid; diff --git a/arch/arm/mach-imx/mach-mx31ads.c b/arch/arm/mach-imx/mach-mx31ads.c index 2c0853560bd2..2b147e4bf9c9 100644 --- a/arch/arm/mach-imx/mach-mx31ads.c +++ b/arch/arm/mach-imx/mach-mx31ads.c @@ -154,7 +154,7 @@ static inline void mxc_init_imx_uart(void) imx31_add_imx_uart0(&uart_pdata); } -static void mx31ads_expio_irq_handler(u32 irq, struct irq_desc *desc) +static void mx31ads_expio_irq_handler(struct irq_desc *desc) { u32 imr_val; u32 int_valid; diff --git a/arch/arm/mach-iop13xx/msi.c b/arch/arm/mach-iop13xx/msi.c index 9f89e76dfbb9..f6235b28578c 100644 --- a/arch/arm/mach-iop13xx/msi.c +++ b/arch/arm/mach-iop13xx/msi.c @@ -91,7 +91,7 @@ static void (*write_imipr[])(u32) = { write_imipr_3, }; -static void iop13xx_msi_handler(unsigned int irq, struct irq_desc *desc) +static void iop13xx_msi_handler(struct irq_desc *desc) { int i, j; unsigned long status; diff --git a/arch/arm/mach-lpc32xx/irq.c b/arch/arm/mach-lpc32xx/irq.c index cce4cef12b6e..2ae431e8bc1b 100644 --- a/arch/arm/mach-lpc32xx/irq.c +++ b/arch/arm/mach-lpc32xx/irq.c @@ -370,7 +370,7 @@ static struct irq_chip lpc32xx_irq_chip = { .irq_set_wake = lpc32xx_irq_wake }; -static void lpc32xx_sic1_handler(unsigned int irq, struct irq_desc *desc) +static void lpc32xx_sic1_handler(struct irq_desc *desc) { unsigned long ints = __raw_readl(LPC32XX_INTC_STAT(LPC32XX_SIC1_BASE)); @@ -383,7 +383,7 @@ 
static void lpc32xx_sic1_handler(unsigned int irq, struct irq_desc *desc) } } -static void lpc32xx_sic2_handler(unsigned int irq, struct irq_desc *desc) +static void lpc32xx_sic2_handler(struct irq_desc *desc) { unsigned long ints = __raw_readl(LPC32XX_INTC_STAT(LPC32XX_SIC2_BASE)); diff --git a/arch/arm/mach-netx/generic.c b/arch/arm/mach-netx/generic.c index 6373e2bff203..842302df99c1 100644 --- a/arch/arm/mach-netx/generic.c +++ b/arch/arm/mach-netx/generic.c @@ -69,8 +69,7 @@ static struct platform_device *devices[] __initdata = { #define DEBUG_IRQ(fmt...) while (0) {} #endif -static void -netx_hif_demux_handler(unsigned int irq_unused, struct irq_desc *desc) +static void netx_hif_demux_handler(struct irq_desc *desc) { unsigned int irq = NETX_IRQ_HIF_CHAINED(0); unsigned int stat; diff --git a/arch/arm/mach-omap1/fpga.c b/arch/arm/mach-omap1/fpga.c index dfec671b1639..39e20d0ead08 100644 --- a/arch/arm/mach-omap1/fpga.c +++ b/arch/arm/mach-omap1/fpga.c @@ -87,7 +87,7 @@ static void fpga_mask_ack_irq(struct irq_data *d) fpga_ack_irq(d); } -static void innovator_fpga_IRQ_demux(unsigned int irq, struct irq_desc *desc) +static void innovator_fpga_IRQ_demux(struct irq_desc *desc) { u32 stat; int fpga_irq; diff --git a/arch/arm/mach-omap2/prm_common.c b/arch/arm/mach-omap2/prm_common.c index 257e98c26618..3fc2cbe52113 100644 --- a/arch/arm/mach-omap2/prm_common.c +++ b/arch/arm/mach-omap2/prm_common.c @@ -102,7 +102,7 @@ static void omap_prcm_events_filter_priority(unsigned long *events, * dispatched accordingly. Clearing of the wakeup events should be * done by the SoC specific individual handlers. 
*/ -static void omap_prcm_irq_handler(unsigned int irq, struct irq_desc *desc) +static void omap_prcm_irq_handler(struct irq_desc *desc) { unsigned long pending[OMAP_PRCM_MAX_NR_PENDING_REG]; unsigned long priority_pending[OMAP_PRCM_MAX_NR_PENDING_REG]; diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c index 70366b35d299..a3ebb517cca1 100644 --- a/arch/arm/mach-pxa/balloon3.c +++ b/arch/arm/mach-pxa/balloon3.c @@ -496,7 +496,7 @@ static struct irq_chip balloon3_irq_chip = { .irq_unmask = balloon3_unmask_irq, }; -static void balloon3_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void balloon3_irq_handler(struct irq_desc *desc) { unsigned long pending = __raw_readl(BALLOON3_INT_CONTROL_REG) & balloon3_irq_enabled; diff --git a/arch/arm/mach-pxa/cm-x2xx-pci.c b/arch/arm/mach-pxa/cm-x2xx-pci.c index 1fa79f1f832d..3221ae15bef7 100644 --- a/arch/arm/mach-pxa/cm-x2xx-pci.c +++ b/arch/arm/mach-pxa/cm-x2xx-pci.c @@ -29,13 +29,12 @@ void __iomem *it8152_base_address; static int cmx2xx_it8152_irq_gpio; -static void cmx2xx_it8152_irq_demux(unsigned int __irq, struct irq_desc *desc) +static void cmx2xx_it8152_irq_demux(struct irq_desc *desc) { - unsigned int irq = irq_desc_get_irq(desc); /* clear our parent irq */ desc->irq_data.chip->irq_ack(&desc->irq_data); - it8152_irq_demux(irq, desc); + it8152_irq_demux(desc); } void __cmx2xx_pci_init_irq(int irq_gpio) diff --git a/arch/arm/mach-pxa/lpd270.c b/arch/arm/mach-pxa/lpd270.c index b070167deef2..4823d972e647 100644 --- a/arch/arm/mach-pxa/lpd270.c +++ b/arch/arm/mach-pxa/lpd270.c @@ -120,7 +120,7 @@ static struct irq_chip lpd270_irq_chip = { .irq_unmask = lpd270_unmask_irq, }; -static void lpd270_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void lpd270_irq_handler(struct irq_desc *desc) { unsigned int irq; unsigned long pending; diff --git a/arch/arm/mach-pxa/pcm990-baseboard.c b/arch/arm/mach-pxa/pcm990-baseboard.c index 9a0c8affdadb..d8319b54299a 100644 --- 
a/arch/arm/mach-pxa/pcm990-baseboard.c +++ b/arch/arm/mach-pxa/pcm990-baseboard.c @@ -284,7 +284,7 @@ static struct irq_chip pcm990_irq_chip = { .irq_unmask = pcm990_unmask_irq, }; -static void pcm990_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void pcm990_irq_handler(struct irq_desc *desc) { unsigned int irq; unsigned long pending; diff --git a/arch/arm/mach-pxa/viper.c b/arch/arm/mach-pxa/viper.c index 4841d6cefe76..8ab26370107e 100644 --- a/arch/arm/mach-pxa/viper.c +++ b/arch/arm/mach-pxa/viper.c @@ -276,7 +276,7 @@ static inline unsigned long viper_irq_pending(void) viper_irq_enabled_mask; } -static void viper_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void viper_irq_handler(struct irq_desc *desc) { unsigned int irq; unsigned long pending; diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c index 6f94dd7b4dee..30e62a3f0701 100644 --- a/arch/arm/mach-pxa/zeus.c +++ b/arch/arm/mach-pxa/zeus.c @@ -105,7 +105,7 @@ static inline unsigned long zeus_irq_pending(void) return __raw_readw(ZEUS_CPLD_ISA_IRQ) & zeus_irq_enabled_mask; } -static void zeus_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void zeus_irq_handler(struct irq_desc *desc) { unsigned int irq; unsigned long pending; diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c index f726d4c4e6dd..dc67a7fb3831 100644 --- a/arch/arm/mach-rpc/ecard.c +++ b/arch/arm/mach-rpc/ecard.c @@ -551,8 +551,7 @@ static void ecard_check_lockup(struct irq_desc *desc) } } -static void -ecard_irq_handler(unsigned int irq, struct irq_desc *desc) +static void ecard_irq_handler(struct irq_desc *desc) { ecard_t *ec; int called = 0; diff --git a/arch/arm/mach-s3c24xx/bast-irq.c b/arch/arm/mach-s3c24xx/bast-irq.c index ced1ab86ac83..2bb08961e934 100644 --- a/arch/arm/mach-s3c24xx/bast-irq.c +++ b/arch/arm/mach-s3c24xx/bast-irq.c @@ -100,9 +100,7 @@ static struct irq_chip bast_pc104_chip = { .irq_ack = bast_pc104_maskack }; -static void 
-bast_irq_pc104_demux(unsigned int irq, - struct irq_desc *desc) +static void bast_irq_pc104_demux(struct irq_desc *desc) { unsigned int stat; unsigned int irqno; diff --git a/arch/arm/mach-s3c64xx/common.c b/arch/arm/mach-s3c64xx/common.c index fd63ecfb2f81..ddb30b8434c5 100644 --- a/arch/arm/mach-s3c64xx/common.c +++ b/arch/arm/mach-s3c64xx/common.c @@ -388,22 +388,22 @@ static inline void s3c_irq_demux_eint(unsigned int start, unsigned int end) } } -static void s3c_irq_demux_eint0_3(unsigned int irq, struct irq_desc *desc) +static void s3c_irq_demux_eint0_3(struct irq_desc *desc) { s3c_irq_demux_eint(0, 3); } -static void s3c_irq_demux_eint4_11(unsigned int irq, struct irq_desc *desc) +static void s3c_irq_demux_eint4_11(struct irq_desc *desc) { s3c_irq_demux_eint(4, 11); } -static void s3c_irq_demux_eint12_19(unsigned int irq, struct irq_desc *desc) +static void s3c_irq_demux_eint12_19(struct irq_desc *desc) { s3c_irq_demux_eint(12, 19); } -static void s3c_irq_demux_eint20_27(unsigned int irq, struct irq_desc *desc) +static void s3c_irq_demux_eint20_27(struct irq_desc *desc) { s3c_irq_demux_eint(20, 27); } diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c index 6d237b4f7a8e..8411985af9ff 100644 --- a/arch/arm/mach-sa1100/neponset.c +++ b/arch/arm/mach-sa1100/neponset.c @@ -166,7 +166,7 @@ static struct sa1100_port_fns neponset_port_fns = { * ensure that the IRQ signal is deasserted before returning. This * is rather unfortunate. 
*/ -static void neponset_irq_handler(unsigned int irq, struct irq_desc *desc) +static void neponset_irq_handler(struct irq_desc *desc) { struct neponset_drvdata *d = irq_desc_get_handler_data(desc); unsigned int irr; diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S index 71df43547659..39c20afad7ed 100644 --- a/arch/arm/nwfpe/entry.S +++ b/arch/arm/nwfpe/entry.S @@ -95,9 +95,10 @@ emulate: reteq r4 @ no, return failure next: + uaccess_enable r3 .Lx1: ldrt r6, [r5], #4 @ get the next instruction and @ increment PC - + uaccess_disable r3 and r2, r6, #0x0F000000 @ test for FP insns teq r2, #0x0C000000 teqne r2, #0x0D000000 diff --git a/arch/arm/plat-orion/gpio.c b/arch/arm/plat-orion/gpio.c index 79c33eca09a3..7bd22d8e5b11 100644 --- a/arch/arm/plat-orion/gpio.c +++ b/arch/arm/plat-orion/gpio.c @@ -407,7 +407,7 @@ static int gpio_irq_set_type(struct irq_data *d, u32 type) return 0; } -static void gpio_irq_handler(unsigned __irq, struct irq_desc *desc) +static void gpio_irq_handler(struct irq_desc *desc) { struct orion_gpio_chip *ochip = irq_desc_get_handler_data(desc); u32 cause, type; diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S index f00e08075938..10fd99c568c6 100644 --- a/arch/arm/xen/hypercall.S +++ b/arch/arm/xen/hypercall.S @@ -98,8 +98,23 @@ ENTRY(privcmd_call) mov r1, r2 mov r2, r3 ldr r3, [sp, #8] + /* + * Privcmd calls are issued by the userspace. We need to allow the + * kernel to access the userspace memory before issuing the hypercall. + */ + uaccess_enable r4 + + /* r4 is loaded now as we use it as scratch register before */ ldr r4, [sp, #4] __HVC(XEN_IMM) + + /* + * Disable userspace access from kernel. This is fine to do it + * unconditionally as no set_fs(KERNEL_DS)/set_fs(get_ds()) is + * called before. 
+ */ + uaccess_disable r4 + ldm sp!, {r4} ret lr ENDPROC(privcmd_call); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7d95663c0160..07d1811aa03f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -32,6 +32,7 @@ config ARM64 select GENERIC_CLOCKEVENTS_BROADCAST select GENERIC_CPU_AUTOPROBE select GENERIC_EARLY_IOREMAP + select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL @@ -331,6 +332,22 @@ config ARM64_ERRATUM_845719 If unsure, say Y. +config ARM64_ERRATUM_843419 + bool "Cortex-A53: 843419: A load or store might access an incorrect address" + depends on MODULES + default y + help + This option builds kernel modules using the large memory model in + order to avoid the use of the ADRP instruction, which can cause + a subsequent memory access to use an incorrect address on Cortex-A53 + parts up to r0p4. + + Note that the kernel itself must be linked with a version of ld + which fixes potentially affected ADRP instructions through the + use of veneers. + + If unsure, say Y. + endmenu diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 15ff5b4156fd..f9914d7c1bb0 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -41,6 +41,10 @@ endif CHECKFLAGS += -D__aarch64__ +ifeq ($(CONFIG_ARM64_ERRATUM_843419), y) +CFLAGS_MODULE += -mcmodel=large +endif + # Default value head-y := arch/arm64/kernel/head.o diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index 2bb7009bdac7..a57601f9d17c 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -43,9 +43,4 @@ static inline void ack_bad_irq(unsigned int irq) irq_err_count++; } -/* - * No arch-specific IRQ flags. 
- */ -#define set_irq_flags(irq, flags) - #endif /* __ASM_HARDIRQ_H */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 7605e095217f..9694f2654593 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -95,6 +95,7 @@ SCTLR_EL2_SA | SCTLR_EL2_I) /* TCR_EL2 Registers bits */ +#define TCR_EL2_RES1 ((1 << 31) | (1 << 23)) #define TCR_EL2_TBI (1 << 20) #define TCR_EL2_PS (7 << 16) #define TCR_EL2_PS_40B (2 << 16) @@ -106,9 +107,10 @@ #define TCR_EL2_MASK (TCR_EL2_TG0 | TCR_EL2_SH0 | \ TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ) -#define TCR_EL2_FLAGS (TCR_EL2_PS_40B) +#define TCR_EL2_FLAGS (TCR_EL2_RES1 | TCR_EL2_PS_40B) /* VTCR_EL2 Registers bits */ +#define VTCR_EL2_RES1 (1 << 31) #define VTCR_EL2_PS_MASK (7 << 16) #define VTCR_EL2_TG0_MASK (1 << 14) #define VTCR_EL2_TG0_4K (0 << 14) @@ -147,7 +149,8 @@ */ #define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \ VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ - VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B) + VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B | \ + VTCR_EL2_RES1) #define VTTBR_X (38 - VTCR_EL2_T0SZ_40B) #else /* @@ -158,7 +161,8 @@ */ #define VTCR_EL2_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \ VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ - VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B) + VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B | \ + VTCR_EL2_RES1) #define VTTBR_X (37 - VTCR_EL2_T0SZ_40B) #endif @@ -168,7 +172,6 @@ #define VTTBR_VMID_MASK (UL(0xFF) << VTTBR_VMID_SHIFT) /* Hyp System Trap Register */ -#define HSTR_EL2_TTEE (1 << 16) #define HSTR_EL2_T(x) (1 << x) /* Hyp Coproccessor Trap Register Shifts */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 67fa0de3d483..5e377101f919 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -53,9 +53,7 @@ #define IFSR32_EL2 25 /* Instruction Fault Status Register */ #define FPEXC32_EL2 26 /* Floating-Point Exception Control Register */ 
#define DBGVCR32_EL2 27 /* Debug Vector Catch Register */ -#define TEECR32_EL1 28 /* ThumbEE Configuration Register */ -#define TEEHBR32_EL1 29 /* ThumbEE Handler Base Register */ -#define NR_SYS_REGS 30 +#define NR_SYS_REGS 28 /* 32bit mapping */ #define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 415938dc45cf..4562459456a6 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -30,12 +30,6 @@ #define __KVM_HAVE_ARCH_INTC_INITIALIZED -#if defined(CONFIG_KVM_ARM_MAX_VCPUS) -#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS -#else -#define KVM_MAX_VCPUS 0 -#endif - #define KVM_USER_MEM_SLOTS 32 #define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 @@ -43,6 +37,8 @@ #include <kvm/arm_vgic.h> #include <kvm/arm_arch_timer.h> +#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS + #define KVM_VCPU_MAX_FEATURES 3 int __attribute_const__ kvm_target_cpu(void); @@ -195,6 +191,7 @@ struct kvm_vm_stat { struct kvm_vcpu_stat { u32 halt_successful_poll; + u32 halt_attempted_poll; u32 halt_wakeup; }; diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 6900b2d95371..b0329be95cb1 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -26,13 +26,9 @@ * Software defined PTE bits definition. 
*/ #define PTE_VALID (_AT(pteval_t, 1) << 0) +#define PTE_WRITE (PTE_DBM) /* same as DBM (51) */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) -#ifdef CONFIG_ARM64_HW_AFDBM -#define PTE_WRITE (PTE_DBM) /* same as DBM */ -#else -#define PTE_WRITE (_AT(pteval_t, 1) << 57) -#endif #define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ /* @@ -146,7 +142,7 @@ extern struct page *empty_zero_page; #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #ifdef CONFIG_ARM64_HW_AFDBM -#define pte_hw_dirty(pte) (!(pte_val(pte) & PTE_RDONLY)) +#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) #else #define pte_hw_dirty(pte) (0) #endif @@ -238,7 +234,7 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); * When hardware DBM is not present, the sofware PTE_DIRTY bit is updated via * the page fault mechanism. Checking the dirty status of a pte becomes: * - * PTE_DIRTY || !PTE_RDONLY + * PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY) */ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) @@ -503,7 +499,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) PTE_PROT_NONE | PTE_WRITE | PTE_TYPE_MASK; /* preserve the hardware dirty information */ if (pte_hw_dirty(pte)) - newprot |= PTE_DIRTY; + pte = pte_mkdirty(pte); pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); return pte; } diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 9b3b62ac9c24..cebf78661a55 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -134,7 +134,7 @@ static int os_lock_notify(struct notifier_block *self, unsigned long action, void *data) { int cpu = (unsigned long)data; - if (action == CPU_ONLINE) + if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) smp_call_function_single(cpu, clear_os_lock, NULL, 1); return NOTIFY_OK; } diff --git a/arch/arm64/kernel/head.S 
b/arch/arm64/kernel/head.S index a055be6125cf..90d09eddd5b2 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -523,6 +523,11 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems msr hstr_el2, xzr // Disable CP15 traps to EL2 #endif + /* EL2 debug */ + mrs x0, pmcr_el0 // Disable debug access traps + ubfx x0, x0, #11, #5 // to EL2 and allow access to + msr mdcr_el2, x0 // all PMU counters from EL1 + /* Stage-2 translation */ msr vttbr_el2, xzr diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index c97040ecf838..bba85c8f8037 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -872,7 +872,7 @@ static int hw_breakpoint_reset_notify(struct notifier_block *self, void *hcpu) { int cpu = (long)hcpu; - if (action == CPU_ONLINE) + if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1); return NOTIFY_OK; } diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 67bf4107f6ef..876eb8df50bf 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -332,12 +332,14 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21, AARCH64_INSN_IMM_ADR); break; +#ifndef CONFIG_ARM64_ERRATUM_843419 case R_AARCH64_ADR_PREL_PG_HI21_NC: overflow_check = false; case R_AARCH64_ADR_PREL_PG_HI21: ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21, AARCH64_INSN_IMM_ADR); break; +#endif case R_AARCH64_ADD_ABS_LO12_NC: case R_AARCH64_LDST8_ABS_LO12_NC: overflow_check = false; diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 948f0ad2de23..71ef6dc89ae5 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -212,14 +212,32 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) /* * VFP save/restore code. 
+ * + * We have to be careful with endianness, since the fpsimd context-switch + * code operates on 128-bit (Q) register values whereas the compat ABI + * uses an array of 64-bit (D) registers. Consequently, we need to swap + * the two halves of each Q register when running on a big-endian CPU. */ +union __fpsimd_vreg { + __uint128_t raw; + struct { +#ifdef __AARCH64EB__ + u64 hi; + u64 lo; +#else + u64 lo; + u64 hi; +#endif + }; +}; + static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) { struct fpsimd_state *fpsimd = &current->thread.fpsimd_state; compat_ulong_t magic = VFP_MAGIC; compat_ulong_t size = VFP_STORAGE_SIZE; compat_ulong_t fpscr, fpexc; - int err = 0; + int i, err = 0; /* * Save the hardware registers to the fpsimd_state structure. @@ -235,10 +253,15 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) /* * Now copy the FP registers. Since the registers are packed, * we can copy the prefix we want (V0-V15) as it is. - * FIXME: Won't work if big endian. */ - err |= __copy_to_user(&frame->ufp.fpregs, fpsimd->vregs, - sizeof(frame->ufp.fpregs)); + for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) { + union __fpsimd_vreg vreg = { + .raw = fpsimd->vregs[i >> 1], + }; + + __put_user_error(vreg.lo, &frame->ufp.fpregs[i], err); + __put_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err); + } /* Create an AArch32 fpscr from the fpsr and the fpcr. 
*/ fpscr = (fpsimd->fpsr & VFP_FPSCR_STAT_MASK) | @@ -263,7 +286,7 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) compat_ulong_t magic = VFP_MAGIC; compat_ulong_t size = VFP_STORAGE_SIZE; compat_ulong_t fpscr; - int err = 0; + int i, err = 0; __get_user_error(magic, &frame->magic, err); __get_user_error(size, &frame->size, err); @@ -273,12 +296,14 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE) return -EINVAL; - /* - * Copy the FP registers into the start of the fpsimd_state. - * FIXME: Won't work if big endian. - */ - err |= __copy_from_user(fpsimd.vregs, frame->ufp.fpregs, - sizeof(frame->ufp.fpregs)); + /* Copy the FP registers into the start of the fpsimd_state. */ + for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) { + union __fpsimd_vreg vreg; + + __get_user_error(vreg.lo, &frame->ufp.fpregs[i], err); + __get_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err); + fpsimd.vregs[i >> 1] = vreg.raw; + } /* Extract the fpsr and the fpcr from the fpscr */ __get_user_error(fpscr, &frame->ufp.fpscr, err); diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index bfffe8f4bd53..5c7e920e4861 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -41,15 +41,4 @@ config KVM_ARM_HOST ---help--- Provides host support for ARM processors. -config KVM_ARM_MAX_VCPUS - int "Number maximum supported virtual CPUs per VM" - depends on KVM_ARM_HOST - default 4 - help - Static number of max supported virtual CPUs per VM. - - If you choose a high number, the vcpu structures will be quite - large, so only choose a reasonable number that you expect to - actually use. 
- endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 37c89ea2c572..e5836138ec42 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -433,20 +433,13 @@ mrs x5, ifsr32_el2 stp x4, x5, [x3] - skip_fpsimd_state x8, 3f + skip_fpsimd_state x8, 2f mrs x6, fpexc32_el2 str x6, [x3, #16] -3: - skip_debug_state x8, 2f +2: + skip_debug_state x8, 1f mrs x7, dbgvcr32_el2 str x7, [x3, #24] -2: - skip_tee_state x8, 1f - - add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1) - mrs x4, teecr32_el1 - mrs x5, teehbr32_el1 - stp x4, x5, [x3] 1: .endm @@ -466,16 +459,9 @@ msr dacr32_el2, x4 msr ifsr32_el2, x5 - skip_debug_state x8, 2f + skip_debug_state x8, 1f ldr x7, [x3, #24] msr dbgvcr32_el2, x7 -2: - skip_tee_state x8, 1f - - add x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1) - ldp x4, x5, [x3] - msr teecr32_el1, x4 - msr teehbr32_el1, x5 1: .endm @@ -570,8 +556,6 @@ alternative_endif mrs x3, cntv_ctl_el0 and x3, x3, #3 str w3, [x0, #VCPU_TIMER_CNTV_CTL] - bic x3, x3, #1 // Clear Enable - msr cntv_ctl_el0, x3 isb @@ -579,6 +563,9 @@ alternative_endif str x3, [x0, #VCPU_TIMER_CNTV_CVAL] 1: + // Disable the virtual timer + msr cntv_ctl_el0, xzr + // Allow physical timer/counter access for the host mrs x2, cnthctl_el2 orr x2, x2, #3 @@ -753,6 +740,9 @@ ENTRY(__kvm_vcpu_run) // Guest context add x2, x0, #VCPU_CONTEXT + // We must restore the 32-bit state before the sysregs, thanks + // to Cortex-A57 erratum #852523. + restore_guest_32bit_state bl __restore_sysregs skip_debug_state x3, 1f @@ -760,7 +750,6 @@ ENTRY(__kvm_vcpu_run) kern_hyp_va x3 bl __restore_debug 1: - restore_guest_32bit_state restore_guest_regs // That's it, no more messing around. 
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index b41607d270ac..d03d3af17e7e 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -272,7 +272,7 @@ static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, { __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; - if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; return 0; } @@ -314,7 +314,7 @@ static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, { __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; - if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; return 0; @@ -358,7 +358,7 @@ static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, { __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; - if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; return 0; } @@ -400,7 +400,7 @@ static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, { __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; - if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0) + if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0) return -EFAULT; return 0; } @@ -539,13 +539,6 @@ static const struct sys_reg_desc sys_reg_descs[] = { { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b110), trap_dbgauthstatus_el1 }, - /* TEECR32_EL1 */ - { Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000), - NULL, reset_val, TEECR32_EL1, 0 }, - /* TEEHBR32_EL1 */ - { Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000), - NULL, reset_val, TEEHBR32_EL1, 0 }, - /* MDCCSR_EL1 */ { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0001), Op2(0b000), trap_raz_wi }, diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 0bcc4bc94b4a..99224dcebdc5 100644 --- 
a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -100,7 +100,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, if (IS_ENABLED(CONFIG_ZONE_DMA) && dev->coherent_dma_mask <= DMA_BIT_MASK(32)) flags |= GFP_DMA; - if (IS_ENABLED(CONFIG_DMA_CMA) && (flags & __GFP_WAIT)) { + if (dev_get_cma_area(dev) && (flags & __GFP_WAIT)) { struct page *page; void *addr; diff --git a/arch/avr32/mach-at32ap/extint.c b/arch/avr32/mach-at32ap/extint.c index d51ff8f1c541..96cabad68489 100644 --- a/arch/avr32/mach-at32ap/extint.c +++ b/arch/avr32/mach-at32ap/extint.c @@ -144,7 +144,7 @@ static struct irq_chip eic_chip = { .irq_set_type = eic_set_irq_type, }; -static void demux_eic_irq(unsigned int irq, struct irq_desc *desc) +static void demux_eic_irq(struct irq_desc *desc) { struct eic *eic = irq_desc_get_handler_data(desc); unsigned long status, pending; diff --git a/arch/avr32/mach-at32ap/pio.c b/arch/avr32/mach-at32ap/pio.c index 157a5e0e789f..4f61378c3453 100644 --- a/arch/avr32/mach-at32ap/pio.c +++ b/arch/avr32/mach-at32ap/pio.c @@ -281,7 +281,7 @@ static struct irq_chip gpio_irqchip = { .irq_set_type = gpio_irq_type, }; -static void gpio_irq_handler(unsigned irq, struct irq_desc *desc) +static void gpio_irq_handler(struct irq_desc *desc) { struct pio_device *pio = irq_desc_get_chip_data(desc); unsigned gpio_irq; diff --git a/arch/blackfin/include/asm/irq_handler.h b/arch/blackfin/include/asm/irq_handler.h index 4b2a992794d7..d2f90c72378e 100644 --- a/arch/blackfin/include/asm/irq_handler.h +++ b/arch/blackfin/include/asm/irq_handler.h @@ -60,7 +60,7 @@ extern void bfin_internal_mask_irq(unsigned int irq); extern void bfin_internal_unmask_irq(unsigned int irq); struct irq_desc; -extern void bfin_demux_mac_status_irq(unsigned int, struct irq_desc *); -extern void bfin_demux_gpio_irq(unsigned int, struct irq_desc *); +extern void bfin_demux_mac_status_irq(struct irq_desc *); +extern void bfin_demux_gpio_irq(struct irq_desc *); #endif diff --git 
a/arch/blackfin/kernel/irqchip.c b/arch/blackfin/kernel/irqchip.c index 0ba25764b8c0..052cde5ed2e4 100644 --- a/arch/blackfin/kernel/irqchip.c +++ b/arch/blackfin/kernel/irqchip.c @@ -107,7 +107,7 @@ asmlinkage void asm_do_IRQ(unsigned int irq, struct pt_regs *regs) * than crashing, do something sensible. */ if (irq >= NR_IRQS) - handle_bad_irq(irq, &bad_irq_desc); + handle_bad_irq(&bad_irq_desc); else generic_handle_irq(irq); diff --git a/arch/blackfin/mach-bf537/ints-priority.c b/arch/blackfin/mach-bf537/ints-priority.c index 14b2f74554dc..a48baae4384d 100644 --- a/arch/blackfin/mach-bf537/ints-priority.c +++ b/arch/blackfin/mach-bf537/ints-priority.c @@ -89,8 +89,7 @@ static struct irq_chip bf537_generic_error_irqchip = { .irq_unmask = bf537_generic_error_unmask_irq, }; -static void bf537_demux_error_irq(unsigned int int_err_irq, - struct irq_desc *inta_desc) +static void bf537_demux_error_irq(struct irq_desc *inta_desc) { int irq = 0; @@ -182,15 +181,12 @@ static struct irq_chip bf537_mac_rx_irqchip = { .irq_unmask = bf537_mac_rx_unmask_irq, }; -static void bf537_demux_mac_rx_irq(unsigned int __int_irq, - struct irq_desc *desc) +static void bf537_demux_mac_rx_irq(struct irq_desc *desc) { - unsigned int int_irq = irq_desc_get_irq(desc); - if (bfin_read_DMA1_IRQ_STATUS() & (DMA_DONE | DMA_ERR)) bfin_handle_irq(IRQ_MAC_RX); else - bfin_demux_gpio_irq(int_irq, desc); + bfin_demux_gpio_irq(desc); } #endif diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c index a6d1b03cdf36..e8d4d748d0fd 100644 --- a/arch/blackfin/mach-common/ints-priority.c +++ b/arch/blackfin/mach-common/ints-priority.c @@ -656,8 +656,7 @@ static struct irq_chip bfin_mac_status_irqchip = { .irq_set_wake = bfin_mac_status_set_wake, }; -void bfin_demux_mac_status_irq(unsigned int int_err_irq, - struct irq_desc *inta_desc) +void bfin_demux_mac_status_irq(struct irq_desc *inta_desc) { int i, irq = 0; u32 status = bfin_read_EMAC_SYSTAT(); @@ -825,7 +824,7 
@@ static void bfin_demux_gpio_block(unsigned int irq) } } -void bfin_demux_gpio_irq(unsigned int __inta_irq, struct irq_desc *desc) +void bfin_demux_gpio_irq(struct irq_desc *desc) { unsigned int inta_irq = irq_desc_get_irq(desc); unsigned int irq; diff --git a/arch/c6x/platforms/megamod-pic.c b/arch/c6x/platforms/megamod-pic.c index d487698e978a..ddcb45d7dfa7 100644 --- a/arch/c6x/platforms/megamod-pic.c +++ b/arch/c6x/platforms/megamod-pic.c @@ -93,7 +93,7 @@ static struct irq_chip megamod_chip = { .irq_unmask = unmask_megamod, }; -static void megamod_irq_cascade(unsigned int __irq, struct irq_desc *desc) +static void megamod_irq_cascade(struct irq_desc *desc) { struct megamod_cascade_data *cascade; struct megamod_pic *pic; diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index 95c39b95e97e..99c96a5e6016 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -11,7 +11,7 @@ -#define NR_syscalls 319 /* length of syscall table */ +#define NR_syscalls 321 /* length of syscall table */ /* * The following defines stop scripts/checksyscalls.sh from complaining about diff --git a/arch/ia64/include/uapi/asm/unistd.h b/arch/ia64/include/uapi/asm/unistd.h index 461079560c78..98e94e19a5a0 100644 --- a/arch/ia64/include/uapi/asm/unistd.h +++ b/arch/ia64/include/uapi/asm/unistd.h @@ -332,5 +332,7 @@ #define __NR_memfd_create 1340 #define __NR_bpf 1341 #define __NR_execveat 1342 +#define __NR_userfaultfd 1343 +#define __NR_membarrier 1344 #endif /* _UAPI_ASM_IA64_UNISTD_H */ diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index ae0de7bf5525..37cc7a65cd3e 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1768,5 +1768,7 @@ sys_call_table: data8 sys_memfd_create // 1340 data8 sys_bpf data8 sys_execveat + data8 sys_userfaultfd + data8 sys_membarrier .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls diff --git a/arch/m68k/amiga/amiints.c 
b/arch/m68k/amiga/amiints.c index 47b5f90002ab..7ff739e94896 100644 --- a/arch/m68k/amiga/amiints.c +++ b/arch/m68k/amiga/amiints.c @@ -46,7 +46,7 @@ static struct irq_chip amiga_irq_chip = { * The builtin Amiga hardware interrupt handlers. */ -static void ami_int1(unsigned int irq, struct irq_desc *desc) +static void ami_int1(struct irq_desc *desc) { unsigned short ints = amiga_custom.intreqr & amiga_custom.intenar; @@ -69,7 +69,7 @@ static void ami_int1(unsigned int irq, struct irq_desc *desc) } } -static void ami_int3(unsigned int irq, struct irq_desc *desc) +static void ami_int3(struct irq_desc *desc) { unsigned short ints = amiga_custom.intreqr & amiga_custom.intenar; @@ -92,7 +92,7 @@ static void ami_int3(unsigned int irq, struct irq_desc *desc) } } -static void ami_int4(unsigned int irq, struct irq_desc *desc) +static void ami_int4(struct irq_desc *desc) { unsigned short ints = amiga_custom.intreqr & amiga_custom.intenar; @@ -121,7 +121,7 @@ static void ami_int4(unsigned int irq, struct irq_desc *desc) } } -static void ami_int5(unsigned int irq, struct irq_desc *desc) +static void ami_int5(struct irq_desc *desc) { unsigned short ints = amiga_custom.intreqr & amiga_custom.intenar; diff --git a/arch/m68k/coldfire/intc-5272.c b/arch/m68k/coldfire/intc-5272.c index 47371de60427..b0a19e207a63 100644 --- a/arch/m68k/coldfire/intc-5272.c +++ b/arch/m68k/coldfire/intc-5272.c @@ -143,12 +143,10 @@ static int intc_irq_set_type(struct irq_data *d, unsigned int type) * We need to be careful with the masking/acking due to the side effects * of masking an interrupt. 
*/ -static void intc_external_irq(unsigned int __irq, struct irq_desc *desc) +static void intc_external_irq(struct irq_desc *desc) { - unsigned int irq = irq_desc_get_irq(desc); - irq_desc_get_chip(desc)->irq_ack(&desc->irq_data); - handle_simple_irq(irq, desc); + handle_simple_irq(desc); } static struct irq_chip intc_irq_chip = { diff --git a/arch/m68k/include/asm/irq.h b/arch/m68k/include/asm/irq.h index 81ca118d58af..a644f4a53b94 100644 --- a/arch/m68k/include/asm/irq.h +++ b/arch/m68k/include/asm/irq.h @@ -64,8 +64,7 @@ extern void m68k_setup_auto_interrupt(void (*handler)(unsigned int, struct pt_regs *)); extern void m68k_setup_user_interrupt(unsigned int vec, unsigned int cnt); extern void m68k_setup_irq_controller(struct irq_chip *, - void (*handle)(unsigned int irq, - struct irq_desc *desc), + void (*handle)(struct irq_desc *desc), unsigned int irq, unsigned int cnt); extern unsigned int irq_canonicalize(unsigned int irq); diff --git a/arch/m68k/include/asm/mac_via.h b/arch/m68k/include/asm/mac_via.h index fe3fc9ae1b69..53c632c85b03 100644 --- a/arch/m68k/include/asm/mac_via.h +++ b/arch/m68k/include/asm/mac_via.h @@ -261,7 +261,7 @@ extern void via_irq_enable(int); extern void via_irq_disable(int); extern void via_nubus_irq_startup(int irq); extern void via_nubus_irq_shutdown(int irq); -extern void via1_irq(unsigned int irq, struct irq_desc *desc); +extern void via1_irq(struct irq_desc *desc); extern void via1_set_head(int); extern int via2_scsi_drq_pending(void); diff --git a/arch/m68k/mac/baboon.c b/arch/m68k/mac/baboon.c index 3fe0e43d44f6..f6f7d42713ec 100644 --- a/arch/m68k/mac/baboon.c +++ b/arch/m68k/mac/baboon.c @@ -45,7 +45,7 @@ void __init baboon_init(void) * Baboon interrupt handler. This works a lot like a VIA. 
*/ -static void baboon_irq(unsigned int irq, struct irq_desc *desc) +static void baboon_irq(struct irq_desc *desc) { int irq_bit, irq_num; unsigned char events; diff --git a/arch/m68k/mac/oss.c b/arch/m68k/mac/oss.c index 191610d97689..55d6592783f5 100644 --- a/arch/m68k/mac/oss.c +++ b/arch/m68k/mac/oss.c @@ -63,7 +63,7 @@ void __init oss_nubus_init(void) * Handle miscellaneous OSS interrupts. */ -static void oss_irq(unsigned int __irq, struct irq_desc *desc) +static void oss_irq(struct irq_desc *desc) { int events = oss->irq_pending & (OSS_IP_IOPSCC | OSS_IP_SCSI | OSS_IP_IOPISM); @@ -99,7 +99,7 @@ static void oss_irq(unsigned int __irq, struct irq_desc *desc) * Unlike the VIA/RBV this is on its own autovector interrupt level. */ -static void oss_nubus_irq(unsigned int irq, struct irq_desc *desc) +static void oss_nubus_irq(struct irq_desc *desc) { int events, irq_bit, i; diff --git a/arch/m68k/mac/psc.c b/arch/m68k/mac/psc.c index 3b9e302e7a37..cd38f29955c8 100644 --- a/arch/m68k/mac/psc.c +++ b/arch/m68k/mac/psc.c @@ -113,7 +113,7 @@ void __init psc_init(void) * PSC interrupt handler. It's a lot like the VIA interrupt handler. */ -static void psc_irq(unsigned int __irq, struct irq_desc *desc) +static void psc_irq(struct irq_desc *desc) { unsigned int offset = (unsigned int)irq_desc_get_handler_data(desc); unsigned int irq = irq_desc_get_irq(desc); diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c index e198dec868e4..ce56e04386e7 100644 --- a/arch/m68k/mac/via.c +++ b/arch/m68k/mac/via.c @@ -446,7 +446,7 @@ void via_nubus_irq_shutdown(int irq) * via6522.c :-), disable/pending masks added. 
*/ -void via1_irq(unsigned int irq, struct irq_desc *desc) +void via1_irq(struct irq_desc *desc) { int irq_num; unsigned char irq_bit, events; @@ -467,7 +467,7 @@ void via1_irq(unsigned int irq, struct irq_desc *desc) } while (events >= irq_bit); } -static void via2_irq(unsigned int irq, struct irq_desc *desc) +static void via2_irq(struct irq_desc *desc) { int irq_num; unsigned char irq_bit, events; @@ -493,7 +493,7 @@ static void via2_irq(unsigned int irq, struct irq_desc *desc) * VIA2 dispatcher as a fast interrupt handler. */ -void via_nubus_irq(unsigned int irq, struct irq_desc *desc) +static void via_nubus_irq(struct irq_desc *desc) { int slot_irq; unsigned char slot_bit, events; diff --git a/arch/metag/kernel/irq.c b/arch/metag/kernel/irq.c index a336094a7a6c..3074b64793e6 100644 --- a/arch/metag/kernel/irq.c +++ b/arch/metag/kernel/irq.c @@ -94,13 +94,11 @@ void do_IRQ(int irq, struct pt_regs *regs) "MOV D0.5,%0\n" "MOV D1Ar1,%1\n" "MOV D1RtP,%2\n" - "MOV D0Ar2,%3\n" "SWAP A0StP,D0.5\n" "SWAP PC,D1RtP\n" "MOV A0StP,D0.5\n" : - : "r" (isp), "r" (irq), "r" (desc->handle_irq), - "r" (desc) + : "r" (isp), "r" (desc), "r" (desc->handle_irq) : "memory", "cc", "D1Ar1", "D0Ar2", "D1Ar3", "D0Ar4", "D1Ar5", "D0Ar6", "D0Re0", "D1Re0", "D0.4", "D1RtP", "D0.5" diff --git a/arch/mips/alchemy/common/irq.c b/arch/mips/alchemy/common/irq.c index 4c496c50edf6..da9f9220048f 100644 --- a/arch/mips/alchemy/common/irq.c +++ b/arch/mips/alchemy/common/irq.c @@ -851,7 +851,7 @@ static struct syscore_ops alchemy_gpic_pmops = { /* create chained handlers for the 4 IC requests to the MIPS IRQ ctrl */ #define DISP(name, base, addr) \ -static void au1000_##name##_dispatch(unsigned int irq, struct irq_desc *d) \ +static void au1000_##name##_dispatch(struct irq_desc *d) \ { \ unsigned long r = __raw_readl((void __iomem *)KSEG1ADDR(addr)); \ if (likely(r)) \ @@ -865,7 +865,7 @@ DISP(ic0r1, AU1000_INTC0_INT_BASE, AU1000_IC0_PHYS_ADDR + IC_REQ1INT) DISP(ic1r0, AU1000_INTC1_INT_BASE, 
AU1000_IC1_PHYS_ADDR + IC_REQ0INT) DISP(ic1r1, AU1000_INTC1_INT_BASE, AU1000_IC1_PHYS_ADDR + IC_REQ1INT) -static void alchemy_gpic_dispatch(unsigned int irq, struct irq_desc *d) +static void alchemy_gpic_dispatch(struct irq_desc *d) { int i = __raw_readl(AU1300_GPIC_ADDR + AU1300_GPIC_PRIENC); generic_handle_irq(ALCHEMY_GPIC_INT_BASE + i); diff --git a/arch/mips/alchemy/devboards/bcsr.c b/arch/mips/alchemy/devboards/bcsr.c index 324ad72d7c36..faeddf119fd4 100644 --- a/arch/mips/alchemy/devboards/bcsr.c +++ b/arch/mips/alchemy/devboards/bcsr.c @@ -86,7 +86,7 @@ EXPORT_SYMBOL_GPL(bcsr_mod); /* * DB1200/PB1200 CPLD IRQ muxer */ -static void bcsr_csc_handler(unsigned int irq, struct irq_desc *d) +static void bcsr_csc_handler(struct irq_desc *d) { unsigned short bisr = __raw_readw(bcsr_virt + BCSR_REG_INTSTAT); struct irq_chip *chip = irq_desc_get_chip(d); diff --git a/arch/mips/ath25/ar2315.c b/arch/mips/ath25/ar2315.c index ec9a371f1e62..8da996142d6a 100644 --- a/arch/mips/ath25/ar2315.c +++ b/arch/mips/ath25/ar2315.c @@ -69,7 +69,7 @@ static struct irqaction ar2315_ahb_err_interrupt = { .name = "ar2315-ahb-error", }; -static void ar2315_misc_irq_handler(unsigned irq, struct irq_desc *desc) +static void ar2315_misc_irq_handler(struct irq_desc *desc) { u32 pending = ar2315_rst_reg_read(AR2315_ISR) & ar2315_rst_reg_read(AR2315_IMR); diff --git a/arch/mips/ath25/ar5312.c b/arch/mips/ath25/ar5312.c index e63e38fa4880..acd55a9cffe3 100644 --- a/arch/mips/ath25/ar5312.c +++ b/arch/mips/ath25/ar5312.c @@ -73,7 +73,7 @@ static struct irqaction ar5312_ahb_err_interrupt = { .name = "ar5312-ahb-error", }; -static void ar5312_misc_irq_handler(unsigned irq, struct irq_desc *desc) +static void ar5312_misc_irq_handler(struct irq_desc *desc) { u32 pending = ar5312_rst_reg_read(AR5312_ISR) & ar5312_rst_reg_read(AR5312_IMR); diff --git a/arch/mips/ath79/irq.c b/arch/mips/ath79/irq.c index 807132b838b2..15ecb4831e12 100644 --- a/arch/mips/ath79/irq.c +++ b/arch/mips/ath79/irq.c @@ -26,7 
+26,7 @@ #include "common.h" #include "machtypes.h" -static void ath79_misc_irq_handler(unsigned int irq, struct irq_desc *desc) +static void ath79_misc_irq_handler(struct irq_desc *desc) { void __iomem *base = ath79_reset_base; u32 pending; @@ -119,7 +119,7 @@ static void __init ath79_misc_irq_init(void) irq_set_chained_handler(ATH79_CPU_IRQ(6), ath79_misc_irq_handler); } -static void ar934x_ip2_irq_dispatch(unsigned int irq, struct irq_desc *desc) +static void ar934x_ip2_irq_dispatch(struct irq_desc *desc) { u32 status; @@ -148,7 +148,7 @@ static void ar934x_ip2_irq_init(void) irq_set_chained_handler(ATH79_CPU_IRQ(2), ar934x_ip2_irq_dispatch); } -static void qca955x_ip2_irq_dispatch(unsigned int irq, struct irq_desc *desc) +static void qca955x_ip2_irq_dispatch(struct irq_desc *desc) { u32 status; @@ -171,7 +171,7 @@ static void qca955x_ip2_irq_dispatch(unsigned int irq, struct irq_desc *desc) } } -static void qca955x_ip3_irq_dispatch(unsigned int irq, struct irq_desc *desc) +static void qca955x_ip3_irq_dispatch(struct irq_desc *desc) { u32 status; diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index f26c3c661cca..0352bc8d56b3 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -2221,7 +2221,7 @@ static irqreturn_t octeon_irq_cib_handler(int my_irq, void *data) if (irqd_get_trigger_type(irq_data) & IRQ_TYPE_EDGE_BOTH) cvmx_write_csr(host_data->raw_reg, 1ull << i); - generic_handle_irq_desc(irq, desc); + generic_handle_irq_desc(desc); } } diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index e8c8d9d0c45f..3a54dbca9f7e 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -128,6 +128,7 @@ struct kvm_vcpu_stat { u32 msa_disabled_exits; u32 flush_dcache_exits; u32 halt_successful_poll; + u32 halt_attempted_poll; u32 halt_wakeup; }; diff --git a/arch/mips/include/asm/netlogic/common.h 
b/arch/mips/include/asm/netlogic/common.h index 2a4c128277e4..be52c2125d71 100644 --- a/arch/mips/include/asm/netlogic/common.h +++ b/arch/mips/include/asm/netlogic/common.h @@ -57,8 +57,8 @@ #include struct irq_desc; -void nlm_smp_function_ipi_handler(unsigned int irq, struct irq_desc *desc); -void nlm_smp_resched_ipi_handler(unsigned int irq, struct irq_desc *desc); +void nlm_smp_function_ipi_handler(struct irq_desc *desc); +void nlm_smp_resched_ipi_handler(struct irq_desc *desc); void nlm_smp_irq_init(int hwcpuid); void nlm_boot_secondary_cpus(void); int nlm_wakeup_secondary_cpus(void); diff --git a/arch/mips/jz4740/gpio.c b/arch/mips/jz4740/gpio.c index 6cd69fdaa1c5..a74e181058b0 100644 --- a/arch/mips/jz4740/gpio.c +++ b/arch/mips/jz4740/gpio.c @@ -291,7 +291,7 @@ static void jz_gpio_check_trigger_both(struct jz_gpio_chip *chip, unsigned int i writel(mask, reg); } -static void jz_gpio_irq_demux_handler(unsigned int irq, struct irq_desc *desc) +static void jz_gpio_irq_demux_handler(struct irq_desc *desc) { uint32_t flag; unsigned int gpio_irq; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index cd4c129ce743..49ff3bfc007e 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -55,6 +55,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "msa_disabled", VCPU_STAT(msa_disabled_exits), KVM_STAT_VCPU }, { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU }, + { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), KVM_STAT_VCPU }, { "halt_wakeup", VCPU_STAT(halt_wakeup), KVM_STAT_VCPU }, {NULL} }; diff --git a/arch/mips/netlogic/common/smp.c b/arch/mips/netlogic/common/smp.c index 0136b4f9c9cd..10d86d54880a 100644 --- a/arch/mips/netlogic/common/smp.c +++ b/arch/mips/netlogic/common/smp.c @@ -82,7 +82,7 @@ void nlm_send_ipi_mask(const struct cpumask *mask, unsigned int action) } /* IRQ_IPI_SMP_FUNCTION Handler */ -void 
nlm_smp_function_ipi_handler(unsigned int __irq, struct irq_desc *desc) +void nlm_smp_function_ipi_handler(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); clear_c0_eimr(irq); @@ -92,7 +92,7 @@ void nlm_smp_function_ipi_handler(unsigned int __irq, struct irq_desc *desc) } /* IRQ_IPI_SMP_RESCHEDULE handler */ -void nlm_smp_resched_ipi_handler(unsigned int __irq, struct irq_desc *desc) +void nlm_smp_resched_ipi_handler(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); clear_c0_eimr(irq); diff --git a/arch/mips/pci/pci-ar2315.c b/arch/mips/pci/pci-ar2315.c index f8d0acb4f973..b4fa6413c4e5 100644 --- a/arch/mips/pci/pci-ar2315.c +++ b/arch/mips/pci/pci-ar2315.c @@ -318,7 +318,7 @@ static int ar2315_pci_host_setup(struct ar2315_pci_ctrl *apc) return 0; } -static void ar2315_pci_irq_handler(unsigned irq, struct irq_desc *desc) +static void ar2315_pci_irq_handler(struct irq_desc *desc) { struct ar2315_pci_ctrl *apc = irq_desc_get_handler_data(desc); u32 pending = ar2315_pci_reg_read(apc, AR2315_PCI_ISR) & diff --git a/arch/mips/pci/pci-ar71xx.c b/arch/mips/pci/pci-ar71xx.c index ad35a5e6a56c..7db963deec73 100644 --- a/arch/mips/pci/pci-ar71xx.c +++ b/arch/mips/pci/pci-ar71xx.c @@ -226,7 +226,7 @@ static struct pci_ops ar71xx_pci_ops = { .write = ar71xx_pci_write_config, }; -static void ar71xx_pci_irq_handler(unsigned int irq, struct irq_desc *desc) +static void ar71xx_pci_irq_handler(struct irq_desc *desc) { struct ar71xx_pci_controller *apc; void __iomem *base = ath79_reset_base; diff --git a/arch/mips/pci/pci-ar724x.c b/arch/mips/pci/pci-ar724x.c index 907d11dd921b..2013dad700df 100644 --- a/arch/mips/pci/pci-ar724x.c +++ b/arch/mips/pci/pci-ar724x.c @@ -225,7 +225,7 @@ static struct pci_ops ar724x_pci_ops = { .write = ar724x_pci_write, }; -static void ar724x_pci_irq_handler(unsigned int irq, struct irq_desc *desc) +static void ar724x_pci_irq_handler(struct irq_desc *desc) { struct ar724x_pci_controller *apc; void __iomem *base; diff 
--git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c index 53c8efaf1572..ed6732f9aa87 100644 --- a/arch/mips/pci/pci-rt3883.c +++ b/arch/mips/pci/pci-rt3883.c @@ -129,7 +129,7 @@ static void rt3883_pci_write_cfg32(struct rt3883_pci_controller *rpc, rt3883_pci_w32(rpc, val, RT3883_PCI_REG_CFGDATA); } -static void rt3883_pci_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void rt3883_pci_irq_handler(struct irq_desc *desc) { struct rt3883_pci_controller *rpc; u32 pending; diff --git a/arch/mips/ralink/irq.c b/arch/mips/ralink/irq.c index 8c624a8b9ea2..4cf77f358395 100644 --- a/arch/mips/ralink/irq.c +++ b/arch/mips/ralink/irq.c @@ -96,7 +96,7 @@ unsigned int get_c0_compare_int(void) return CP0_LEGACY_COMPARE_IRQ; } -static void ralink_intc_irq_handler(unsigned int irq, struct irq_desc *desc) +static void ralink_intc_irq_handler(struct irq_desc *desc) { u32 pending = rt_intc_r32(INTC_REG_STATUS0); diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 73eddda53b8e..4eec430d8fa8 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -28,6 +28,9 @@ BOOTCFLAGS += -m64 endif ifdef CONFIG_CPU_BIG_ENDIAN BOOTCFLAGS += -mbig-endian +else +BOOTCFLAGS += -mlittle-endian +BOOTCFLAGS += $(call cc-option,-mabi=elfv2) endif BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 98eebbf66340..195886a583ba 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -108,6 +108,7 @@ struct kvm_vcpu_stat { u32 dec_exits; u32 ext_intr_exits; u32 halt_successful_poll; + u32 halt_attempted_poll; u32 halt_wakeup; u32 dbell_exits; u32 gdbell_exits; diff --git a/arch/powerpc/include/asm/qe_ic.h b/arch/powerpc/include/asm/qe_ic.h index 25784cc959a0..1e155ca6d33c 100644 --- a/arch/powerpc/include/asm/qe_ic.h +++ b/arch/powerpc/include/asm/qe_ic.h @@ -59,14 +59,14 @@ enum qe_ic_grp_id { #ifdef 
CONFIG_QUICC_ENGINE void qe_ic_init(struct device_node *node, unsigned int flags, - void (*low_handler)(unsigned int irq, struct irq_desc *desc), - void (*high_handler)(unsigned int irq, struct irq_desc *desc)); + void (*low_handler)(struct irq_desc *desc), + void (*high_handler)(struct irq_desc *desc)); unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic); unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic); #else static inline void qe_ic_init(struct device_node *node, unsigned int flags, - void (*low_handler)(unsigned int irq, struct irq_desc *desc), - void (*high_handler)(unsigned int irq, struct irq_desc *desc)) + void (*low_handler)(struct irq_desc *desc), + void (*high_handler)(struct irq_desc *desc)) {} static inline unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic) { return 0; } @@ -78,8 +78,7 @@ void qe_ic_set_highest_priority(unsigned int virq, int high); int qe_ic_set_priority(unsigned int virq, unsigned int priority); int qe_ic_set_high_priority(unsigned int virq, unsigned int priority, int high); -static inline void qe_ic_cascade_low_ipic(unsigned int irq, - struct irq_desc *desc) +static inline void qe_ic_cascade_low_ipic(struct irq_desc *desc) { struct qe_ic *qe_ic = irq_desc_get_handler_data(desc); unsigned int cascade_irq = qe_ic_get_low_irq(qe_ic); @@ -88,8 +87,7 @@ static inline void qe_ic_cascade_low_ipic(unsigned int irq, generic_handle_irq(cascade_irq); } -static inline void qe_ic_cascade_high_ipic(unsigned int irq, - struct irq_desc *desc) +static inline void qe_ic_cascade_high_ipic(struct irq_desc *desc) { struct qe_ic *qe_ic = irq_desc_get_handler_data(desc); unsigned int cascade_irq = qe_ic_get_high_irq(qe_ic); @@ -98,8 +96,7 @@ static inline void qe_ic_cascade_high_ipic(unsigned int irq, generic_handle_irq(cascade_irq); } -static inline void qe_ic_cascade_low_mpic(unsigned int irq, - struct irq_desc *desc) +static inline void qe_ic_cascade_low_mpic(struct irq_desc *desc) { struct qe_ic *qe_ic = irq_desc_get_handler_data(desc); unsigned 
int cascade_irq = qe_ic_get_low_irq(qe_ic); @@ -111,8 +108,7 @@ static inline void qe_ic_cascade_low_mpic(unsigned int irq, chip->irq_eoi(&desc->irq_data); } -static inline void qe_ic_cascade_high_mpic(unsigned int irq, - struct irq_desc *desc) +static inline void qe_ic_cascade_high_mpic(struct irq_desc *desc) { struct qe_ic *qe_ic = irq_desc_get_handler_data(desc); unsigned int cascade_irq = qe_ic_get_high_irq(qe_ic); @@ -124,8 +120,7 @@ static inline void qe_ic_cascade_high_mpic(unsigned int irq, chip->irq_eoi(&desc->irq_data); } -static inline void qe_ic_cascade_muxed_mpic(unsigned int irq, - struct irq_desc *desc) +static inline void qe_ic_cascade_muxed_mpic(struct irq_desc *desc) { struct qe_ic *qe_ic = irq_desc_get_handler_data(desc); unsigned int cascade_irq; diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 71f2b3f02cf8..4d65499ee1c1 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -368,3 +368,4 @@ SYSCALL_SPU(memfd_create) SYSCALL_SPU(bpf) COMPAT_SYS(execveat) PPC64ONLY(switch_endian) +SYSCALL_SPU(userfaultfd) diff --git a/arch/powerpc/include/asm/tsi108_pci.h b/arch/powerpc/include/asm/tsi108_pci.h index 5653d7cc3e24..ae59d5b672b0 100644 --- a/arch/powerpc/include/asm/tsi108_pci.h +++ b/arch/powerpc/include/asm/tsi108_pci.h @@ -39,7 +39,7 @@ extern int tsi108_setup_pci(struct device_node *dev, u32 cfg_phys, int primary); extern void tsi108_pci_int_init(struct device_node *node); -extern void tsi108_irq_cascade(unsigned int irq, struct irq_desc *desc); +extern void tsi108_irq_cascade(struct irq_desc *desc); extern void tsi108_clear_pci_cfg_error(void); #endif /* _ASM_POWERPC_TSI108_PCI_H */ diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index f4f8b667d75b..4a055b6c2a64 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include -#define __NR_syscalls 364 +#define __NR_syscalls 365 #define 
__NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index e4aa173dae62..6ad58d4c879b 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -386,5 +386,6 @@ #define __NR_bpf 361 #define __NR_execveat 362 #define __NR_switch_endian 363 +#define __NR_userfaultfd 364 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 45096033d37b..290559df1e8b 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -441,7 +441,7 @@ void migrate_irqs(void) chip = irq_data_get_irq_chip(data); - cpumask_and(mask, data->affinity, map); + cpumask_and(mask, irq_data_get_affinity_mask(data), map); if (cpumask_any(mask) >= nr_cpu_ids) { pr_warn("Breaking affinity for irq %i\n", irq); cpumask_copy(mask, map); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index bb02e9f6944e..ad8c9db61237 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -38,6 +38,7 @@ #include #include #include +#include #define DBG(fmt...) @@ -109,6 +110,8 @@ notrace unsigned long __init early_init(unsigned long dt_ptr) * This is called very early on the boot process, after a minimal * MMU environment has been set up but before MMU_init is called. 
*/ +extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */ + notrace void __init machine_init(u64 dt_ptr) { lockdep_init(); @@ -116,6 +119,9 @@ notrace void __init machine_init(u64 dt_ptr) /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); + patch_instruction((unsigned int *)&memcpy, PPC_INST_NOP); + patch_instruction(&memset_nocache_branch, PPC_INST_NOP); + /* Do some early initialization based on the flat device tree */ early_init_devtree(__va(dt_ptr)); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index d75bf325f54a..cf009167d208 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -53,6 +53,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "ext_intr", VCPU_STAT(ext_intr_exits) }, { "queue_intr", VCPU_STAT(queue_intr) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll), }, + { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "pf_storage", VCPU_STAT(pf_storage) }, { "sp_storage", VCPU_STAT(sp_storage) }, diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index ae458f0fd061..fd5875179e5c 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -63,6 +63,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "dec", VCPU_STAT(dec_exits) }, { "ext_intr", VCPU_STAT(ext_intr_exits) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, + { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "doorbell", VCPU_STAT(dbell_exits) }, { "guest doorbell", VCPU_STAT(gdbell_exits) }, diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 2ef50c629470..c44df2dbedd5 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -73,6 +73,10 @@ CACHELINE_MASK = (L1_CACHE_BYTES-1) * Use dcbz on the complete cache lines in the destination * to set them to zero. 
This requires that the destination * area is cacheable. -- paulus + * + * During early init, cache might not be active yet, so dcbz cannot be used. + * We therefore skip the optimised bloc that uses dcbz. This jump is + * replaced by a nop once cache is active. This is done in machine_init() */ _GLOBAL(memset) rlwimi r4,r4,8,16,23 @@ -88,6 +92,8 @@ _GLOBAL(memset) subf r6,r0,r6 cmplwi 0,r4,0 bne 2f /* Use normal procedure if r4 is not zero */ +_GLOBAL(memset_nocache_branch) + b 2f /* Skip optimised bloc until cache is enabled */ clrlwi r7,r6,32-LG_CACHELINE_BYTES add r8,r7,r5 @@ -128,6 +134,10 @@ _GLOBAL(memset) * the destination area is cacheable. * We only use this version if the source and dest don't overlap. * -- paulus. + * + * During early init, cache might not be active yet, so dcbz cannot be used. + * We therefore jump to generic_memcpy which doesn't use dcbz. This jump is + * replaced by a nop once cache is active. This is done in machine_init() */ _GLOBAL(memmove) cmplw 0,r3,r4 @@ -135,6 +145,7 @@ _GLOBAL(memmove) /* fall through */ _GLOBAL(memcpy) + b generic_memcpy add r7,r3,r5 /* test if the src & dst overlap */ add r8,r4,r5 cmplw 0,r4,r7 diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 43dafb9d6a46..4d87122cf6a7 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -85,7 +85,6 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, BUG_ON(index >= 4096); vpn = hpt_vpn(ea, vsid, ssize); - hash = hpt_hash(vpn, shift, ssize); hpte_slot_array = get_hpte_slot_array(pmdp); if (psize == MMU_PAGE_4K) { /* @@ -101,6 +100,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, valid = hpte_valid(hpte_slot_array, index); if (valid) { /* update the hpte bits */ + hash = hpt_hash(vpn, shift, ssize); hidx = hpte_hash_index(hpte_slot_array, index); if (hidx & _PTEIDX_SECONDARY) hash = ~hash; @@ -126,6 +126,7 @@ int 
__hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, if (!valid) { unsigned long hpte_group; + hash = hpt_hash(vpn, shift, ssize); /* insert new entry */ pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT; new_pmd |= _PAGE_HASHPTE; diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c index 11090ab4bf59..0035d146df73 100644 --- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c +++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c @@ -104,9 +104,10 @@ cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp, return irq_linear_revmap(cpld_pic_host, cpld_irq); } -static void -cpld_pic_cascade(unsigned int irq, struct irq_desc *desc) +static void cpld_pic_cascade(struct irq_desc *desc) { + unsigned int irq; + irq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status, &cpld_regs->pci_mask); if (irq != NO_IRQ) { diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c index 32cae33c4266..8fb95480fd73 100644 --- a/arch/powerpc/platforms/52xx/media5200.c +++ b/arch/powerpc/platforms/52xx/media5200.c @@ -80,7 +80,7 @@ static struct irq_chip media5200_irq_chip = { .irq_mask_ack = media5200_irq_mask, }; -void media5200_irq_cascade(unsigned int virq, struct irq_desc *desc) +static void media5200_irq_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); int sub_virq, val; diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index 63016621aff8..78ac19aefa4d 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -191,7 +191,7 @@ static struct irq_chip mpc52xx_gpt_irq_chip = { .irq_set_type = mpc52xx_gpt_irq_set_type, }; -void mpc52xx_gpt_irq_cascade(unsigned int virq, struct irq_desc *desc) +static void mpc52xx_gpt_irq_cascade(struct irq_desc *desc) { struct mpc52xx_gpt_priv *gpt = irq_desc_get_handler_data(desc); int sub_virq; diff --git 
a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c index 2944bc84b9d6..4fe2074c88cb 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c @@ -196,7 +196,7 @@ static int mpc52xx_extirq_set_type(struct irq_data *d, unsigned int flow_type) ctrl_reg |= (type << (22 - (l2irq * 2))); out_be32(&intr->ctrl, ctrl_reg); - __irq_set_handler_locked(d->irq, handler); + irq_set_handler_locked(d, handler); return 0; } diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c index 74861a7fb807..60e89fc9c753 100644 --- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c +++ b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c @@ -78,7 +78,7 @@ static struct irq_chip pq2ads_pci_ic = { .irq_disable = pq2ads_pci_mask_irq }; -static void pq2ads_pci_irq_demux(unsigned int irq, struct irq_desc *desc) +static void pq2ads_pci_irq_demux(struct irq_desc *desc) { struct pq2ads_pci_pic *priv = irq_desc_get_handler_data(desc); u32 stat, mask, pend; diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c index 7bfb9b184dd4..23791de7b688 100644 --- a/arch/powerpc/platforms/85xx/common.c +++ b/arch/powerpc/platforms/85xx/common.c @@ -49,7 +49,7 @@ int __init mpc85xx_common_publish_devices(void) return of_platform_bus_probe(NULL, mpc85xx_common_ids, NULL); } #ifdef CONFIG_CPM2 -static void cpm2_cascade(unsigned int irq, struct irq_desc *desc) +static void cpm2_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); int cascade_irq; diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c index b0753e222086..5ac70de3e48a 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c @@ -192,8 +192,7 @@ void mpc85xx_cds_fixup_bus(struct pci_bus *bus) } #ifdef CONFIG_PPC_I8259 -static void mpc85xx_8259_cascade_handler(unsigned int irq, - 
struct irq_desc *desc) +static void mpc85xx_8259_cascade_handler(struct irq_desc *desc) { unsigned int cascade_irq = i8259_irq(); @@ -202,7 +201,7 @@ static void mpc85xx_8259_cascade_handler(unsigned int irq, generic_handle_irq(cascade_irq); /* check for any interrupts from the shared IRQ line */ - handle_fasteoi_irq(irq, desc); + handle_fasteoi_irq(desc); } static irqreturn_t mpc85xx_8259_cascade_action(int irq, void *dev_id) diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c index ffdf02121a7c..f858306dba6a 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c @@ -46,7 +46,7 @@ #endif #ifdef CONFIG_PPC_I8259 -static void mpc85xx_8259_cascade(unsigned int irq, struct irq_desc *desc) +static void mpc85xx_8259_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = i8259_irq(); diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c index 55a9682b9529..b02d6a5bb035 100644 --- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c +++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c @@ -91,9 +91,10 @@ static inline unsigned int socrates_fpga_pic_get_irq(unsigned int irq) (irq_hw_number_t)i); } -void socrates_fpga_pic_cascade(unsigned int irq, struct irq_desc *desc) +static void socrates_fpga_pic_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned int irq = irq_desc_get_irq(desc); unsigned int cascade_irq; /* diff --git a/arch/powerpc/platforms/86xx/pic.c b/arch/powerpc/platforms/86xx/pic.c index d5b98c0f958a..845defa1fd19 100644 --- a/arch/powerpc/platforms/86xx/pic.c +++ b/arch/powerpc/platforms/86xx/pic.c @@ -17,7 +17,7 @@ #include #ifdef CONFIG_PPC_I8259 -static void mpc86xx_8259_cascade(unsigned int irq, struct irq_desc *desc) +static void mpc86xx_8259_cascade(struct irq_desc *desc) { struct irq_chip *chip = 
irq_desc_get_chip(desc); unsigned int cascade_irq = i8259_irq(); diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index d3037747031d..c289fc77b4ba 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -214,7 +214,7 @@ void mpc8xx_restart(char *cmd) panic("Restart failed\n"); } -static void cpm_cascade(unsigned int irq, struct irq_desc *desc) +static void cpm_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); int cascade_irq = cpm_get_irq(); diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index 306888acb737..e0e68a1c0d3c 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -93,7 +93,7 @@ static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val) dcr_write(msic->dcr_host, dcr_n, val); } -static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc) +static void axon_msi_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct axon_msic *msic = irq_desc_get_handler_data(desc); diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index a15f1efc295f..9f609fc8d331 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -99,11 +99,12 @@ static void iic_ioexc_eoi(struct irq_data *d) { } -static void iic_ioexc_cascade(unsigned int irq, struct irq_desc *desc) +static void iic_ioexc_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct cbe_iic_regs __iomem *node_iic = (void __iomem *)irq_desc_get_handler_data(desc); + unsigned int irq = irq_desc_get_irq(desc); unsigned int base = (irq & 0xffffff00) | IIC_IRQ_TYPE_IOEXC; unsigned long bits, ack; int cascade; diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c index 
1f72f4ab6353..9d27de62dc62 100644 --- a/arch/powerpc/platforms/cell/spider-pic.c +++ b/arch/powerpc/platforms/cell/spider-pic.c @@ -199,7 +199,7 @@ static const struct irq_domain_ops spider_host_ops = { .xlate = spider_host_xlate, }; -static void spider_irq_cascade(unsigned int irq, struct irq_desc *desc) +static void spider_irq_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct spider_pic *pic = irq_desc_get_handler_data(desc); diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index 15ebc4e8a151..987d1b8d68e3 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -363,7 +363,7 @@ void __init chrp_setup_arch(void) if (ppc_md.progress) ppc_md.progress("Linux/PPC "UTS_RELEASE"\n", 0x0); } -static void chrp_8259_cascade(unsigned int irq, struct irq_desc *desc) +static void chrp_8259_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = i8259_irq(); diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c index 9dd154d6f89a..9b7975706bfc 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -120,8 +120,7 @@ static unsigned int __hlwd_pic_get_irq(struct irq_domain *h) return irq_linear_revmap(h, irq); } -static void hlwd_pic_irq_cascade(unsigned int cascade_virq, - struct irq_desc *desc) +static void hlwd_pic_irq_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct irq_domain *irq_domain = irq_desc_get_handler_data(desc); diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c index 1613303177e6..8f65aa3747f5 100644 --- a/arch/powerpc/platforms/embedded6xx/mvme5100.c +++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c @@ -42,7 +42,7 @@ static phys_addr_t pci_membase; static u_char *restart; -static void 
mvme5100_8259_cascade(unsigned int irq, struct irq_desc *desc) +static void mvme5100_8259_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = i8259_irq(); diff --git a/arch/powerpc/platforms/pasemi/msi.c b/arch/powerpc/platforms/pasemi/msi.c index e66ef1943338..b304a9fe55cc 100644 --- a/arch/powerpc/platforms/pasemi/msi.c +++ b/arch/powerpc/platforms/pasemi/msi.c @@ -63,6 +63,7 @@ static struct irq_chip mpic_pasemi_msi_chip = { static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; + irq_hw_number_t hwirq; pr_debug("pasemi_msi_teardown_msi_irqs, pdev %p\n", pdev); @@ -70,10 +71,10 @@ static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, - virq_to_hw(entry->irq), ALLOC_CHUNK); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, ALLOC_CHUNK); } return; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 2927cd5c8303..414fd1a00fda 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2049,9 +2049,23 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe) struct iommu_table *tbl = NULL; long rc; + /* + * crashkernel= specifies the kdump kernel's maximum memory at + * some offset and there is no guaranteed the result is a power + * of 2, which will cause errors later. + */ + const u64 max_memory = __rounddown_pow_of_two(memory_hotplug_max()); + + /* + * In memory constrained environments, e.g. kdump kernel, the + * DMA window can be larger than available memory, which will + * cause errors later. 
+ */ + const u64 window_size = min((u64)pe->table_group.tce32_size, max_memory); + rc = pnv_pci_ioda2_create_table(&pe->table_group, 0, IOMMU_PAGE_SHIFT_4K, - pe->table_group.tce32_size, + window_size, POWERNV_IOMMU_DEFAULT_LEVELS, &tbl); if (rc) { pe_err(pe, "Failed to create 32-bit TCE table, err %ld", diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 9b2480b265c0..f2dd77234240 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -99,6 +99,7 @@ void pnv_teardown_msi_irqs(struct pci_dev *pdev) struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; struct msi_desc *entry; + irq_hw_number_t hwirq; if (WARN_ON(!phb)) return; @@ -106,10 +107,10 @@ void pnv_teardown_msi_irqs(struct pci_dev *pdev) for_each_pci_msi_entry(entry, pdev) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&phb->msi_bmp, - virq_to_hw(entry->irq) - phb->msi_base, 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, 1); } } #endif /* CONFIG_PCI_MSI */ diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 47d9cebe7159..db17827eb746 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -422,8 +422,10 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count) dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent); of_node_put(parent); - if (!dn) + if (!dn) { + dlpar_release_drc(drc_index); return -EINVAL; + } rc = dlpar_attach_node(dn); if (rc) { diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 39a74fad3e04..9a83eb71b030 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -111,7 +111,7 @@ static void __init fwnmi_init(void) fwnmi_active = 1; 
} -static void pseries_8259_cascade(unsigned int irq, struct irq_desc *desc) +static void pseries_8259_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = i8259_irq(); diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c index a11bd1d433ad..9e86074719a9 100644 --- a/arch/powerpc/sysdev/cpm2_pic.c +++ b/arch/powerpc/sysdev/cpm2_pic.c @@ -155,9 +155,9 @@ static int cpm2_set_irq_type(struct irq_data *d, unsigned int flow_type) irqd_set_trigger_type(d, flow_type); if (flow_type & IRQ_TYPE_LEVEL_LOW) - __irq_set_handler_locked(d->irq, handle_level_irq); + irq_set_handler_locked(d, handle_level_irq); else - __irq_set_handler_locked(d->irq, handle_edge_irq); + irq_set_handler_locked(d, handle_edge_irq); /* internal IRQ senses are LEVEL_LOW * EXT IRQ and Port C IRQ senses are programmable diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 5916da1856a7..48a576aa47b9 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -128,15 +128,16 @@ static void fsl_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; struct fsl_msi *msi_data; + irq_hw_number_t hwirq; for_each_pci_msi_entry(entry, pdev) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); msi_data = irq_get_chip_data(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_data->bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1); } return; diff --git a/arch/powerpc/sysdev/ge/ge_pic.c b/arch/powerpc/sysdev/ge/ge_pic.c index 2bcb78bb3a15..d57b77573068 100644 --- a/arch/powerpc/sysdev/ge/ge_pic.c +++ b/arch/powerpc/sysdev/ge/ge_pic.c @@ -91,7 +91,7 @@ static int gef_pic_cascade_irq; * should be masked out. 
*/ -void gef_pic_cascade(unsigned int irq, struct irq_desc *desc) +static void gef_pic_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq; diff --git a/arch/powerpc/sysdev/ge/ge_pic.h b/arch/powerpc/sysdev/ge/ge_pic.h index 908dbd9826b6..5bf7e4b81e36 100644 --- a/arch/powerpc/sysdev/ge/ge_pic.h +++ b/arch/powerpc/sysdev/ge/ge_pic.h @@ -1,8 +1,6 @@ #ifndef __GEF_PIC_H__ #define __GEF_PIC_H__ - -void gef_pic_cascade(unsigned int, struct irq_desc *); unsigned int gef_pic_get_irq(void); void gef_pic_init(struct device_node *); diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index 6b2b68914810..b1297ab1599b 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -624,10 +624,10 @@ static int ipic_set_irq_type(struct irq_data *d, unsigned int flow_type) irqd_set_trigger_type(d, flow_type); if (flow_type & IRQ_TYPE_LEVEL_LOW) { - __irq_set_handler_locked(d->irq, handle_level_irq); + irq_set_handler_locked(d, handle_level_irq); d->chip = &ipic_level_irq_chip; } else { - __irq_set_handler_locked(d->irq, handle_edge_irq); + irq_set_handler_locked(d, handle_edge_irq); d->chip = &ipic_edge_irq_chip; } diff --git a/arch/powerpc/sysdev/mpc8xx_pic.c b/arch/powerpc/sysdev/mpc8xx_pic.c index d93a78be4346..9a423975853a 100644 --- a/arch/powerpc/sysdev/mpc8xx_pic.c +++ b/arch/powerpc/sysdev/mpc8xx_pic.c @@ -55,7 +55,7 @@ static int mpc8xx_set_irq_type(struct irq_data *d, unsigned int flow_type) unsigned int siel = in_be32(&siu_reg->sc_siel); siel |= mpc8xx_irqd_to_bit(d); out_be32(&siu_reg->sc_siel, siel); - __irq_set_handler_locked(d->irq, handle_edge_irq); + irq_set_handler_locked(d, handle_edge_irq); } return 0; } diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 97a8ae8f94dd..537e5db85a06 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -1181,7 +1181,7 @@ static int mpic_host_xlate(struct irq_domain *h, struct device_node *ct, } /* 
IRQ handler for a secondary MPIC cascaded from another IRQ controller */ -static void mpic_cascade(unsigned int irq, struct irq_desc *desc) +static void mpic_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct mpic *mpic = irq_desc_get_handler_data(desc); diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index 70fbd5694a8b..2cbc7e29b85f 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c @@ -107,15 +107,16 @@ static u64 find_u4_magic_addr(struct pci_dev *pdev, unsigned int hwirq) static void u3msi_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; + irq_hw_number_t hwirq; for_each_pci_msi_entry(entry, pdev) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, 1); } return; diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c index 24d0470c1698..8fb806135043 100644 --- a/arch/powerpc/sysdev/ppc4xx_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_msi.c @@ -124,16 +124,17 @@ void ppc4xx_teardown_msi_irqs(struct pci_dev *dev) { struct msi_desc *entry; struct ppc4xx_msi *msi_data = &ppc4xx_msi; + irq_hw_number_t hwirq; dev_dbg(&dev->dev, "PCIE-MSI: tearing down msi irqs\n"); for_each_pci_msi_entry(entry, dev) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_data->bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1); } } diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c index 47b352e4bc74..fbcc1f855a7f 100644 --- a/arch/powerpc/sysdev/qe_lib/qe_ic.c +++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c @@ -311,8 +311,8 @@ unsigned int 
qe_ic_get_high_irq(struct qe_ic *qe_ic) } void __init qe_ic_init(struct device_node *node, unsigned int flags, - void (*low_handler)(unsigned int irq, struct irq_desc *desc), - void (*high_handler)(unsigned int irq, struct irq_desc *desc)) + void (*low_handler)(struct irq_desc *desc), + void (*high_handler)(struct irq_desc *desc)) { struct qe_ic *qe_ic; struct resource res; diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c index 57b54476e747..379de955aae3 100644 --- a/arch/powerpc/sysdev/tsi108_pci.c +++ b/arch/powerpc/sysdev/tsi108_pci.c @@ -428,7 +428,7 @@ void __init tsi108_pci_int_init(struct device_node *node) init_pci_source(); } -void tsi108_irq_cascade(unsigned int irq, struct irq_desc *desc) +void tsi108_irq_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = get_pci_source(); diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/sysdev/uic.c index d77345338671..6893d8f236df 100644 --- a/arch/powerpc/sysdev/uic.c +++ b/arch/powerpc/sysdev/uic.c @@ -194,7 +194,7 @@ static const struct irq_domain_ops uic_host_ops = { .xlate = irq_domain_xlate_twocell, }; -void uic_irq_cascade(unsigned int virq, struct irq_desc *desc) +static void uic_irq_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct irq_data *idata = irq_desc_get_irq_data(desc); diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c index 11ac964d5175..27c936c080a6 100644 --- a/arch/powerpc/sysdev/xics/ics-opal.c +++ b/arch/powerpc/sysdev/xics/ics-opal.c @@ -54,7 +54,7 @@ static void ics_opal_unmask_irq(struct irq_data *d) if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) return; - server = xics_get_irq_server(d->irq, d->affinity, 0); + server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0); server = ics_opal_mangle_server(server); rc = opal_set_xive(hw_irq, server, DEFAULT_PRIORITY); diff --git 
a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c index d1c625c4cc5a..3854dd41558d 100644 --- a/arch/powerpc/sysdev/xics/ics-rtas.c +++ b/arch/powerpc/sysdev/xics/ics-rtas.c @@ -47,7 +47,7 @@ static void ics_rtas_unmask_irq(struct irq_data *d) if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS) return; - server = xics_get_irq_server(d->irq, d->affinity, 0); + server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0); call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, server, DEFAULT_PRIORITY); diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c index 43b8b275bc5c..0f52d7955796 100644 --- a/arch/powerpc/sysdev/xilinx_intc.c +++ b/arch/powerpc/sysdev/xilinx_intc.c @@ -222,7 +222,7 @@ int xilinx_intc_get_irq(void) /* * Support code for cascading to 8259 interrupt controllers */ -static void xilinx_i8259_cascade(unsigned int irq, struct irq_desc *desc) +static void xilinx_i8259_cascade(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); unsigned int cascade_irq = i8259_irq(); diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 3d012e071647..6ce4a0b7e8da 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -210,6 +210,7 @@ struct kvm_vcpu_stat { u32 exit_validity; u32 exit_instruction; u32 halt_successful_poll; + u32 halt_attempted_poll; u32 halt_wakeup; u32 instruction_lctl; u32 instruction_lctlg; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c91eb941b444..0a67c40eece9 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -63,6 +63,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, + { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, { 
"halt_wakeup", VCPU_STAT(halt_wakeup) }, { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, { "instruction_lctl", VCPU_STAT(instruction_lctl) }, @@ -1574,7 +1575,7 @@ static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu) static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu) { - atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20); + atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20); } /* diff --git a/arch/sh/boards/mach-se/7343/irq.c b/arch/sh/boards/mach-se/7343/irq.c index 6f97a8f0d0d6..6129aef6db76 100644 --- a/arch/sh/boards/mach-se/7343/irq.c +++ b/arch/sh/boards/mach-se/7343/irq.c @@ -29,7 +29,7 @@ static void __iomem *se7343_irq_regs; struct irq_domain *se7343_irq_domain; -static void se7343_irq_demux(unsigned int irq, struct irq_desc *desc) +static void se7343_irq_demux(struct irq_desc *desc) { struct irq_data *data = irq_desc_get_irq_data(desc); struct irq_chip *chip = irq_data_get_irq_chip(data); diff --git a/arch/sh/boards/mach-se/7722/irq.c b/arch/sh/boards/mach-se/7722/irq.c index 60aebd14ccf8..24c74a88290c 100644 --- a/arch/sh/boards/mach-se/7722/irq.c +++ b/arch/sh/boards/mach-se/7722/irq.c @@ -28,7 +28,7 @@ static void __iomem *se7722_irq_regs; struct irq_domain *se7722_irq_domain; -static void se7722_irq_demux(unsigned int irq, struct irq_desc *desc) +static void se7722_irq_demux(struct irq_desc *desc) { struct irq_data *data = irq_desc_get_irq_data(desc); struct irq_chip *chip = irq_data_get_irq_chip(data); diff --git a/arch/sh/boards/mach-se/7724/irq.c b/arch/sh/boards/mach-se/7724/irq.c index 9f2033898652..64e681e66c57 100644 --- a/arch/sh/boards/mach-se/7724/irq.c +++ b/arch/sh/boards/mach-se/7724/irq.c @@ -92,7 +92,7 @@ static struct irq_chip se7724_irq_chip __read_mostly = { .irq_unmask = enable_se7724_irq, }; -static void se7724_irq_demux(unsigned int __irq, struct irq_desc *desc) +static void se7724_irq_demux(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); struct fpga_irq set = 
get_fpga_irq(irq); diff --git a/arch/sh/boards/mach-x3proto/gpio.c b/arch/sh/boards/mach-x3proto/gpio.c index 24555c364d5b..1fb2cbee25f2 100644 --- a/arch/sh/boards/mach-x3proto/gpio.c +++ b/arch/sh/boards/mach-x3proto/gpio.c @@ -60,7 +60,7 @@ static int x3proto_gpio_to_irq(struct gpio_chip *chip, unsigned gpio) return virq; } -static void x3proto_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) +static void x3proto_gpio_irq_handler(struct irq_desc *desc) { struct irq_data *data = irq_desc_get_irq_data(desc); struct irq_chip *chip = irq_data_get_irq_chip(data); diff --git a/arch/sh/cchips/hd6446x/hd64461.c b/arch/sh/cchips/hd6446x/hd64461.c index e9735616bdc8..8180092502f7 100644 --- a/arch/sh/cchips/hd6446x/hd64461.c +++ b/arch/sh/cchips/hd6446x/hd64461.c @@ -56,7 +56,7 @@ static struct irq_chip hd64461_irq_chip = { .irq_unmask = hd64461_unmask_irq, }; -static void hd64461_irq_demux(unsigned int irq, struct irq_desc *desc) +static void hd64461_irq_demux(struct irq_desc *desc) { unsigned short intv = __raw_readw(HD64461_NIRR); unsigned int ext_irq = HD64461_IRQBASE; diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c index 0299f052a2ef..42efcf85f721 100644 --- a/arch/sparc/kernel/leon_kernel.c +++ b/arch/sparc/kernel/leon_kernel.c @@ -53,7 +53,7 @@ static inline unsigned int leon_eirq_get(int cpu) } /* Handle one or multiple IRQs from the extended interrupt controller */ -static void leon_handle_ext_irq(unsigned int irq, struct irq_desc *desc) +static void leon_handle_ext_irq(struct irq_desc *desc) { unsigned int eirq; struct irq_bucket *p; diff --git a/arch/sparc/kernel/leon_pci_grpci1.c b/arch/sparc/kernel/leon_pci_grpci1.c index 3382f7b3eeef..1e77128a8f88 100644 --- a/arch/sparc/kernel/leon_pci_grpci1.c +++ b/arch/sparc/kernel/leon_pci_grpci1.c @@ -357,7 +357,7 @@ static struct irq_chip grpci1_irq = { }; /* Handle one or multiple IRQs from the PCI core */ -static void grpci1_pci_flow_irq(unsigned int irq, struct irq_desc 
*desc) +static void grpci1_pci_flow_irq(struct irq_desc *desc) { struct grpci1_priv *priv = grpci1priv; int i, ack = 0; diff --git a/arch/sparc/kernel/leon_pci_grpci2.c b/arch/sparc/kernel/leon_pci_grpci2.c index 814fb1729b12..f727c4de1316 100644 --- a/arch/sparc/kernel/leon_pci_grpci2.c +++ b/arch/sparc/kernel/leon_pci_grpci2.c @@ -498,7 +498,7 @@ static struct irq_chip grpci2_irq = { }; /* Handle one or multiple IRQs from the PCI core */ -static void grpci2_pci_flow_irq(unsigned int irq, struct irq_desc *desc) +static void grpci2_pci_flow_irq(struct irq_desc *desc) { struct grpci2_priv *priv = grpci2priv; int i, ack = 0; diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c index b3f73fd764a3..4c017d0d2de8 100644 --- a/arch/tile/kernel/pci_gx.c +++ b/arch/tile/kernel/pci_gx.c @@ -304,17 +304,16 @@ static struct irq_chip tilegx_legacy_irq_chip = { * to Linux which just calls handle_level_irq() after clearing the * MAC INTx Assert status bit associated with this interrupt. */ -static void trio_handle_level_irq(unsigned int __irq, struct irq_desc *desc) +static void trio_handle_level_irq(struct irq_desc *desc) { struct pci_controller *controller = irq_desc_get_handler_data(desc); gxio_trio_context_t *trio_context = controller->trio; uint64_t intx = (uint64_t)irq_desc_get_chip_data(desc); - unsigned int irq = irq_desc_get_irq(desc); int mac = controller->mac; unsigned int reg_offset; uint64_t level_mask; - handle_level_irq(irq, desc); + handle_level_irq(desc); /* * Clear the INTx Level status, otherwise future interrupts are diff --git a/arch/unicore32/kernel/irq.c b/arch/unicore32/kernel/irq.c index c53729d92e8d..eb1fd0030359 100644 --- a/arch/unicore32/kernel/irq.c +++ b/arch/unicore32/kernel/irq.c @@ -112,7 +112,7 @@ static struct irq_chip puv3_low_gpio_chip = { * irq_controller_lock held, and IRQs disabled. Decode the IRQ * and call the handler. 
*/ -static void puv3_gpio_handler(unsigned int __irq, struct irq_desc *desc) +static void puv3_gpio_handler(struct irq_desc *desc) { unsigned int mask, irq; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7aef2d52daa0..328c8352480c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1006,7 +1006,7 @@ config X86_THERMAL_VECTOR depends on X86_MCE_INTEL config X86_LEGACY_VM86 - bool "Legacy VM86 support (obsolete)" + bool "Legacy VM86 support" default n depends on X86_32 ---help--- @@ -1018,19 +1018,20 @@ config X86_LEGACY_VM86 available to accelerate real mode DOS programs. However, any recent version of DOSEMU, X, or vbetool should be fully functional even without kernel VM86 support, as they will all - fall back to (pretty well performing) software emulation. + fall back to software emulation. Nevertheless, if you are using + a 16-bit DOS program where 16-bit performance matters, vm86 + mode might be faster than emulation and you might want to + enable this option. - Anything that works on a 64-bit kernel is unlikely to need - this option, as 64-bit kernels don't, and can't, support V8086 - mode. This option is also unrelated to 16-bit protected mode - and is not needed to run most 16-bit programs under Wine. + Note that any app that works on a 64-bit kernel is unlikely to + need this option, as 64-bit kernels don't, and can't, support + V8086 mode. This option is also unrelated to 16-bit protected + mode and is not needed to run most 16-bit programs under Wine. - Enabling this option adds considerable attack surface to the - kernel and slows down system calls and exception handling. + Enabling this option increases the complexity of the kernel + and slows down exception handling a tiny bit. - Unless you use very old userspace or need the last drop of - performance in your real mode DOS games and can't use KVM, - say N here. + If unsure, say N here. 
config VM86 bool diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 477fc28050e4..e6cf2ad350d1 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -241,6 +241,7 @@ #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ +#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ /* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */ #define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c12e845f59e6..349f80a82b82 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -711,6 +711,7 @@ struct kvm_vcpu_stat { u32 nmi_window_exits; u32 halt_exits; u32 halt_successful_poll; + u32 halt_attempted_poll; u32 halt_wakeup; u32 request_irq_exits; u32 irq_exits; diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index ce029e4fa7c6..31247b5bff7c 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -97,7 +97,6 @@ struct pv_lazy_ops { struct pv_time_ops { unsigned long long (*sched_clock)(void); unsigned long long (*steal_clock)(int cpu); - unsigned long (*get_tsc_khz)(void); }; struct pv_cpu_ops { diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 9d51fae1cba3..eaba08076030 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -39,18 +39,27 @@ static inline void queued_spin_unlock(struct qspinlock *lock) } #endif -#define virt_queued_spin_lock virt_queued_spin_lock - -static inline bool virt_queued_spin_lock(struct qspinlock *lock) +#ifdef CONFIG_PARAVIRT +#define virt_spin_lock virt_spin_lock +static inline bool virt_spin_lock(struct qspinlock 
*lock) { if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) return false; - while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0) - cpu_relax(); + /* + * On hypervisors without PARAVIRT_SPINLOCKS support we fall + * back to a Test-and-Set spinlock, because fair locks have + * horrible lock 'holder' preemption issues. + */ + + do { + while (atomic_read(&lock->val) != 0) + cpu_relax(); + } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0); return true; } +#endif /* CONFIG_PARAVIRT */ #include <asm-generic/qspinlock.h> diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index c42827eb86cf..25f909362b7a 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -338,10 +338,15 @@ done: static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr) { + unsigned long flags; + if (instr[0] != 0x90) return; + local_irq_save(flags); add_nops(instr + (a->instrlen - a->padlen), a->padlen); + sync_core(); + local_irq_restore(flags); DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ", instr, a->instrlen - a->padlen, a->padlen); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 3ca3e46aa405..24e94ce454e2 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -336,6 +336,13 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) apic_write(APIC_LVTT, lvtt_value); if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) { + /* + * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode, + * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized. + * According to Intel, MFENCE can do the serialization here.
+ */ + asm volatile("mfence" : : : "memory"); + printk_once(KERN_DEBUG "TSC deadline timer enabled\n"); return; } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 38a76f826530..5c60bb162622 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2522,6 +2522,7 @@ void __init setup_ioapic_dest(void) int pin, ioapic, irq, irq_entry; const struct cpumask *mask; struct irq_data *idata; + struct irq_chip *chip; if (skip_ioapic_setup == 1) return; @@ -2545,9 +2546,9 @@ void __init setup_ioapic_dest(void) else mask = apic->target_cpus(); - irq_set_affinity(irq, mask); + chip = irq_data_get_irq_chip(idata); + chip->irq_set_affinity(idata, mask, false); } - } #endif diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 1bbd0fe2c806..836d11b92811 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -489,10 +489,8 @@ static int apic_set_affinity(struct irq_data *irq_data, err = assign_irq_vector(irq, data, dest); if (err) { - struct irq_data *top = irq_get_irq_data(irq); - if (assign_irq_vector(irq, data, - irq_data_get_affinity_mask(top))) + irq_data_get_affinity_mask(irq_data))) pr_err("Failed to recover vector for irq %d\n", irq); return err; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 07ce52c22ec8..de22ea7ff82f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1110,10 +1110,10 @@ void print_cpu_info(struct cpuinfo_x86 *c) else printk(KERN_CONT "%d86", c->x86); - printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model); + printk(KERN_CONT " (family: 0x%x, model: 0x%x", c->x86, c->x86_model); if (c->x86_mask || c->cpuid_level >= 0) - printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask); + printk(KERN_CONT ", stepping: 0x%x)\n", c->x86_mask); else printk(KERN_CONT ")\n"); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 
cd9b6d0b10bf..3fefebfbdf4b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2316,9 +2316,12 @@ static struct event_constraint * intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) { - struct event_constraint *c1 = cpuc->event_constraint[idx]; + struct event_constraint *c1 = NULL; struct event_constraint *c2; + if (idx >= 0) /* fake does < 0 */ + c1 = cpuc->event_constraint[idx]; + /* * first time only * - static constraint: no change across incremental scheduling calls diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/kernel/cpu/perf_event_intel_bts.c index 54690e885759..d1c0f254afbe 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_bts.c +++ b/arch/x86/kernel/cpu/perf_event_intel_bts.c @@ -222,6 +222,7 @@ static void __bts_event_start(struct perf_event *event) if (!buf || bts_buffer_is_full(buf, bts)) return; + event->hw.itrace_started = 1; event->hw.state = 0; if (!buf->snapshot) diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index c80cf6699678..38da8f29a9c8 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -68,11 +68,10 @@ static inline void *current_stack(void) return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1)); } -static inline int -execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) +static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) { struct irq_stack *curstk, *irqstk; - u32 *isp, *prev_esp, arg1, arg2; + u32 *isp, *prev_esp, arg1; curstk = (struct irq_stack *) current_stack(); irqstk = __this_cpu_read(hardirq_stack); @@ -98,8 +97,8 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) asm volatile("xchgl %%ebx,%%esp \n" "call *%%edi \n" "movl %%ebx,%%esp \n" - : "=a" (arg1), "=d" (arg2), "=b" (isp) - : "0" (irq), "1" (desc), "2" (isp), + : "=a" (arg1), "=b" (isp) + : "0" (desc), "1" (isp), "D" (desc->handle_irq) : "memory", "cc", "ecx"); return 
1; @@ -150,19 +149,15 @@ void do_softirq_own_stack(void) bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) { - unsigned int irq; - int overflow; - - overflow = check_stack_overflow(); + int overflow = check_stack_overflow(); if (IS_ERR_OR_NULL(desc)) return false; - irq = irq_desc_get_irq(desc); - if (user_mode(regs) || !execute_on_irq_stack(overflow, desc, irq)) { + if (user_mode(regs) || !execute_on_irq_stack(overflow, desc)) { if (unlikely(overflow)) print_stack_overflow(); - generic_handle_irq_desc(irq, desc); + generic_handle_irq_desc(desc); } return true; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index ff16ccb918f2..c767cf2bc80a 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -75,6 +75,6 @@ bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) if (unlikely(IS_ERR_OR_NULL(desc))) return false; - generic_handle_irq_desc(irq_desc_get_irq(desc), desc); + generic_handle_irq_desc(desc); return true; } diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 2bcc0525f1c1..6acc9dd91f36 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -58,7 +58,7 @@ static struct ldt_struct *alloc_ldt_struct(int size) if (alloc_size > PAGE_SIZE) new_ldt->entries = vzalloc(alloc_size); else - new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL); + new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL); if (!new_ldt->entries) { kfree(new_ldt); @@ -95,7 +95,7 @@ static void free_ldt_struct(struct ldt_struct *ldt) if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE) vfree(ldt->entries); else - kfree(ldt->entries); + free_page((unsigned long)ldt->entries); kfree(ldt); } diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 84b8ef82a159..1b55de1267cf 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -131,8 +131,8 @@ void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp) { - *gfp = 
dma_alloc_coherent_gfp_flags(*dev, *gfp); *gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + *gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp); if (!*dev) *dev = &x86_dma_fallback_dev; diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index c8d52cb4cb6e..c3f7602cd038 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -21,6 +21,7 @@ #include <asm/hypervisor.h> #include <asm/nmi.h> #include <asm/x86_init.h> +#include <asm/geode.h> unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -1013,15 +1014,17 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable); static void __init check_system_tsc_reliable(void) { -#ifdef CONFIG_MGEODE_LX - /* RTSC counts during suspend */ +#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC) + if (is_geode_lx()) { + /* RTSC counts during suspend */ #define RTSC_SUSP 0x100 - unsigned long res_low, res_high; + unsigned long res_low, res_high; - rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); - /* Geode_LX - the OLPC CPU has a very reliable TSC */ - if (res_low & RTSC_SUSP) - tsc_clocksource_reliable = 1; + rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); + /* Geode_LX - the OLPC CPU has a very reliable TSC */ + if (res_low & RTSC_SUSP) + tsc_clocksource_reliable = 1; + } #endif if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) tsc_clocksource_reliable = 1; diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index abd8b856bd2b..524619351961 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -45,6 +45,7 @@ #include <linux/audit.h> #include <linux/stddef.h> #include <linux/slab.h> +#include <linux/security.h> #include <asm/uaccess.h> #include <asm/io.h> @@ -232,6 +233,32 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) struct pt_regs *regs = current_pt_regs(); unsigned long err = 0; + err = security_mmap_addr(0); + if (err) { + /* + * vm86 cannot virtualize the address space, so vm86 users + * need to manage the low 1MB themselves using mmap.
Given + * that BIOS places important data in the first page, vm86 + * is essentially useless if mmap_min_addr != 0. DOSEMU, + * for example, won't even bother trying to use vm86 if it + * can't map a page at virtual address 0. + * + * To reduce the available kernel attack surface, simply + * disallow vm86(old) for users who cannot mmap at va 0. + * + * The implementation of security_mmap_addr will allow + * suitably privileged users to map va 0 even if + * vm.mmap_min_addr is set above 0, and we want this + * behavior for vm86 as well, as it ensures that legacy + * tools like vbetool will not fail just because of + * vm.mmap_min_addr. + */ + pr_info_once("Denied a call to vm86(old) from %s[%d] (uid: %d). Set the vm.mmap_min_addr sysctl to 0 and/or adjust LSM mmap_min_addr policy to enable vm86 if you are using a vm86-based DOS emulator.\n", + current->comm, task_pid_nr(current), + from_kuid_munged(&init_user_ns, current_uid())); + return -EPERM; + } + if (!vm86) { if (!(vm86 = kzalloc(sizeof(*vm86), GFP_KERNEL))) return -ENOMEM; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d01986832afc..64076740251e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6064,6 +6064,8 @@ static __init int hardware_setup(void) memcpy(vmx_msr_bitmap_longmode_x2apic, vmx_msr_bitmap_longmode, PAGE_SIZE); + set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ + if (enable_apicv) { for (msr = 0x800; msr <= 0x8ff; msr++) vmx_disable_intercept_msr_read_x2apic(msr); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a60bdbccff51..6bbb0dfb99d0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -149,6 +149,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "nmi_window", VCPU_STAT(nmi_window_exits) }, { "halt_exits", VCPU_STAT(halt_exits) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, + { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "hypercalls", VCPU_STAT(hypercalls) }, 
{ "request_irq", VCPU_STAT(request_irq_exits) }, diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 161804de124a..a0d09f6c6533 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1015,7 +1015,7 @@ static struct clock_event_device lguest_clockevent = { * This is the Guest timer interrupt handler (hardware interrupt 0). We just * call the clockevent infrastructure and it does whatever needs doing. */ -static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) +static void lguest_time_irq(struct irq_desc *desc) { unsigned long flags; diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 66338a60aa6e..c2aea63bee20 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -192,10 +192,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) node_set(node, numa_nodes_parsed); - pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s\n", + pr_info("SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]%s%s\n", node, pxm, (unsigned long long) start, (unsigned long long) end - 1, - hotpluggable ? " hotplug" : ""); + hotpluggable ? " hotplug" : "", + ma->flags & ACPI_SRAT_MEM_NON_VOLATILE ? " non-volatile" : ""); /* Mark hotplug range in memblock. 
*/ if (hotpluggable && memblock_mark_hotplug(start, ma->length)) diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 4aecca79374a..14b8faf8b09d 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -140,6 +140,11 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, iv = bip->bip_vec + bip->bip_vcnt; + if (bip->bip_vcnt && + bvec_gap_to_prev(bdev_get_queue(bio->bi_bdev), + &bip->bip_vec[bip->bip_vcnt - 1], offset)) + return 0; + iv->bv_page = page; iv->bv_len = len; iv->bv_offset = offset; diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index ac8370cb2515..55512dd62633 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -370,6 +370,9 @@ static void blkg_destroy_all(struct request_queue *q) blkg_destroy(blkg); spin_unlock(&blkcg->lock); } + + q->root_blkg = NULL; + q->root_rl.blkg = NULL; } /* diff --git a/block/blk-integrity.c b/block/blk-integrity.c index f548b64be092..75f29cf70188 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -204,6 +204,9 @@ bool blk_integrity_merge_rq(struct request_queue *q, struct request *req, q->limits.max_integrity_segments) return false; + if (integrity_req_gap_back_merge(req, next->bio)) + return false; + return true; } EXPORT_SYMBOL(blk_integrity_merge_rq); diff --git a/block/blk-map.c b/block/blk-map.c index 233841644c9d..f565e11f465a 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -9,6 +9,24 @@ #include "blk.h" +static bool iovec_gap_to_prv(struct request_queue *q, + struct iovec *prv, struct iovec *cur) +{ + unsigned long prev_end; + + if (!queue_virt_boundary(q)) + return false; + + if (prv->iov_base == NULL && prv->iov_len == 0) + /* prv is not set - don't check */ + return false; + + prev_end = (unsigned long)(prv->iov_base + prv->iov_len); + + return (((unsigned long)cur->iov_base & queue_virt_boundary(q)) || + prev_end & queue_virt_boundary(q)); +} + int blk_rq_append_bio(struct request_queue *q, struct request *rq, struct bio *bio) { @@ -67,7 +85,7 @@ 
int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, struct bio *bio; int unaligned = 0; struct iov_iter i; - struct iovec iov; + struct iovec iov, prv = {.iov_base = NULL, .iov_len = 0}; if (!iter || !iter->count) return -EINVAL; @@ -81,8 +99,12 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, /* * Keep going so we check length of all segments */ - if (uaddr & queue_dma_alignment(q)) + if ((uaddr & queue_dma_alignment(q)) || + iovec_gap_to_prv(q, &prv, &iov)) unaligned = 1; + + prv.iov_base = iov.iov_base; + prv.iov_len = iov.iov_len; } if (unaligned || (q->dma_pad_mask & iter->count) || map_data) diff --git a/block/blk-merge.c b/block/blk-merge.c index d088cffb8105..c4e9c37f3e38 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -66,36 +66,33 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, struct bio *bio, struct bio_set *bs) { - struct bio *split; - struct bio_vec bv, bvprv; + struct bio_vec bv, bvprv, *bvprvp = NULL; struct bvec_iter iter; unsigned seg_size = 0, nsegs = 0, sectors = 0; - int prev = 0; bio_for_each_segment(bv, bio, iter) { - sectors += bv.bv_len >> 9; - - if (sectors > queue_max_sectors(q)) + if (sectors + (bv.bv_len >> 9) > queue_max_sectors(q)) goto split; /* * If the queue doesn't support SG gaps and adding this * offset would create a gap, disallow it. 
*/ - if (prev && bvec_gap_to_prev(q, &bvprv, bv.bv_offset)) + if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset)) goto split; - if (prev && blk_queue_cluster(q)) { + if (bvprvp && blk_queue_cluster(q)) { if (seg_size + bv.bv_len > queue_max_segment_size(q)) goto new_segment; - if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv)) + if (!BIOVEC_PHYS_MERGEABLE(bvprvp, &bv)) goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv)) + if (!BIOVEC_SEG_BOUNDARY(q, bvprvp, &bv)) goto new_segment; seg_size += bv.bv_len; bvprv = bv; - prev = 1; + bvprvp = &bv; + sectors += bv.bv_len >> 9; continue; } new_segment: @@ -104,23 +101,14 @@ new_segment: nsegs++; bvprv = bv; - prev = 1; + bvprvp = &bv; seg_size = bv.bv_len; + sectors += bv.bv_len >> 9; } return NULL; split: - split = bio_clone_bioset(bio, GFP_NOIO, bs); - - split->bi_iter.bi_size -= iter.bi_size; - bio->bi_iter = iter; - - if (bio_integrity(bio)) { - bio_integrity_advance(bio, split->bi_iter.bi_size); - bio_integrity_trim(split, 0, bio_sectors(split)); - } - - return split; + return bio_split(bio, sectors, GFP_NOIO, bs); } void blk_queue_split(struct request_queue *q, struct bio **bio, @@ -439,6 +427,11 @@ no_merge: int ll_back_merge_fn(struct request_queue *q, struct request *req, struct bio *bio) { + if (req_gap_back_merge(req, bio)) + return 0; + if (blk_integrity_rq(req) && + integrity_req_gap_back_merge(req, bio)) + return 0; if (blk_rq_sectors(req) + bio_sectors(bio) > blk_rq_get_max_sectors(req)) { req->cmd_flags |= REQ_NOMERGE; @@ -457,6 +450,12 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, int ll_front_merge_fn(struct request_queue *q, struct request *req, struct bio *bio) { + + if (req_gap_front_merge(req, bio)) + return 0; + if (blk_integrity_rq(req) && + integrity_req_gap_front_merge(req, bio)) + return 0; if (blk_rq_sectors(req) + bio_sectors(bio) > blk_rq_get_max_sectors(req)) { req->cmd_flags |= REQ_NOMERGE; @@ -483,14 +482,6 @@ static bool req_no_special_merge(struct request 
*req) return !q->mq_ops && req->special; } -static int req_gap_to_prev(struct request *req, struct bio *next) -{ - struct bio *prev = req->biotail; - - return bvec_gap_to_prev(req->q, &prev->bi_io_vec[prev->bi_vcnt - 1], - next->bi_io_vec[0].bv_offset); -} - static int ll_merge_requests_fn(struct request_queue *q, struct request *req, struct request *next) { @@ -505,7 +496,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, if (req_no_special_merge(req) || req_no_special_merge(next)) return 0; - if (req_gap_to_prev(req, next->bio)) + if (req_gap_back_merge(req, next->bio)) return 0; /* @@ -713,10 +704,6 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) !blk_write_same_mergeable(rq->bio, bio)) return false; - /* Only check gaps if the bio carries data */ - if (bio_has_data(bio) && req_gap_to_prev(rq, bio)) - return false; - return true; } diff --git a/block/bounce.c b/block/bounce.c index 0611aea1cfe9..1cb5dd3a5da1 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -128,12 +128,14 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool) struct bio *bio_orig = bio->bi_private; struct bio_vec *bvec, *org_vec; int i; + int start = bio_orig->bi_iter.bi_idx; /* * free up bounce indirect pages used */ bio_for_each_segment_all(bvec, bio, i) { - org_vec = bio_orig->bi_io_vec + i; + org_vec = bio_orig->bi_io_vec + i + start; + if (bvec->bv_page == org_vec->bv_page) continue; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 35c2de136971..fa18753f5c34 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -940,6 +940,7 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc, char *xbuf[XBUFSIZE]; char *xoutbuf[XBUFSIZE]; int ret = -ENOMEM; + unsigned int ivsize = crypto_skcipher_ivsize(tfm); if (testmgr_alloc_buf(xbuf)) goto out_nobuf; @@ -975,7 +976,7 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc, continue; if (template[i].iv) - memcpy(iv, template[i].iv, MAX_IVLEN); + memcpy(iv, 
template[i].iv, ivsize); else memset(iv, 0, MAX_IVLEN); @@ -1051,7 +1052,7 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc, continue; if (template[i].iv) - memcpy(iv, template[i].iv, MAX_IVLEN); + memcpy(iv, template[i].iv, ivsize); else memset(iv, 0, MAX_IVLEN); diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c index 1857a5dd0816..134483daac25 100644 --- a/drivers/base/platform-msi.c +++ b/drivers/base/platform-msi.c @@ -63,20 +63,8 @@ static int platform_msi_init(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq, msi_alloc_info_t *arg) { - struct irq_data *data; - - irq_domain_set_hwirq_and_chip(domain, virq, hwirq, - info->chip, info->chip_data); - - /* - * Save the MSI descriptor in handler_data so that the - * irq_write_msi_msg callback can retrieve it (and the - * associated device). - */ - data = irq_domain_get_irq_data(domain, virq); - data->handler_data = arg->desc; - - return 0; + return irq_domain_set_hwirq_and_chip(domain, virq, hwirq, + info->chip, info->chip_data); } #else #define platform_msi_set_desc NULL @@ -97,7 +85,7 @@ static void platform_msi_update_dom_ops(struct msi_domain_info *info) static void platform_msi_write_msg(struct irq_data *data, struct msi_msg *msg) { - struct msi_desc *desc = irq_data_get_irq_handler_data(data); + struct msi_desc *desc = irq_data_get_msi_desc(data); struct platform_msi_priv_data *priv_data; priv_data = desc->platform.msi_priv_data; diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 17269a3b85f2..a295b98c6bae 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -406,6 +406,22 @@ static struct blk_mq_ops null_mq_ops = { .complete = null_softirq_done_fn, }; +static void cleanup_queue(struct nullb_queue *nq) +{ + kfree(nq->tag_map); + kfree(nq->cmds); +} + +static void cleanup_queues(struct nullb *nullb) +{ + int i; + + for (i = 0; i < nullb->nr_queues; i++) + cleanup_queue(&nullb->queues[i]); + + kfree(nullb->queues); 
+} + static void null_del_dev(struct nullb *nullb) { list_del_init(&nullb->list); @@ -415,6 +431,7 @@ static void null_del_dev(struct nullb *nullb) if (queue_mode == NULL_Q_MQ) blk_mq_free_tag_set(&nullb->tag_set); put_disk(nullb->disk); + cleanup_queues(nullb); kfree(nullb); } @@ -459,22 +476,6 @@ static int setup_commands(struct nullb_queue *nq) return 0; } -static void cleanup_queue(struct nullb_queue *nq) -{ - kfree(nq->tag_map); - kfree(nq->cmds); -} - -static void cleanup_queues(struct nullb *nullb) -{ - int i; - - for (i = 0; i < nullb->nr_queues; i++) - cleanup_queue(&nullb->queues[i]); - - kfree(nullb->queues); -} - static int setup_queues(struct nullb *nullb) { nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue), @@ -588,8 +589,7 @@ static int null_add_dev(void) blk_queue_physical_block_size(nullb->q, bs); size = gb * 1024 * 1024 * 1024ULL; - sector_div(size, bs); - set_capacity(disk, size); + set_capacity(disk, size >> 9); disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO; disk->major = null_major; diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 965d1afb0eaa..5cb13ca3a3ac 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -330,12 +330,14 @@ void zcomp_destroy(struct zcomp *comp) * allocate new zcomp and initialize it. return compressing * backend pointer or ERR_PTR if things went bad. ERR_PTR(-EINVAL) * if requested algorithm is not supported, ERR_PTR(-ENOMEM) in - * case of allocation error. + * case of allocation error, or any other error potentially + * returned by functions zcomp_strm_{multi,single}_create. 
*/ struct zcomp *zcomp_create(const char *compress, int max_strm) { struct zcomp *comp; struct zcomp_backend *backend; + int error; backend = find_backend(compress); if (!backend) @@ -347,12 +349,12 @@ struct zcomp *zcomp_create(const char *compress, int max_strm) comp->backend = backend; if (max_strm > 1) - zcomp_strm_multi_create(comp, max_strm); + error = zcomp_strm_multi_create(comp, max_strm); else - zcomp_strm_single_create(comp); - if (!comp->stream) { + error = zcomp_strm_single_create(comp); + if (error) { kfree(comp); - return ERR_PTR(-ENOMEM); + return ERR_PTR(error); } return comp; } diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 43e2c3ad6c31..0ebcf449778a 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -2437,7 +2437,8 @@ static int __clk_init(struct device *dev, struct clk *clk_user) hlist_for_each_entry_safe(orphan, tmp2, &clk_orphan_list, child_node) { if (orphan->num_parents && orphan->ops->get_parent) { i = orphan->ops->get_parent(orphan->hw); - if (!strcmp(core->name, orphan->parent_names[i])) + if (i >= 0 && i < orphan->num_parents && + !strcmp(core->name, orphan->parent_names[i])) clk_core_reparent(orphan, core); continue; } diff --git a/drivers/clk/h8300/clk-h8s2678.c b/drivers/clk/h8300/clk-h8s2678.c index 2a38eb4a2552..6cf38dc1c929 100644 --- a/drivers/clk/h8300/clk-h8s2678.c +++ b/drivers/clk/h8300/clk-h8s2678.c @@ -8,6 +8,7 @@ #include #include #include +#include static DEFINE_SPINLOCK(clklock); diff --git a/drivers/clk/hisilicon/Kconfig b/drivers/clk/hisilicon/Kconfig index 2c16807341dc..e43485448612 100644 --- a/drivers/clk/hisilicon/Kconfig +++ b/drivers/clk/hisilicon/Kconfig @@ -1,6 +1,12 @@ config COMMON_CLK_HI6220 bool "Hi6220 Clock Driver" - depends on (ARCH_HISI || COMPILE_TEST) && MAILBOX + depends on ARCH_HISI || COMPILE_TEST default ARCH_HISI help Build the Hisilicon Hi6220 clock driver based on the common clock framework. 
+ +config STUB_CLK_HI6220 + bool "Hi6220 Stub Clock Driver" + depends on COMMON_CLK_HI6220 && MAILBOX + help + Build the Hisilicon Hi6220 stub clock driver. diff --git a/drivers/clk/hisilicon/Makefile b/drivers/clk/hisilicon/Makefile index 4a1001a11f04..74dba31590f9 100644 --- a/drivers/clk/hisilicon/Makefile +++ b/drivers/clk/hisilicon/Makefile @@ -7,4 +7,5 @@ obj-y += clk.o clkgate-separated.o clkdivider-hi6220.o obj-$(CONFIG_ARCH_HI3xxx) += clk-hi3620.o obj-$(CONFIG_ARCH_HIP04) += clk-hip04.o obj-$(CONFIG_ARCH_HIX5HD2) += clk-hix5hd2.o -obj-$(CONFIG_COMMON_CLK_HI6220) += clk-hi6220.o clk-hi6220-stub.o +obj-$(CONFIG_COMMON_CLK_HI6220) += clk-hi6220.o +obj-$(CONFIG_STUB_CLK_HI6220) += clk-hi6220-stub.o diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c index ed02bbc7b11f..abb47608713b 100644 --- a/drivers/clk/rockchip/clk-rk3188.c +++ b/drivers/clk/rockchip/clk-rk3188.c @@ -716,6 +716,8 @@ static const char *const rk3188_critical_clocks[] __initconst = { "aclk_cpu", "aclk_peri", "hclk_peri", + "pclk_cpu", + "pclk_peri", }; static void __init rk3188_common_clk_init(struct device_node *np) @@ -744,8 +746,6 @@ static void __init rk3188_common_clk_init(struct device_node *np) rockchip_clk_register_branches(common_clk_branches, ARRAY_SIZE(common_clk_branches)); - rockchip_clk_protect_critical(rk3188_critical_clocks, - ARRAY_SIZE(rk3188_critical_clocks)); rockchip_register_softrst(np, 9, reg_base + RK2928_SOFTRST_CON(0), ROCKCHIP_SOFTRST_HIWORD_MASK); @@ -765,6 +765,8 @@ static void __init rk3066a_clk_init(struct device_node *np) mux_armclk_p, ARRAY_SIZE(mux_armclk_p), &rk3066_cpuclk_data, rk3066_cpuclk_rates, ARRAY_SIZE(rk3066_cpuclk_rates)); + rockchip_clk_protect_critical(rk3188_critical_clocks, + ARRAY_SIZE(rk3188_critical_clocks)); } CLK_OF_DECLARE(rk3066a_cru, "rockchip,rk3066a-cru", rk3066a_clk_init); @@ -801,6 +803,9 @@ static void __init rk3188a_clk_init(struct device_node *np) pr_warn("%s: missing clocks to reparent aclk_cpu_pre 
to gpll\n", __func__); } + + rockchip_clk_protect_critical(rk3188_critical_clocks, + ARRAY_SIZE(rk3188_critical_clocks)); } CLK_OF_DECLARE(rk3188a_cru, "rockchip,rk3188a-cru", rk3188a_clk_init); diff --git a/drivers/clk/rockchip/clk-rk3368.c b/drivers/clk/rockchip/clk-rk3368.c index 9c5d61e698ef..7e6b783e6eee 100644 --- a/drivers/clk/rockchip/clk-rk3368.c +++ b/drivers/clk/rockchip/clk-rk3368.c @@ -818,6 +818,10 @@ static struct rockchip_clk_branch rk3368_clk_branches[] __initdata = { GATE(0, "sclk_timer00", "xin24m", CLK_IGNORE_UNUSED, RK3368_CLKGATE_CON(24), 0, GFLAGS), }; +static const char *const rk3368_critical_clocks[] __initconst = { + "pclk_pd_pmu", +}; + static void __init rk3368_clk_init(struct device_node *np) { void __iomem *reg_base; @@ -862,6 +866,8 @@ static void __init rk3368_clk_init(struct device_node *np) RK3368_GRF_SOC_STATUS0); rockchip_clk_register_branches(rk3368_clk_branches, ARRAY_SIZE(rk3368_clk_branches)); + rockchip_clk_protect_critical(rk3368_critical_clocks, + ARRAY_SIZE(rk3368_critical_clocks)); rockchip_clk_register_armclk(ARMCLKB, "armclkb", mux_armclkb_p, ARRAY_SIZE(mux_armclkb_p), diff --git a/drivers/clk/st/clkgen-fsyn.c b/drivers/clk/st/clkgen-fsyn.c index 83ccf142ff2a..576cd0354d48 100644 --- a/drivers/clk/st/clkgen-fsyn.c +++ b/drivers/clk/st/clkgen-fsyn.c @@ -307,7 +307,7 @@ static const struct clkgen_quadfs_data st_fs660c32_F_416 = { .get_rate = clk_fs660c32_dig_get_rate, }; -static const struct clkgen_quadfs_data st_fs660c32_C_407 = { +static const struct clkgen_quadfs_data st_fs660c32_C = { .nrst_present = true, .nrst = { CLKGEN_FIELD(0x2f0, 0x1, 0), CLKGEN_FIELD(0x2f0, 0x1, 1), @@ -350,7 +350,7 @@ static const struct clkgen_quadfs_data st_fs660c32_C_407 = { .get_rate = clk_fs660c32_dig_get_rate, }; -static const struct clkgen_quadfs_data st_fs660c32_D_407 = { +static const struct clkgen_quadfs_data st_fs660c32_D = { .nrst_present = true, .nrst = { CLKGEN_FIELD(0x2a0, 0x1, 0), CLKGEN_FIELD(0x2a0, 0x1, 1), @@ -1077,11 
+1077,11 @@ static const struct of_device_id quadfs_of_match[] = { }, { .compatible = "st,stih407-quadfs660-C", - .data = &st_fs660c32_C_407 + .data = &st_fs660c32_C }, { .compatible = "st,stih407-quadfs660-D", - .data = &st_fs660c32_D_407 + .data = &st_fs660c32_D }, {} }; diff --git a/drivers/clk/st/clkgen-pll.c b/drivers/clk/st/clkgen-pll.c index 47a38a994cac..b2a332cf8985 100644 --- a/drivers/clk/st/clkgen-pll.c +++ b/drivers/clk/st/clkgen-pll.c @@ -193,7 +193,7 @@ static const struct clkgen_pll_data st_pll3200c32_407_a0 = { .ops = &stm_pll3200c32_ops, }; -static const struct clkgen_pll_data st_pll3200c32_407_c0_0 = { +static const struct clkgen_pll_data st_pll3200c32_cx_0 = { /* 407 C0 PLL0 */ .pdn_status = CLKGEN_FIELD(0x2a0, 0x1, 8), .locked_status = CLKGEN_FIELD(0x2a0, 0x1, 24), @@ -205,7 +205,7 @@ static const struct clkgen_pll_data st_pll3200c32_407_c0_0 = { .ops = &stm_pll3200c32_ops, }; -static const struct clkgen_pll_data st_pll3200c32_407_c0_1 = { +static const struct clkgen_pll_data st_pll3200c32_cx_1 = { /* 407 C0 PLL1 */ .pdn_status = CLKGEN_FIELD(0x2c8, 0x1, 8), .locked_status = CLKGEN_FIELD(0x2c8, 0x1, 24), @@ -624,12 +624,12 @@ static const struct of_device_id c32_pll_of_match[] = { .data = &st_pll3200c32_407_a0, }, { - .compatible = "st,stih407-plls-c32-c0_0", - .data = &st_pll3200c32_407_c0_0, + .compatible = "st,plls-c32-cx_0", + .data = &st_pll3200c32_cx_0, }, { - .compatible = "st,stih407-plls-c32-c0_1", - .data = &st_pll3200c32_407_c0_1, + .compatible = "st,plls-c32-cx_1", + .data = &st_pll3200c32_cx_1, }, { .compatible = "st,stih407-plls-c32-a9", diff --git a/drivers/clk/tegra/clk-dfll.c b/drivers/clk/tegra/clk-dfll.c index c2ff859ee0e8..c4e3a52e225b 100644 --- a/drivers/clk/tegra/clk-dfll.c +++ b/drivers/clk/tegra/clk-dfll.c @@ -682,11 +682,17 @@ static int find_lut_index_for_rate(struct tegra_dfll *td, unsigned long rate) struct dev_pm_opp *opp; int i, uv; + rcu_read_lock(); + opp = dev_pm_opp_find_freq_ceil(td->soc->dev, &rate); - if 
(IS_ERR(opp)) + if (IS_ERR(opp)) { + rcu_read_unlock(); return PTR_ERR(opp); + } uv = dev_pm_opp_get_voltage(opp); + rcu_read_unlock(); + for (i = 0; i < td->i2c_lut_size; i++) { if (regulator_list_voltage(td->vdd_reg, td->i2c_lut[i]) == uv) return i; diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 07bc7aa6b224..d234719065a5 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -461,7 +461,7 @@ config CRYPTO_DEV_QCE config CRYPTO_DEV_VMX bool "Support for VMX cryptographic acceleration instructions" - depends on PPC64 + depends on PPC64 && VSX help Support for VMX cryptographic acceleration instructions. diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c index e070c316e8b7..a19ee127edca 100644 --- a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c +++ b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c @@ -104,7 +104,7 @@ static int sun4i_ss_opti_poll(struct ablkcipher_request *areq) sg_miter_next(&mo); oo = 0; } - } while (mo.length > 0); + } while (oleft > 0); if (areq->info) { for (i = 0; i < 4 && i < ivsize / 4; i++) { diff --git a/drivers/dma/ipu/ipu_irq.c b/drivers/dma/ipu/ipu_irq.c index 4768a829253a..2bf37e68ad0f 100644 --- a/drivers/dma/ipu/ipu_irq.c +++ b/drivers/dma/ipu/ipu_irq.c @@ -266,7 +266,7 @@ int ipu_irq_unmap(unsigned int source) } /* Chained IRQ handler for IPU function and error interrupt */ -static void ipu_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void ipu_irq_handler(struct irq_desc *desc) { struct ipu *ipu = irq_desc_get_handler_data(desc); u32 status; diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index b4fc9e4d24c6..8949b3f6f74d 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -356,7 +356,7 @@ config GPIO_PXA config GPIO_RCAR tristate "Renesas R-Car GPIO" - depends on ARM && (ARCH_SHMOBILE || COMPILE_TEST) + depends on ARCH_SHMOBILE || COMPILE_TEST select GPIOLIB_IRQCHIP help Say yes here to support GPIO on Renesas R-Car SoCs. 
diff --git a/drivers/gpio/gpio-altera.c b/drivers/gpio/gpio-altera.c index 9b7e0b3db387..1b44941574fa 100644 --- a/drivers/gpio/gpio-altera.c +++ b/drivers/gpio/gpio-altera.c @@ -201,8 +201,7 @@ static int altera_gpio_direction_output(struct gpio_chip *gc, return 0; } -static void altera_gpio_irq_edge_handler(unsigned int irq, - struct irq_desc *desc) +static void altera_gpio_irq_edge_handler(struct irq_desc *desc) { struct altera_gpio_chip *altera_gc; struct irq_chip *chip; @@ -231,8 +230,7 @@ static void altera_gpio_irq_edge_handler(unsigned int irq, } -static void altera_gpio_irq_leveL_high_handler(unsigned int irq, - struct irq_desc *desc) +static void altera_gpio_irq_leveL_high_handler(struct irq_desc *desc) { struct altera_gpio_chip *altera_gc; struct irq_chip *chip; diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c index 31b90ac15204..33a1f9779b86 100644 --- a/drivers/gpio/gpio-bcm-kona.c +++ b/drivers/gpio/gpio-bcm-kona.c @@ -433,7 +433,7 @@ static int bcm_kona_gpio_irq_set_type(struct irq_data *d, unsigned int type) return 0; } -static void bcm_kona_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) +static void bcm_kona_gpio_irq_handler(struct irq_desc *desc) { void __iomem *reg_base; int bit, bank_id; diff --git a/drivers/gpio/gpio-brcmstb.c b/drivers/gpio/gpio-brcmstb.c index 9ea86d2ac054..4c64627c6bb5 100644 --- a/drivers/gpio/gpio-brcmstb.c +++ b/drivers/gpio/gpio-brcmstb.c @@ -236,7 +236,7 @@ static void brcmstb_gpio_irq_bank_handler(struct brcmstb_gpio_bank *bank) } /* Each UPG GIO block has one IRQ for all banks */ -static void brcmstb_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) +static void brcmstb_gpio_irq_handler(struct irq_desc *desc) { struct gpio_chip *gc = irq_desc_get_handler_data(desc); struct brcmstb_gpio_priv *priv = brcmstb_gpio_gc_to_priv(gc); diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c index 94b0ab709721..5e715388803d 100644 --- a/drivers/gpio/gpio-davinci.c +++ 
b/drivers/gpio/gpio-davinci.c @@ -326,8 +326,7 @@ static struct irq_chip gpio_irqchip = { .flags = IRQCHIP_SET_TYPE_MASKED, }; -static void -gpio_irq_handler(unsigned __irq, struct irq_desc *desc) +static void gpio_irq_handler(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); struct davinci_gpio_regs __iomem *g; diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index c5be4b9b8baf..fcd5b0acfc72 100644 --- a/drivers/gpio/gpio-dwapb.c +++ b/drivers/gpio/gpio-dwapb.c @@ -147,7 +147,7 @@ static u32 dwapb_do_irq(struct dwapb_gpio *gpio) return ret; } -static void dwapb_irq_handler(u32 irq, struct irq_desc *desc) +static void dwapb_irq_handler(struct irq_desc *desc) { struct dwapb_gpio *gpio = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); diff --git a/drivers/gpio/gpio-ep93xx.c b/drivers/gpio/gpio-ep93xx.c index 9d90366ea259..3e3947b35c83 100644 --- a/drivers/gpio/gpio-ep93xx.c +++ b/drivers/gpio/gpio-ep93xx.c @@ -78,7 +78,7 @@ static void ep93xx_gpio_int_debounce(unsigned int irq, bool enable) EP93XX_GPIO_REG(int_debounce_register_offset[port])); } -static void ep93xx_gpio_ab_irq_handler(unsigned int irq, struct irq_desc *desc) +static void ep93xx_gpio_ab_irq_handler(struct irq_desc *desc) { unsigned char status; int i; @@ -100,8 +100,7 @@ static void ep93xx_gpio_ab_irq_handler(unsigned int irq, struct irq_desc *desc) } } -static void ep93xx_gpio_f_irq_handler(unsigned int __irq, - struct irq_desc *desc) +static void ep93xx_gpio_f_irq_handler(struct irq_desc *desc) { /* * map discontiguous hw irq range to continuous sw irq range: diff --git a/drivers/gpio/gpio-intel-mid.c b/drivers/gpio/gpio-intel-mid.c index aa28c65eb6b4..70097472b02c 100644 --- a/drivers/gpio/gpio-intel-mid.c +++ b/drivers/gpio/gpio-intel-mid.c @@ -301,7 +301,7 @@ static const struct pci_device_id intel_gpio_ids[] = { }; MODULE_DEVICE_TABLE(pci, intel_gpio_ids); -static void intel_mid_irq_handler(unsigned irq, struct irq_desc *desc) 
+static void intel_mid_irq_handler(struct irq_desc *desc) { struct gpio_chip *gc = irq_desc_get_handler_data(desc); struct intel_mid_gpio *priv = to_intel_gpio_priv(gc); diff --git a/drivers/gpio/gpio-lynxpoint.c b/drivers/gpio/gpio-lynxpoint.c index 153af464c7a7..127c37b380ae 100644 --- a/drivers/gpio/gpio-lynxpoint.c +++ b/drivers/gpio/gpio-lynxpoint.c @@ -234,7 +234,7 @@ static int lp_gpio_direction_output(struct gpio_chip *chip, return 0; } -static void lp_gpio_irq_handler(unsigned hwirq, struct irq_desc *desc) +static void lp_gpio_irq_handler(struct irq_desc *desc) { struct irq_data *data = irq_desc_get_irq_data(desc); struct gpio_chip *gc = irq_desc_get_handler_data(desc); diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c index 8ef7a12de983..48ef368347ab 100644 --- a/drivers/gpio/gpio-mpc8xxx.c +++ b/drivers/gpio/gpio-mpc8xxx.c @@ -194,7 +194,7 @@ static int mpc8xxx_gpio_to_irq(struct gpio_chip *gc, unsigned offset) return -ENXIO; } -static void mpc8xxx_gpio_irq_cascade(unsigned int irq, struct irq_desc *desc) +static void mpc8xxx_gpio_irq_cascade(struct irq_desc *desc) { struct mpc8xxx_gpio_chip *mpc8xxx_gc = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); diff --git a/drivers/gpio/gpio-msic.c b/drivers/gpio/gpio-msic.c index 7bcfb87a5fa6..22523aae8abe 100644 --- a/drivers/gpio/gpio-msic.c +++ b/drivers/gpio/gpio-msic.c @@ -232,7 +232,7 @@ static struct irq_chip msic_irqchip = { .irq_bus_sync_unlock = msic_bus_sync_unlock, }; -static void msic_gpio_irq_handler(unsigned irq, struct irq_desc *desc) +static void msic_gpio_irq_handler(struct irq_desc *desc) { struct irq_data *data = irq_desc_get_irq_data(desc); struct msic_gpio *mg = irq_data_get_irq_handler_data(data); diff --git a/drivers/gpio/gpio-msm-v2.c b/drivers/gpio/gpio-msm-v2.c index d2012cfb5571..4b4222145f10 100644 --- a/drivers/gpio/gpio-msm-v2.c +++ b/drivers/gpio/gpio-msm-v2.c @@ -305,7 +305,7 @@ static int msm_gpio_irq_set_type(struct 
irq_data *d, unsigned int flow_type) * which have been set as summary IRQ lines and which are triggered, * and to call their interrupt handlers. */ -static void msm_summary_irq_handler(unsigned int irq, struct irq_desc *desc) +static void msm_summary_irq_handler(struct irq_desc *desc) { unsigned long i; struct irq_chip *chip = irq_desc_get_chip(desc); diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index b396bf3bf294..df418b81456d 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -458,7 +458,7 @@ static int mvebu_gpio_irq_set_type(struct irq_data *d, unsigned int type) return 0; } -static void mvebu_gpio_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void mvebu_gpio_irq_handler(struct irq_desc *desc) { struct mvebu_gpio_chip *mvchip = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c index b752b560126e..b8dd847443c5 100644 --- a/drivers/gpio/gpio-mxc.c +++ b/drivers/gpio/gpio-mxc.c @@ -272,7 +272,7 @@ static void mxc_gpio_irq_handler(struct mxc_gpio_port *port, u32 irq_stat) } /* MX1 and MX3 has one interrupt *per* gpio port */ -static void mx3_gpio_irq_handler(u32 irq, struct irq_desc *desc) +static void mx3_gpio_irq_handler(struct irq_desc *desc) { u32 irq_stat; struct mxc_gpio_port *port = irq_desc_get_handler_data(desc); @@ -288,7 +288,7 @@ static void mx3_gpio_irq_handler(u32 irq, struct irq_desc *desc) } /* MX2 has one interrupt *for all* gpio ports */ -static void mx2_gpio_irq_handler(u32 irq, struct irq_desc *desc) +static void mx2_gpio_irq_handler(struct irq_desc *desc) { u32 irq_msk, irq_stat; struct mxc_gpio_port *port; @@ -339,13 +339,15 @@ static int gpio_set_wake_irq(struct irq_data *d, u32 enable) return 0; } -static void mxc_gpio_init_gc(struct mxc_gpio_port *port, int irq_base) +static int mxc_gpio_init_gc(struct mxc_gpio_port *port, int irq_base) { struct irq_chip_generic *gc; struct 
irq_chip_type *ct; gc = irq_alloc_generic_chip("gpio-mxc", 1, irq_base, port->base, handle_level_irq); + if (!gc) + return -ENOMEM; gc->private = port; ct = gc->chip_types; @@ -360,6 +362,8 @@ static void mxc_gpio_init_gc(struct mxc_gpio_port *port, int irq_base) irq_setup_generic_chip(gc, IRQ_MSK(32), IRQ_GC_INIT_NESTED_LOCK, IRQ_NOREQUEST, 0); + + return 0; } static void mxc_gpio_get_hw(struct platform_device *pdev) @@ -477,12 +481,16 @@ static int mxc_gpio_probe(struct platform_device *pdev) } /* gpio-mxc can be a generic irq chip */ - mxc_gpio_init_gc(port, irq_base); + err = mxc_gpio_init_gc(port, irq_base); + if (err < 0) + goto out_irqdomain_remove; list_add_tail(&port->node, &mxc_gpio_ports); return 0; +out_irqdomain_remove: + irq_domain_remove(port->domain); out_irqdesc_free: irq_free_descs(irq_base, 32); out_gpiochip_remove: diff --git a/drivers/gpio/gpio-mxs.c b/drivers/gpio/gpio-mxs.c index b7f383eb18d9..a4288f428819 100644 --- a/drivers/gpio/gpio-mxs.c +++ b/drivers/gpio/gpio-mxs.c @@ -154,7 +154,7 @@ static void mxs_flip_edge(struct mxs_gpio_port *port, u32 gpio) } /* MXS has one interrupt *per* gpio port */ -static void mxs_gpio_irq_handler(u32 irq, struct irq_desc *desc) +static void mxs_gpio_irq_handler(struct irq_desc *desc) { u32 irq_stat; struct mxs_gpio_port *port = irq_desc_get_handler_data(desc); @@ -196,13 +196,16 @@ static int mxs_gpio_set_wake_irq(struct irq_data *d, unsigned int enable) return 0; } -static void __init mxs_gpio_init_gc(struct mxs_gpio_port *port, int irq_base) +static int __init mxs_gpio_init_gc(struct mxs_gpio_port *port, int irq_base) { struct irq_chip_generic *gc; struct irq_chip_type *ct; gc = irq_alloc_generic_chip("gpio-mxs", 1, irq_base, port->base, handle_level_irq); + if (!gc) + return -ENOMEM; + gc->private = port; ct = gc->chip_types; @@ -216,6 +219,8 @@ static void __init mxs_gpio_init_gc(struct mxs_gpio_port *port, int irq_base) irq_setup_generic_chip(gc, IRQ_MSK(32), IRQ_GC_INIT_NESTED_LOCK, IRQ_NOREQUEST, 
0); + + return 0; } static int mxs_gpio_to_irq(struct gpio_chip *gc, unsigned offset) @@ -317,7 +322,9 @@ static int mxs_gpio_probe(struct platform_device *pdev) } /* gpio-mxs can be a generic irq chip */ - mxs_gpio_init_gc(port, irq_base); + err = mxs_gpio_init_gc(port, irq_base); + if (err < 0) + goto out_irqdomain_remove; /* setup one handler for each entry */ irq_set_chained_handler_and_data(port->irq, mxs_gpio_irq_handler, @@ -343,6 +350,8 @@ static int mxs_gpio_probe(struct platform_device *pdev) out_bgpio_remove: bgpio_remove(&port->bgc); +out_irqdomain_remove: + irq_domain_remove(port->domain); out_irqdesc_free: irq_free_descs(irq_base, 32); return err; diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 2ae0d47e9554..5236db161e76 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -709,7 +709,7 @@ static void omap_gpio_free(struct gpio_chip *chip, unsigned offset) * line's interrupt handler has been run, we may miss some nested * interrupts. 
*/ -static void omap_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) +static void omap_gpio_irq_handler(struct irq_desc *desc) { void __iomem *isr_reg = NULL; u32 isr; @@ -1098,7 +1098,6 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc) } else { bank->chip.label = "gpio"; bank->chip.base = gpio; - gpio += bank->width; } bank->chip.ngpio = bank->width; @@ -1108,6 +1107,9 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc) return ret; } + if (!bank->is_mpuio) + gpio += bank->width; + #ifdef CONFIG_ARCH_OMAP1 /* * REVISIT: Once we have OMAP1 supporting SPARSE_IRQ, we can drop @@ -1253,8 +1255,11 @@ static int omap_gpio_probe(struct platform_device *pdev) omap_gpio_mod_init(bank); ret = omap_gpio_chip_init(bank, irqc); - if (ret) + if (ret) { + pm_runtime_put_sync(bank->dev); + pm_runtime_disable(bank->dev); return ret; + } omap_gpio_show_rev(bank); diff --git a/drivers/gpio/gpio-pl061.c b/drivers/gpio/gpio-pl061.c index 04756130437f..229ef653e0f8 100644 --- a/drivers/gpio/gpio-pl061.c +++ b/drivers/gpio/gpio-pl061.c @@ -187,7 +187,7 @@ static int pl061_irq_type(struct irq_data *d, unsigned trigger) return 0; } -static void pl061_irq_handler(unsigned irq, struct irq_desc *desc) +static void pl061_irq_handler(struct irq_desc *desc) { unsigned long pending; int offset; diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c index 55a11de3d5b7..df2ce550f309 100644 --- a/drivers/gpio/gpio-pxa.c +++ b/drivers/gpio/gpio-pxa.c @@ -401,7 +401,7 @@ static int pxa_gpio_irq_type(struct irq_data *d, unsigned int type) return 0; } -static void pxa_gpio_demux_handler(unsigned int irq, struct irq_desc *desc) +static void pxa_gpio_demux_handler(struct irq_desc *desc) { struct pxa_gpio_chip *c; int loop, gpio, gpio_base, n; diff --git a/drivers/gpio/gpio-sa1100.c b/drivers/gpio/gpio-sa1100.c index 67bd2f5d89e8..990fa9023e22 100644 --- a/drivers/gpio/gpio-sa1100.c +++ b/drivers/gpio/gpio-sa1100.c @@ -172,8 
+172,7 @@ static struct irq_domain *sa1100_gpio_irqdomain; * irq_controller_lock held, and IRQs disabled. Decode the IRQ * and call the handler. */ -static void -sa1100_gpio_handler(unsigned int __irq, struct irq_desc *desc) +static void sa1100_gpio_handler(struct irq_desc *desc) { unsigned int irq, mask; diff --git a/drivers/gpio/gpio-sx150x.c b/drivers/gpio/gpio-sx150x.c index 458d9d7952b8..9c6b96707c9f 100644 --- a/drivers/gpio/gpio-sx150x.c +++ b/drivers/gpio/gpio-sx150x.c @@ -706,4 +706,3 @@ module_exit(sx150x_exit); MODULE_AUTHOR("Gregory Bean "); MODULE_DESCRIPTION("Driver for Semtech SX150X I2C GPIO Expanders"); MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("i2c:sx150x"); diff --git a/drivers/gpio/gpio-tegra.c b/drivers/gpio/gpio-tegra.c index 9b14aafb576d..027e5f47dd28 100644 --- a/drivers/gpio/gpio-tegra.c +++ b/drivers/gpio/gpio-tegra.c @@ -266,7 +266,7 @@ static void tegra_gpio_irq_shutdown(struct irq_data *d) gpiochip_unlock_as_irq(&tegra_gpio_chip, gpio); } -static void tegra_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) +static void tegra_gpio_irq_handler(struct irq_desc *desc) { int port; int pin; diff --git a/drivers/gpio/gpio-timberdale.c b/drivers/gpio/gpio-timberdale.c index 5a492054589f..30653e6319e9 100644 --- a/drivers/gpio/gpio-timberdale.c +++ b/drivers/gpio/gpio-timberdale.c @@ -192,7 +192,7 @@ out: return ret; } -static void timbgpio_irq(unsigned int irq, struct irq_desc *desc) +static void timbgpio_irq(struct irq_desc *desc) { struct timbgpio *tgpio = irq_desc_get_handler_data(desc); struct irq_data *data = irq_desc_get_irq_data(desc); diff --git a/drivers/gpio/gpio-tz1090.c b/drivers/gpio/gpio-tz1090.c index bbac92ae4c32..87bb1b1eee8d 100644 --- a/drivers/gpio/gpio-tz1090.c +++ b/drivers/gpio/gpio-tz1090.c @@ -375,7 +375,7 @@ static int gpio_set_irq_wake(struct irq_data *data, unsigned int on) #define gpio_set_irq_wake NULL #endif -static void tz1090_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) +static void 
tz1090_gpio_irq_handler(struct irq_desc *desc) { irq_hw_number_t hw; unsigned int irq_stat, irq_no; @@ -400,7 +400,7 @@ static void tz1090_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) == IRQ_TYPE_EDGE_BOTH) tz1090_gpio_irq_next_edge(bank, hw); - generic_handle_irq_desc(irq_no, child_desc); + generic_handle_irq_desc(child_desc); } } diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index 3d5714d4f405..069f9e4b7daa 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -120,7 +120,7 @@ static int vf610_gpio_direction_output(struct gpio_chip *chip, unsigned gpio, return pinctrl_gpio_direction_output(chip->base + gpio); } -static void vf610_gpio_irq_handler(u32 irq, struct irq_desc *desc) +static void vf610_gpio_irq_handler(struct irq_desc *desc) { struct vf610_gpio_port *port = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); @@ -176,9 +176,9 @@ static int vf610_gpio_irq_set_type(struct irq_data *d, u32 type) port->irqc[d->hwirq] = irqc; if (type & IRQ_TYPE_LEVEL_MASK) - __irq_set_handler_locked(d->irq, handle_level_irq); + irq_set_handler_locked(d, handle_level_irq); else - __irq_set_handler_locked(d->irq, handle_edge_irq); + irq_set_handler_locked(d, handle_edge_irq); return 0; } diff --git a/drivers/gpio/gpio-zx.c b/drivers/gpio/gpio-zx.c index 12ee1969298c..4b8a26910705 100644 --- a/drivers/gpio/gpio-zx.c +++ b/drivers/gpio/gpio-zx.c @@ -177,7 +177,7 @@ static int zx_irq_type(struct irq_data *d, unsigned trigger) return 0; } -static void zx_irq_handler(unsigned irq, struct irq_desc *desc) +static void zx_irq_handler(struct irq_desc *desc) { unsigned long pending; int offset; diff --git a/drivers/gpio/gpio-zynq.c b/drivers/gpio/gpio-zynq.c index 27348e7cb705..1d1a5865ede9 100644 --- a/drivers/gpio/gpio-zynq.c +++ b/drivers/gpio/gpio-zynq.c @@ -514,7 +514,7 @@ static void zynq_gpio_handle_bank_irq(struct zynq_gpio *gpio, * application for that pin. 
* Note: A bug is reported if no handler is set for the gpio pin. */ -static void zynq_gpio_irqhandler(unsigned int irq, struct irq_desc *desc) +static void zynq_gpio_irqhandler(struct irq_desc *desc) { u32 int_sts, int_enb; unsigned int bank_num; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 980c1f87866a..5db3445552b1 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1174,15 +1174,16 @@ EXPORT_SYMBOL_GPL(gpiod_is_active_low); * that the GPIO was actually requested. */ -static bool _gpiod_get_raw_value(const struct gpio_desc *desc) +static int _gpiod_get_raw_value(const struct gpio_desc *desc) { struct gpio_chip *chip; - bool value; int offset; + int value; chip = desc->chip; offset = gpio_chip_hwgpio(desc); - value = chip->get ? chip->get(chip, offset) : false; + value = chip->get ? chip->get(chip, offset) : -EIO; + value = value < 0 ? value : !!value; trace_gpio_value(desc_to_gpio(desc), 1, value); return value; } @@ -1192,7 +1193,7 @@ static bool _gpiod_get_raw_value(const struct gpio_desc *desc) * @desc: gpio whose value will be returned * * Return the GPIO's raw value, i.e. the value of the physical line disregarding - * its ACTIVE_LOW status. + * its ACTIVE_LOW status, or negative errno on failure. * * This function should be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. @@ -1212,7 +1213,7 @@ EXPORT_SYMBOL_GPL(gpiod_get_raw_value); * @desc: gpio whose value will be returned * * Return the GPIO's logical value, i.e. taking the ACTIVE_LOW status into - * account. + * account, or negative errno on failure. * * This function should be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. 
@@ -1226,6 +1227,9 @@ int gpiod_get_value(const struct gpio_desc *desc) WARN_ON(desc->chip->can_sleep); value = _gpiod_get_raw_value(desc); + if (value < 0) + return value; + if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) value = !value; @@ -1548,7 +1552,7 @@ EXPORT_SYMBOL_GPL(gpiochip_unlock_as_irq); * @desc: gpio whose value will be returned * * Return the GPIO's raw value, i.e. the value of the physical line disregarding - * its ACTIVE_LOW status. + * its ACTIVE_LOW status, or negative errno on failure. * * This function is to be called from contexts that can sleep. */ @@ -1566,7 +1570,7 @@ EXPORT_SYMBOL_GPL(gpiod_get_raw_value_cansleep); * @desc: gpio whose value will be returned * * Return the GPIO's logical value, i.e. taking the ACTIVE_LOW status into - * account. + * account, or negative errno on failure. * * This function is to be called from contexts that can sleep. */ @@ -1579,6 +1583,9 @@ int gpiod_get_value_cansleep(const struct gpio_desc *desc) return 0; value = _gpiod_get_raw_value(desc); + if (value < 0) + return value; + if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) value = !value; diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c index b1f73bee1368..b0d4b53b97f4 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c @@ -178,7 +178,6 @@ static int mdp5_hw_irqdomain_map(struct irq_domain *d, irq_set_chip_and_handler(irq, &mdp5_hw_irq_chip, handle_level_irq); irq_set_chip_data(irq, mdp5_kms); - set_irq_flags(irq, IRQF_VALID); return 0; } diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c index 243f99a80253..e5a38d202a21 100644 --- a/drivers/gpu/ipu-v3/ipu-common.c +++ b/drivers/gpu/ipu-v3/ipu-common.c @@ -912,7 +912,7 @@ static void ipu_irq_handle(struct ipu_soc *ipu, const int *regs, int num_regs) } } -static void ipu_irq_handler(unsigned int irq, struct irq_desc *desc) +static void ipu_irq_handler(struct irq_desc *desc) { struct ipu_soc 
*ipu = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); @@ -925,7 +925,7 @@ static void ipu_irq_handler(unsigned int irq, struct irq_desc *desc) chained_irq_exit(chip, desc); } -static void ipu_err_irq_handler(unsigned int irq, struct irq_desc *desc) +static void ipu_err_irq_handler(struct irq_desc *desc) { struct ipu_soc *ipu = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); @@ -1099,8 +1099,7 @@ static int ipu_irq_init(struct ipu_soc *ipu) } ret = irq_alloc_domain_generic_chips(ipu->domain, 32, 1, "IPU", - handle_level_irq, 0, - IRQF_VALID, 0); + handle_level_irq, 0, 0, 0); if (ret < 0) { dev_err(ipu->dev, "failed to alloc generic irq chips\n"); irq_domain_remove(ipu->domain); diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 500b262b89bb..e13c902e8966 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -1140,8 +1140,8 @@ config SENSORS_NCT6775 help If you say yes here you get support for the hardware monitoring functionality of the Nuvoton NCT6106D, NCT6775F, NCT6776F, NCT6779D, - NCT6791D, NCT6792D and compatible Super-I/O chips. This driver - replaces the w83627ehf driver for NCT6775F and NCT6776F. + NCT6791D, NCT6792D, NCT6793D, and compatible Super-I/O chips. This + driver replaces the w83627ehf driver for NCT6775F and NCT6776F. This driver can also be built as a module. If so, the module will be called nct6775. diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index bd1c99deac71..8b4fa55e46c6 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -39,6 +39,7 @@ * nct6779d 15 5 5 2+6 0xc560 0xc1 0x5ca3 * nct6791d 15 6 6 2+6 0xc800 0xc1 0x5ca3 * nct6792d 15 6 6 2+6 0xc910 0xc1 0x5ca3 + * nct6793d 15 6 6 2+6 0xd120 0xc1 0x5ca3 * * #temp lists the number of monitored temperature sources (first value) plus * the number of directly connectable temperature sensors (second value). 
@@ -63,7 +64,7 @@ #define USE_ALTERNATE -enum kinds { nct6106, nct6775, nct6776, nct6779, nct6791, nct6792 }; +enum kinds { nct6106, nct6775, nct6776, nct6779, nct6791, nct6792, nct6793 }; /* used to set data->name = nct6775_device_names[data->sio_kind] */ static const char * const nct6775_device_names[] = { @@ -73,6 +74,17 @@ static const char * const nct6775_device_names[] = { "nct6779", "nct6791", "nct6792", + "nct6793", +}; + +static const char * const nct6775_sio_names[] __initconst = { + "NCT6106D", + "NCT6775F", + "NCT6776D/F", + "NCT6779D", + "NCT6791D", + "NCT6792D", + "NCT6793D", }; static unsigned short force_id; @@ -104,6 +116,7 @@ MODULE_PARM_DESC(fan_debounce, "Enable debouncing for fan RPM signal"); #define SIO_NCT6779_ID 0xc560 #define SIO_NCT6791_ID 0xc800 #define SIO_NCT6792_ID 0xc910 +#define SIO_NCT6793_ID 0xd120 #define SIO_ID_MASK 0xFFF0 enum pwm_enable { off, manual, thermal_cruise, speed_cruise, sf3, sf4 }; @@ -354,6 +367,10 @@ static const u16 NCT6775_REG_TEMP_CRIT[ARRAY_SIZE(nct6775_temp_label) - 1] /* NCT6776 specific data */ +/* STEP_UP_TIME and STEP_DOWN_TIME regs are swapped for all chips but NCT6775 */ +#define NCT6776_REG_FAN_STEP_UP_TIME NCT6775_REG_FAN_STEP_DOWN_TIME +#define NCT6776_REG_FAN_STEP_DOWN_TIME NCT6775_REG_FAN_STEP_UP_TIME + static const s8 NCT6776_ALARM_BITS[] = { 0, 1, 2, 3, 8, 21, 20, 16, /* in0.. 
in7 */ 17, -1, -1, -1, -1, -1, -1, /* in8..in14 */ @@ -533,7 +550,7 @@ static const s8 NCT6791_ALARM_BITS[] = { 4, 5, 13, -1, -1, -1, /* temp1..temp6 */ 12, 9 }; /* intrusion0, intrusion1 */ -/* NCT6792 specific data */ +/* NCT6792/NCT6793 specific data */ static const u16 NCT6792_REG_TEMP_MON[] = { 0x73, 0x75, 0x77, 0x79, 0x7b, 0x7d }; @@ -1056,6 +1073,7 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg) case nct6779: case nct6791: case nct6792: + case nct6793: return reg == 0x150 || reg == 0x153 || reg == 0x155 || ((reg & 0xfff0) == 0x4b0 && (reg & 0x000f) < 0x0b) || reg == 0x402 || @@ -1407,6 +1425,7 @@ static void nct6775_update_pwm_limits(struct device *dev) case nct6779: case nct6791: case nct6792: + case nct6793: reg = nct6775_read_value(data, data->REG_CRITICAL_PWM_ENABLE[i]); if (reg & data->CRITICAL_PWM_ENABLE_MASK) @@ -2822,6 +2841,7 @@ store_auto_pwm(struct device *dev, struct device_attribute *attr, case nct6779: case nct6791: case nct6792: + case nct6793: nct6775_write_value(data, data->REG_CRITICAL_PWM[nr], val); reg = nct6775_read_value(data, @@ -3256,7 +3276,7 @@ nct6775_check_fan_inputs(struct nct6775_data *data) pwm4pin = false; pwm5pin = false; pwm6pin = false; - } else { /* NCT6779D, NCT6791D, or NCT6792D */ + } else { /* NCT6779D, NCT6791D, NCT6792D, or NCT6793D */ regval = superio_inb(sioreg, 0x1c); fan3pin = !(regval & (1 << 5)); @@ -3269,7 +3289,8 @@ nct6775_check_fan_inputs(struct nct6775_data *data) fan4min = fan4pin; - if (data->kind == nct6791 || data->kind == nct6792) { + if (data->kind == nct6791 || data->kind == nct6792 || + data->kind == nct6793) { regval = superio_inb(sioreg, 0x2d); fan6pin = (regval & (1 << 1)); pwm6pin = (regval & (1 << 0)); @@ -3528,8 +3549,8 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_FAN_PULSES = NCT6776_REG_FAN_PULSES; data->FAN_PULSE_SHIFT = NCT6775_FAN_PULSE_SHIFT; data->REG_FAN_TIME[0] = NCT6775_REG_FAN_STOP_TIME; - data->REG_FAN_TIME[1] = NCT6775_REG_FAN_STEP_UP_TIME; 
- data->REG_FAN_TIME[2] = NCT6775_REG_FAN_STEP_DOWN_TIME; + data->REG_FAN_TIME[1] = NCT6776_REG_FAN_STEP_UP_TIME; + data->REG_FAN_TIME[2] = NCT6776_REG_FAN_STEP_DOWN_TIME; data->REG_TOLERANCE_H = NCT6776_REG_TOLERANCE_H; data->REG_PWM[0] = NCT6775_REG_PWM; data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT; @@ -3600,8 +3621,8 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_FAN_PULSES = NCT6779_REG_FAN_PULSES; data->FAN_PULSE_SHIFT = NCT6775_FAN_PULSE_SHIFT; data->REG_FAN_TIME[0] = NCT6775_REG_FAN_STOP_TIME; - data->REG_FAN_TIME[1] = NCT6775_REG_FAN_STEP_UP_TIME; - data->REG_FAN_TIME[2] = NCT6775_REG_FAN_STEP_DOWN_TIME; + data->REG_FAN_TIME[1] = NCT6776_REG_FAN_STEP_UP_TIME; + data->REG_FAN_TIME[2] = NCT6776_REG_FAN_STEP_DOWN_TIME; data->REG_TOLERANCE_H = NCT6776_REG_TOLERANCE_H; data->REG_PWM[0] = NCT6775_REG_PWM; data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT; @@ -3643,6 +3664,7 @@ static int nct6775_probe(struct platform_device *pdev) break; case nct6791: case nct6792: + case nct6793: data->in_num = 15; data->pwm_num = 6; data->auto_pwm_num = 4; @@ -3677,8 +3699,8 @@ static int nct6775_probe(struct platform_device *pdev) data->REG_FAN_PULSES = NCT6779_REG_FAN_PULSES; data->FAN_PULSE_SHIFT = NCT6775_FAN_PULSE_SHIFT; data->REG_FAN_TIME[0] = NCT6775_REG_FAN_STOP_TIME; - data->REG_FAN_TIME[1] = NCT6775_REG_FAN_STEP_UP_TIME; - data->REG_FAN_TIME[2] = NCT6775_REG_FAN_STEP_DOWN_TIME; + data->REG_FAN_TIME[1] = NCT6776_REG_FAN_STEP_UP_TIME; + data->REG_FAN_TIME[2] = NCT6776_REG_FAN_STEP_DOWN_TIME; data->REG_TOLERANCE_H = NCT6776_REG_TOLERANCE_H; data->REG_PWM[0] = NCT6775_REG_PWM; data->REG_PWM[1] = NCT6775_REG_FAN_START_OUTPUT; @@ -3918,6 +3940,7 @@ static int nct6775_probe(struct platform_device *pdev) case nct6779: case nct6791: case nct6792: + case nct6793: break; } @@ -3950,6 +3973,7 @@ static int nct6775_probe(struct platform_device *pdev) break; case nct6791: case nct6792: + case nct6793: tmp |= 0x7e; break; } @@ -4047,7 +4071,8 @@ static int 
__maybe_unused nct6775_resume(struct device *dev) if (reg != data->sio_reg_enable) superio_outb(sioreg, SIO_REG_ENABLE, data->sio_reg_enable); - if (data->kind == nct6791 || data->kind == nct6792) + if (data->kind == nct6791 || data->kind == nct6792 || + data->kind == nct6793) nct6791_enable_io_mapping(sioreg); superio_exit(sioreg); @@ -4106,15 +4131,6 @@ static struct platform_driver nct6775_driver = { .probe = nct6775_probe, }; -static const char * const nct6775_sio_names[] __initconst = { - "NCT6106D", - "NCT6775F", - "NCT6776D/F", - "NCT6779D", - "NCT6791D", - "NCT6792D", -}; - /* nct6775_find() looks for a '627 in the Super-I/O config space */ static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data) { @@ -4150,6 +4166,9 @@ static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data) case SIO_NCT6792_ID: sio_data->kind = nct6792; break; + case SIO_NCT6793_ID: + sio_data->kind = nct6793; + break; default: if (val != 0xffff) pr_debug("unsupported chip ID: 0x%04x\n", val); @@ -4175,7 +4194,8 @@ static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data) superio_outb(sioaddr, SIO_REG_ENABLE, val | 0x01); } - if (sio_data->kind == nct6791 || sio_data->kind == nct6792) + if (sio_data->kind == nct6791 || sio_data->kind == nct6792 || + sio_data->kind == nct6793) nct6791_enable_io_mapping(sioaddr); superio_exit(sioaddr); @@ -4285,7 +4305,7 @@ static void __exit sensors_nct6775_exit(void) } MODULE_AUTHOR("Guenter Roeck "); -MODULE_DESCRIPTION("NCT6106D/NCT6775F/NCT6776F/NCT6779D/NCT6791D/NCT6792D driver"); +MODULE_DESCRIPTION("Driver for NCT6775F and compatible chips"); MODULE_LICENSE("GPL"); module_init(sensors_nct6775_init); diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index da4c6979fbb8..aa26f3c3416b 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -56,7 +56,6 @@ config INFINIBAND_ADDR_TRANS source "drivers/infiniband/hw/mthca/Kconfig" source 
"drivers/infiniband/hw/qib/Kconfig" -source "drivers/infiniband/hw/ehca/Kconfig" source "drivers/infiniband/hw/cxgb3/Kconfig" source "drivers/infiniband/hw/cxgb4/Kconfig" source "drivers/infiniband/hw/mlx4/Kconfig" diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index 1bdb9996d371..aded2a5cc2d5 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -1,6 +1,5 @@ obj-$(CONFIG_INFINIBAND_MTHCA) += mthca/ obj-$(CONFIG_INFINIBAND_QIB) += qib/ -obj-$(CONFIG_INFINIBAND_EHCA) += ehca/ obj-$(CONFIG_INFINIBAND_CXGB3) += cxgb3/ obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/ obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/ diff --git a/drivers/infiniband/hw/ehca/Kconfig b/drivers/infiniband/hw/ehca/Kconfig deleted file mode 100644 index 59f807d8d58e..000000000000 --- a/drivers/infiniband/hw/ehca/Kconfig +++ /dev/null @@ -1,9 +0,0 @@ -config INFINIBAND_EHCA - tristate "eHCA support" - depends on IBMEBUS - ---help--- - This driver supports the IBM pSeries eHCA InfiniBand adapter. - - To compile the driver as a module, choose M here. The module - will be called ib_ehca. - diff --git a/drivers/infiniband/hw/ehca/Makefile b/drivers/infiniband/hw/ehca/Makefile deleted file mode 100644 index 74d284e46a40..000000000000 --- a/drivers/infiniband/hw/ehca/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -# Authors: Heiko J Schick -# Christoph Raisch -# Joachim Fenkes -# -# Copyright (c) 2005 IBM Corporation -# -# All rights reserved. -# -# This source code is distributed under a dual license of GPL v2.0 and OpenIB BSD. 
- -obj-$(CONFIG_INFINIBAND_EHCA) += ib_ehca.o - -ib_ehca-objs = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \ - ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \ - ehca_uverbs.o ipz_pt_fn.o hcp_if.o hcp_phyp.o - diff --git a/drivers/infiniband/hw/ehca/ehca_av.c b/drivers/infiniband/hw/ehca/ehca_av.c deleted file mode 100644 index 465926319f3d..000000000000 --- a/drivers/infiniband/hw/ehca/ehca_av.c +++ /dev/null @@ -1,277 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * address vector functions - * - * Authors: Hoang-Nam Nguyen - * Khadija Souissi - * Reinhard Ernst - * Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include - -#include "ehca_tools.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" - -static struct kmem_cache *av_cache; - -int ehca_calc_ipd(struct ehca_shca *shca, int port, - enum ib_rate path_rate, u32 *ipd) -{ - int path = ib_rate_to_mult(path_rate); - int link, ret; - struct ib_port_attr pa; - - if (path_rate == IB_RATE_PORT_CURRENT) { - *ipd = 0; - return 0; - } - - if (unlikely(path < 0)) { - ehca_err(&shca->ib_device, "Invalid static rate! 
path_rate=%x", - path_rate); - return -EINVAL; - } - - ret = ehca_query_port(&shca->ib_device, port, &pa); - if (unlikely(ret < 0)) { - ehca_err(&shca->ib_device, "Failed to query port ret=%i", ret); - return ret; - } - - link = ib_width_enum_to_int(pa.active_width) * pa.active_speed; - - if (path >= link) - /* no need to throttle if path faster than link */ - *ipd = 0; - else - /* IPD = round((link / path) - 1) */ - *ipd = ((link + (path >> 1)) / path) - 1; - - return 0; -} - -struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) -{ - int ret; - struct ehca_av *av; - struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, - ib_device); - - av = kmem_cache_alloc(av_cache, GFP_KERNEL); - if (!av) { - ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p", - pd, ah_attr); - return ERR_PTR(-ENOMEM); - } - - av->av.sl = ah_attr->sl; - av->av.dlid = ah_attr->dlid; - av->av.slid_path_bits = ah_attr->src_path_bits; - - if (ehca_static_rate < 0) { - u32 ipd; - if (ehca_calc_ipd(shca, ah_attr->port_num, - ah_attr->static_rate, &ipd)) { - ret = -EINVAL; - goto create_ah_exit1; - } - av->av.ipd = ipd; - } else - av->av.ipd = ehca_static_rate; - - av->av.lnh = ah_attr->ah_flags; - av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6); - av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_TCLASS_MASK, - ah_attr->grh.traffic_class); - av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK, - ah_attr->grh.flow_label); - av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, - ah_attr->grh.hop_limit); - av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B); - /* set sgid in grh.word_1 */ - if (ah_attr->ah_flags & IB_AH_GRH) { - int rc; - struct ib_port_attr port_attr; - union ib_gid gid; - memset(&port_attr, 0, sizeof(port_attr)); - rc = ehca_query_port(pd->device, ah_attr->port_num, - &port_attr); - if (rc) { /* invalid port number */ - ret = -EINVAL; - ehca_err(pd->device, "Invalid port number " - "ehca_query_port() returned %x " - "pd=%p 
ah_attr=%p", rc, pd, ah_attr); - goto create_ah_exit1; - } - memset(&gid, 0, sizeof(gid)); - rc = ehca_query_gid(pd->device, - ah_attr->port_num, - ah_attr->grh.sgid_index, &gid); - if (rc) { - ret = -EINVAL; - ehca_err(pd->device, "Failed to retrieve sgid " - "ehca_query_gid() returned %x " - "pd=%p ah_attr=%p", rc, pd, ah_attr); - goto create_ah_exit1; - } - memcpy(&av->av.grh.word_1, &gid, sizeof(gid)); - } - av->av.pmtu = shca->max_mtu; - - /* dgid comes in grh.word_3 */ - memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid, - sizeof(ah_attr->grh.dgid)); - - return &av->ib_ah; - -create_ah_exit1: - kmem_cache_free(av_cache, av); - - return ERR_PTR(ret); -} - -int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) -{ - struct ehca_av *av; - struct ehca_ud_av new_ehca_av; - struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca, - ib_device); - - memset(&new_ehca_av, 0, sizeof(new_ehca_av)); - new_ehca_av.sl = ah_attr->sl; - new_ehca_av.dlid = ah_attr->dlid; - new_ehca_av.slid_path_bits = ah_attr->src_path_bits; - new_ehca_av.ipd = ah_attr->static_rate; - new_ehca_av.lnh = EHCA_BMASK_SET(GRH_FLAG_MASK, - (ah_attr->ah_flags & IB_AH_GRH) > 0); - new_ehca_av.grh.word_0 = EHCA_BMASK_SET(GRH_TCLASS_MASK, - ah_attr->grh.traffic_class); - new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK, - ah_attr->grh.flow_label); - new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, - ah_attr->grh.hop_limit); - new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1b); - - /* set sgid in grh.word_1 */ - if (ah_attr->ah_flags & IB_AH_GRH) { - int rc; - struct ib_port_attr port_attr; - union ib_gid gid; - memset(&port_attr, 0, sizeof(port_attr)); - rc = ehca_query_port(ah->device, ah_attr->port_num, - &port_attr); - if (rc) { /* invalid port number */ - ehca_err(ah->device, "Invalid port number " - "ehca_query_port() returned %x " - "ah=%p ah_attr=%p port_num=%x", - rc, ah, ah_attr, ah_attr->port_num); - return -EINVAL; - } - memset(&gid, 
0, sizeof(gid)); - rc = ehca_query_gid(ah->device, - ah_attr->port_num, - ah_attr->grh.sgid_index, &gid); - if (rc) { - ehca_err(ah->device, "Failed to retrieve sgid " - "ehca_query_gid() returned %x " - "ah=%p ah_attr=%p port_num=%x " - "sgid_index=%x", - rc, ah, ah_attr, ah_attr->port_num, - ah_attr->grh.sgid_index); - return -EINVAL; - } - memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid)); - } - - new_ehca_av.pmtu = shca->max_mtu; - - memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid, - sizeof(ah_attr->grh.dgid)); - - av = container_of(ah, struct ehca_av, ib_ah); - av->av = new_ehca_av; - - return 0; -} - -int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) -{ - struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah); - - memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3, - sizeof(ah_attr->grh.dgid)); - ah_attr->sl = av->av.sl; - - ah_attr->dlid = av->av.dlid; - - ah_attr->src_path_bits = av->av.slid_path_bits; - ah_attr->static_rate = av->av.ipd; - ah_attr->ah_flags = EHCA_BMASK_GET(GRH_FLAG_MASK, av->av.lnh); - ah_attr->grh.traffic_class = EHCA_BMASK_GET(GRH_TCLASS_MASK, - av->av.grh.word_0); - ah_attr->grh.hop_limit = EHCA_BMASK_GET(GRH_HOPLIMIT_MASK, - av->av.grh.word_0); - ah_attr->grh.flow_label = EHCA_BMASK_GET(GRH_FLOWLABEL_MASK, - av->av.grh.word_0); - - return 0; -} - -int ehca_destroy_ah(struct ib_ah *ah) -{ - kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah)); - - return 0; -} - -int ehca_init_av_cache(void) -{ - av_cache = kmem_cache_create("ehca_cache_av", - sizeof(struct ehca_av), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!av_cache) - return -ENOMEM; - return 0; -} - -void ehca_cleanup_av_cache(void) -{ - if (av_cache) - kmem_cache_destroy(av_cache); -} diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h deleted file mode 100644 index bd45e0f3923f..000000000000 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ /dev/null @@ -1,482 +0,0 @@ -/* - * IBM eServer eHCA 
Infiniband device driver for Linux on POWER - * - * Struct definition for eHCA internal structures - * - * Authors: Heiko J Schick - * Christoph Raisch - * Joachim Fenkes - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __EHCA_CLASSES_H__ -#define __EHCA_CLASSES_H__ - -struct ehca_module; -struct ehca_qp; -struct ehca_cq; -struct ehca_eq; -struct ehca_mr; -struct ehca_mw; -struct ehca_pd; -struct ehca_av; - -#include -#include - -#include -#include - -#ifdef CONFIG_PPC64 -#include "ehca_classes_pSeries.h" -#endif -#include "ipz_pt_fn.h" -#include "ehca_qes.h" -#include "ehca_irq.h" - -#define EHCA_EQE_CACHE_SIZE 20 -#define EHCA_MAX_NUM_QUEUES 0xffff - -struct ehca_eqe_cache_entry { - struct ehca_eqe *eqe; - struct ehca_cq *cq; -}; - -struct ehca_eq { - u32 length; - struct ipz_queue ipz_queue; - struct ipz_eq_handle ipz_eq_handle; - struct work_struct work; - struct h_galpas galpas; - int is_initialized; - struct ehca_pfeq pf; - spinlock_t spinlock; - struct tasklet_struct interrupt_task; - u32 ist; - spinlock_t irq_spinlock; - struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE]; -}; - -struct ehca_sma_attr { - u16 lid, lmc, sm_sl, sm_lid; - u16 pkey_tbl_len, pkeys[16]; -}; - -struct ehca_sport { - struct ib_cq *ibcq_aqp1; - struct ib_qp *ibqp_sqp[2]; - /* lock to serialze modify_qp() calls for sqp in normal - * and irq path (when event PORT_ACTIVE is received first time) - */ - spinlock_t mod_sqp_lock; - enum ib_port_state port_state; - struct ehca_sma_attr saved_attr; - u32 pma_qp_nr; -}; - -#define HCA_CAP_MR_PGSIZE_4K 0x80000000 -#define HCA_CAP_MR_PGSIZE_64K 0x40000000 -#define HCA_CAP_MR_PGSIZE_1M 0x20000000 -#define HCA_CAP_MR_PGSIZE_16M 0x10000000 - -struct ehca_shca { - struct ib_device ib_device; - struct platform_device *ofdev; - u8 num_ports; - int hw_level; - struct list_head shca_list; - struct ipz_adapter_handle ipz_hca_handle; - struct ehca_sport sport[2]; - struct ehca_eq eq; - struct ehca_eq neq; - struct ehca_mr *maxmr; - struct ehca_pd *pd; - struct h_galpas galpas; - struct mutex modify_mutex; - u64 hca_cap; - /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */ - u32 hca_cap_mr_pgsize; - int max_mtu; - int max_num_qps; - int 
max_num_cqs; - atomic_t num_cqs; - atomic_t num_qps; -}; - -struct ehca_pd { - struct ib_pd ib_pd; - struct ipz_pd fw_pd; - /* small queue mgmt */ - struct mutex lock; - struct list_head free[2]; - struct list_head full[2]; -}; - -enum ehca_ext_qp_type { - EQPT_NORMAL = 0, - EQPT_LLQP = 1, - EQPT_SRQBASE = 2, - EQPT_SRQ = 3, -}; - -/* struct to cache modify_qp()'s parms for GSI/SMI qp */ -struct ehca_mod_qp_parm { - int mask; - struct ib_qp_attr attr; -}; - -#define EHCA_MOD_QP_PARM_MAX 4 - -#define QMAP_IDX_MASK 0xFFFFULL - -/* struct for tracking if cqes have been reported to the application */ -struct ehca_qmap_entry { - u16 app_wr_id; - u8 reported; - u8 cqe_req; -}; - -struct ehca_queue_map { - struct ehca_qmap_entry *map; - unsigned int entries; - unsigned int tail; - unsigned int left_to_poll; - unsigned int next_wqe_idx; /* Idx to first wqe to be flushed */ -}; - -/* function to calculate the next index for the qmap */ -static inline unsigned int next_index(unsigned int cur_index, unsigned int limit) -{ - unsigned int temp = cur_index + 1; - return (temp == limit) ? 
0 : temp; -} - -struct ehca_qp { - union { - struct ib_qp ib_qp; - struct ib_srq ib_srq; - }; - u32 qp_type; - enum ehca_ext_qp_type ext_type; - enum ib_qp_state state; - struct ipz_queue ipz_squeue; - struct ehca_queue_map sq_map; - struct ipz_queue ipz_rqueue; - struct ehca_queue_map rq_map; - struct h_galpas galpas; - u32 qkey; - u32 real_qp_num; - u32 token; - spinlock_t spinlock_s; - spinlock_t spinlock_r; - u32 sq_max_inline_data_size; - struct ipz_qp_handle ipz_qp_handle; - struct ehca_pfqp pf; - struct ib_qp_init_attr init_attr; - struct ehca_cq *send_cq; - struct ehca_cq *recv_cq; - unsigned int sqerr_purgeflag; - struct hlist_node list_entries; - /* array to cache modify_qp()'s parms for GSI/SMI qp */ - struct ehca_mod_qp_parm *mod_qp_parm; - int mod_qp_parm_idx; - /* mmap counter for resources mapped into user space */ - u32 mm_count_squeue; - u32 mm_count_rqueue; - u32 mm_count_galpa; - /* unsolicited ack circumvention */ - int unsol_ack_circ; - int mtu_shift; - u32 message_count; - u32 packet_count; - atomic_t nr_events; /* events seen */ - wait_queue_head_t wait_completion; - int mig_armed; - struct list_head sq_err_node; - struct list_head rq_err_node; -}; - -#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) -#define HAS_SQ(qp) (qp->ext_type != EQPT_SRQ) -#define HAS_RQ(qp) (qp->ext_type != EQPT_SRQBASE) - -/* must be power of 2 */ -#define QP_HASHTAB_LEN 8 - -struct ehca_cq { - struct ib_cq ib_cq; - struct ipz_queue ipz_queue; - struct h_galpas galpas; - spinlock_t spinlock; - u32 cq_number; - u32 token; - u32 nr_of_entries; - struct ipz_cq_handle ipz_cq_handle; - struct ehca_pfcq pf; - spinlock_t cb_lock; - struct hlist_head qp_hashtab[QP_HASHTAB_LEN]; - struct list_head entry; - u32 nr_callbacks; /* #events assigned to cpu by scaling code */ - atomic_t nr_events; /* #events seen */ - wait_queue_head_t wait_completion; - spinlock_t task_lock; - /* mmap counter for resources mapped into user space */ - u32 mm_count_queue; - u32 mm_count_galpa; - struct 
list_head sqp_err_list; - struct list_head rqp_err_list; -}; - -enum ehca_mr_flag { - EHCA_MR_FLAG_FMR = 0x80000000, /* FMR, created with ehca_alloc_fmr */ - EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR */ -}; - -struct ehca_mr { - union { - struct ib_mr ib_mr; /* must always be first in ehca_mr */ - struct ib_fmr ib_fmr; /* must always be first in ehca_mr */ - } ib; - struct ib_umem *umem; - spinlock_t mrlock; - - enum ehca_mr_flag flags; - u32 num_kpages; /* number of kernel pages */ - u32 num_hwpages; /* number of hw pages to form MR */ - u64 hwpage_size; /* hw page size used for this MR */ - int acl; /* ACL (stored here for usage in reregister) */ - u64 *start; /* virtual start address (stored here for */ - /* usage in reregister) */ - u64 size; /* size (stored here for usage in reregister) */ - u32 fmr_page_size; /* page size for FMR */ - u32 fmr_max_pages; /* max pages for FMR */ - u32 fmr_max_maps; /* max outstanding maps for FMR */ - u32 fmr_map_cnt; /* map counter for FMR */ - /* fw specific data */ - struct ipz_mrmw_handle ipz_mr_handle; /* MR handle for h-calls */ - struct h_galpas galpas; -}; - -struct ehca_mw { - struct ib_mw ib_mw; /* gen2 mw, must always be first in ehca_mw */ - spinlock_t mwlock; - - u8 never_bound; /* indication MW was never bound */ - struct ipz_mrmw_handle ipz_mw_handle; /* MW handle for h-calls */ - struct h_galpas galpas; -}; - -enum ehca_mr_pgi_type { - EHCA_MR_PGI_PHYS = 1, /* type of ehca_reg_phys_mr, - * ehca_rereg_phys_mr, - * ehca_reg_internal_maxmr */ - EHCA_MR_PGI_USER = 2, /* type of ehca_reg_user_mr */ - EHCA_MR_PGI_FMR = 3 /* type of ehca_map_phys_fmr */ -}; - -struct ehca_mr_pginfo { - enum ehca_mr_pgi_type type; - u64 num_kpages; - u64 kpage_cnt; - u64 hwpage_size; /* hw page size used for this MR */ - u64 num_hwpages; /* number of hw pages */ - u64 hwpage_cnt; /* counter for hw pages */ - u64 next_hwpage; /* next hw page in buffer/chunk/listelem */ - - union { - struct { /* type EHCA_MR_PGI_PHYS section */ - int 
num_phys_buf; - struct ib_phys_buf *phys_buf_array; - u64 next_buf; - } phy; - struct { /* type EHCA_MR_PGI_USER section */ - struct ib_umem *region; - struct scatterlist *next_sg; - u64 next_nmap; - } usr; - struct { /* type EHCA_MR_PGI_FMR section */ - u64 fmr_pgsize; - u64 *page_list; - u64 next_listelem; - } fmr; - } u; -}; - -/* output parameters for MR/FMR hipz calls */ -struct ehca_mr_hipzout_parms { - struct ipz_mrmw_handle handle; - u32 lkey; - u32 rkey; - u64 len; - u64 vaddr; - u32 acl; -}; - -/* output parameters for MW hipz calls */ -struct ehca_mw_hipzout_parms { - struct ipz_mrmw_handle handle; - u32 rkey; -}; - -struct ehca_av { - struct ib_ah ib_ah; - struct ehca_ud_av av; -}; - -struct ehca_ucontext { - struct ib_ucontext ib_ucontext; -}; - -int ehca_init_pd_cache(void); -void ehca_cleanup_pd_cache(void); -int ehca_init_cq_cache(void); -void ehca_cleanup_cq_cache(void); -int ehca_init_qp_cache(void); -void ehca_cleanup_qp_cache(void); -int ehca_init_av_cache(void); -void ehca_cleanup_av_cache(void); -int ehca_init_mrmw_cache(void); -void ehca_cleanup_mrmw_cache(void); -int ehca_init_small_qp_cache(void); -void ehca_cleanup_small_qp_cache(void); - -extern rwlock_t ehca_qp_idr_lock; -extern rwlock_t ehca_cq_idr_lock; -extern struct idr ehca_qp_idr; -extern struct idr ehca_cq_idr; -extern spinlock_t shca_list_lock; - -extern int ehca_static_rate; -extern int ehca_port_act_time; -extern bool ehca_use_hp_mr; -extern bool ehca_scaling_code; -extern int ehca_lock_hcalls; -extern int ehca_nr_ports; -extern int ehca_max_cq; -extern int ehca_max_qp; - -struct ipzu_queue_resp { - u32 qe_size; /* queue entry size */ - u32 act_nr_of_sg; - u32 queue_length; /* queue length allocated in bytes */ - u32 pagesize; - u32 toggle_state; - u32 offset; /* save offset within a page for small_qp */ -}; - -struct ehca_create_cq_resp { - u32 cq_number; - u32 token; - struct ipzu_queue_resp ipz_queue; - u32 fw_handle_ofs; - u32 dummy; -}; - -struct ehca_create_qp_resp { - 
u32 qp_num; - u32 token; - u32 qp_type; - u32 ext_type; - u32 qkey; - /* qp_num assigned by ehca: sqp0/1 may have got different numbers */ - u32 real_qp_num; - u32 fw_handle_ofs; - u32 dummy; - struct ipzu_queue_resp ipz_squeue; - struct ipzu_queue_resp ipz_rqueue; -}; - -struct ehca_alloc_cq_parms { - u32 nr_cqe; - u32 act_nr_of_entries; - u32 act_pages; - struct ipz_eq_handle eq_handle; -}; - -enum ehca_service_type { - ST_RC = 0, - ST_UC = 1, - ST_RD = 2, - ST_UD = 3, -}; - -enum ehca_ll_comp_flags { - LLQP_SEND_COMP = 0x20, - LLQP_RECV_COMP = 0x40, - LLQP_COMP_MASK = 0x60, -}; - -struct ehca_alloc_queue_parms { - /* input parameters */ - int max_wr; - int max_sge; - int page_size; - int is_small; - - /* output parameters */ - u16 act_nr_wqes; - u8 act_nr_sges; - u32 queue_size; /* bytes for small queues, pages otherwise */ -}; - -struct ehca_alloc_qp_parms { - struct ehca_alloc_queue_parms squeue; - struct ehca_alloc_queue_parms rqueue; - - /* input parameters */ - enum ehca_service_type servicetype; - int qp_storage; - int sigtype; - enum ehca_ext_qp_type ext_type; - enum ehca_ll_comp_flags ll_comp_flags; - int ud_av_l_key_ctl; - - u32 token; - struct ipz_eq_handle eq_handle; - struct ipz_pd pd; - struct ipz_cq_handle send_cq_handle, recv_cq_handle; - - u32 srq_qpn, srq_token, srq_limit; - - /* output parameters */ - u32 real_qp_num; - struct ipz_qp_handle qp_handle; - struct h_galpas galpas; -}; - -int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp); -int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num); -struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int qp_num); - -#endif diff --git a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h b/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h deleted file mode 100644 index 689c35786dd2..000000000000 --- a/drivers/infiniband/hw/ehca/ehca_classes_pSeries.h +++ /dev/null @@ -1,208 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * pSeries interface 
definitions - * - * Authors: Waleri Fomin - * Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __EHCA_CLASSES_PSERIES_H__ -#define __EHCA_CLASSES_PSERIES_H__ - -#include "hcp_phyp.h" -#include "ipz_pt_fn.h" - - -struct ehca_pfqp { - struct ipz_qpt sqpt; - struct ipz_qpt rqpt; -}; - -struct ehca_pfcq { - struct ipz_qpt qpt; - u32 cqnr; -}; - -struct ehca_pfeq { - struct ipz_qpt qpt; - struct h_galpa galpa; - u32 eqnr; -}; - -struct ipz_adapter_handle { - u64 handle; -}; - -struct ipz_cq_handle { - u64 handle; -}; - -struct ipz_eq_handle { - u64 handle; -}; - -struct ipz_qp_handle { - u64 handle; -}; -struct ipz_mrmw_handle { - u64 handle; -}; - -struct ipz_pd { - u32 value; -}; - -struct hcp_modify_qp_control_block { - u32 qkey; /* 00 */ - u32 rdd; /* reliable datagram domain */ - u32 send_psn; /* 02 */ - u32 receive_psn; /* 03 */ - u32 prim_phys_port; /* 04 */ - u32 alt_phys_port; /* 05 */ - u32 prim_p_key_idx; /* 06 */ - u32 alt_p_key_idx; /* 07 */ - u32 rdma_atomic_ctrl; /* 08 */ - u32 qp_state; /* 09 */ - u32 reserved_10; /* 10 */ - u32 rdma_nr_atomic_resp_res; /* 11 */ - u32 path_migration_state; /* 12 */ - u32 rdma_atomic_outst_dest_qp; /* 13 */ - u32 dest_qp_nr; /* 14 */ - u32 min_rnr_nak_timer_field; /* 15 */ - u32 service_level; /* 16 */ - u32 send_grh_flag; /* 17 */ - u32 retry_count; /* 18 */ - u32 timeout; /* 19 */ - u32 path_mtu; /* 20 */ - u32 max_static_rate; /* 21 */ - u32 dlid; /* 22 */ - u32 rnr_retry_count; /* 23 */ - u32 source_path_bits; /* 24 */ - u32 traffic_class; /* 25 */ - u32 hop_limit; /* 26 */ - u32 source_gid_idx; /* 27 */ - u32 flow_label; /* 28 */ - u32 reserved_29; /* 29 */ - union { /* 30 */ - u64 dw[2]; - u8 byte[16]; - } dest_gid; - u32 service_level_al; /* 34 */ - u32 send_grh_flag_al; /* 35 */ - u32 retry_count_al; /* 36 */ - u32 timeout_al; /* 37 */ - u32 max_static_rate_al; /* 38 */ - u32 dlid_al; /* 39 */ - u32 rnr_retry_count_al; /* 40 */ - u32 source_path_bits_al; /* 41 */ - u32 traffic_class_al; /* 42 */ - u32 hop_limit_al; /* 43 */ - u32 source_gid_idx_al; /* 44 */ - u32 flow_label_al; /* 45 */ - 
u32 reserved_46; /* 46 */ - u32 reserved_47; /* 47 */ - union { /* 48 */ - u64 dw[2]; - u8 byte[16]; - } dest_gid_al; - u32 max_nr_outst_send_wr; /* 52 */ - u32 max_nr_outst_recv_wr; /* 53 */ - u32 disable_ete_credit_check; /* 54 */ - u32 qp_number; /* 55 */ - u64 send_queue_handle; /* 56 */ - u64 recv_queue_handle; /* 58 */ - u32 actual_nr_sges_in_sq_wqe; /* 60 */ - u32 actual_nr_sges_in_rq_wqe; /* 61 */ - u32 qp_enable; /* 62 */ - u32 curr_srq_limit; /* 63 */ - u64 qp_aff_asyn_ev_log_reg; /* 64 */ - u64 shared_rq_hndl; /* 66 */ - u64 trigg_doorbell_qp_hndl; /* 68 */ - u32 reserved_70_127[58]; /* 70 */ -}; - -#define MQPCB_MASK_QKEY EHCA_BMASK_IBM( 0, 0) -#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM( 2, 2) -#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM( 3, 3) -#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM( 4, 4) -#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24, 31) -#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM( 5, 5) -#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM( 6, 6) -#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24, 31) -#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM( 7, 7) -#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM( 8, 8) -#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM( 9, 9) -#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11, 11) -#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12, 12) -#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13, 13) -#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14, 14) -#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15, 15) -#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16, 16) -#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17, 17) -#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18, 18) -#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19, 19) -#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20, 20) -#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21, 21) -#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22, 22) -#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23, 23) -#define 
MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24, 24) -#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25, 25) -#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26, 26) -#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27, 27) -#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28, 28) -#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30, 30) -#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31, 31) -#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32, 32) -#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33, 33) -#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34, 34) -#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35, 35) -#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36, 36) -#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37, 37) -#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38, 38) -#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39, 39) -#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40, 40) -#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41, 41) -#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42, 42) -#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44, 44) -#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45, 45) -#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46, 46) -#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47, 47) -#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48, 48) -#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49, 49) -#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50, 50) -#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51, 51) - -#endif /* __EHCA_CLASSES_PSERIES_H__ */ diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c deleted file mode 100644 index 9b68b175069b..000000000000 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ /dev/null @@ -1,397 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Completion queue handling - * - * Authors: Waleri Fomin - * Khadija Souissi - * Reinhard Ernst - * Heiko J Schick - * Hoang-Nam Nguyen - * - * 
- * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include - -#include "ehca_iverbs.h" -#include "ehca_classes.h" -#include "ehca_irq.h" -#include "hcp_if.h" - -static struct kmem_cache *cq_cache; - -int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp) -{ - unsigned int qp_num = qp->real_qp_num; - unsigned int key = qp_num & (QP_HASHTAB_LEN-1); - unsigned long flags; - - spin_lock_irqsave(&cq->spinlock, flags); - hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]); - spin_unlock_irqrestore(&cq->spinlock, flags); - - ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x", - cq->cq_number, qp_num); - - return 0; -} - -int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num) -{ - int ret = -EINVAL; - unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); - struct hlist_node *iter; - struct ehca_qp *qp; - unsigned long flags; - - spin_lock_irqsave(&cq->spinlock, flags); - hlist_for_each(iter, &cq->qp_hashtab[key]) { - qp = hlist_entry(iter, struct ehca_qp, list_entries); - if (qp->real_qp_num == real_qp_num) { - hlist_del(iter); - ehca_dbg(cq->ib_cq.device, - "removed qp from cq .cq_num=%x real_qp_num=%x", - cq->cq_number, real_qp_num); - ret = 0; - break; - } - } - spin_unlock_irqrestore(&cq->spinlock, flags); - if (ret) - ehca_err(cq->ib_cq.device, - "qp not found cq_num=%x real_qp_num=%x", - cq->cq_number, real_qp_num); - - return ret; -} - -struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) -{ - struct ehca_qp *ret = NULL; - unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); - struct hlist_node *iter; - struct ehca_qp *qp; - hlist_for_each(iter, &cq->qp_hashtab[key]) { - qp = hlist_entry(iter, struct ehca_qp, list_entries); - if (qp->real_qp_num == real_qp_num) { - ret = qp; - break; - } - } - return ret; -} - -struct ib_cq *ehca_create_cq(struct ib_device *device, - const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, - struct ib_udata *udata) -{ - int cqe = attr->cqe; - static const u32 additional_cqe = 20; - struct ib_cq *cq; - struct ehca_cq 
*my_cq; - struct ehca_shca *shca = - container_of(device, struct ehca_shca, ib_device); - struct ipz_adapter_handle adapter_handle; - struct ehca_alloc_cq_parms param; /* h_call's out parameters */ - struct h_galpa gal; - void *vpage; - u32 counter; - u64 rpage, cqx_fec, h_ret; - int ipz_rc, i; - unsigned long flags; - - if (attr->flags) - return ERR_PTR(-EINVAL); - - if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) - return ERR_PTR(-EINVAL); - - if (!atomic_add_unless(&shca->num_cqs, 1, shca->max_num_cqs)) { - ehca_err(device, "Unable to create CQ, max number of %i " - "CQs reached.", shca->max_num_cqs); - ehca_err(device, "To increase the maximum number of CQs " - "use the number_of_cqs module parameter.\n"); - return ERR_PTR(-ENOSPC); - } - - my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL); - if (!my_cq) { - ehca_err(device, "Out of memory for ehca_cq struct device=%p", - device); - atomic_dec(&shca->num_cqs); - return ERR_PTR(-ENOMEM); - } - - memset(¶m, 0, sizeof(struct ehca_alloc_cq_parms)); - - spin_lock_init(&my_cq->spinlock); - spin_lock_init(&my_cq->cb_lock); - spin_lock_init(&my_cq->task_lock); - atomic_set(&my_cq->nr_events, 0); - init_waitqueue_head(&my_cq->wait_completion); - - cq = &my_cq->ib_cq; - - adapter_handle = shca->ipz_hca_handle; - param.eq_handle = shca->eq.ipz_eq_handle; - - idr_preload(GFP_KERNEL); - write_lock_irqsave(&ehca_cq_idr_lock, flags); - my_cq->token = idr_alloc(&ehca_cq_idr, my_cq, 0, 0x2000000, GFP_NOWAIT); - write_unlock_irqrestore(&ehca_cq_idr_lock, flags); - idr_preload_end(); - - if (my_cq->token < 0) { - cq = ERR_PTR(-ENOMEM); - ehca_err(device, "Can't allocate new idr entry. device=%p", - device); - goto create_cq_exit1; - } - - /* - * CQs maximum depth is 4GB-64, but we need additional 20 as buffer - * for receiving errors CQEs. 
- */ - param.nr_cqe = cqe + additional_cqe; - h_ret = hipz_h_alloc_resource_cq(adapter_handle, my_cq, ¶m); - - if (h_ret != H_SUCCESS) { - ehca_err(device, "hipz_h_alloc_resource_cq() failed " - "h_ret=%lli device=%p", h_ret, device); - cq = ERR_PTR(ehca2ib_return_code(h_ret)); - goto create_cq_exit2; - } - - ipz_rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages, - EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0); - if (!ipz_rc) { - ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%i device=%p", - ipz_rc, device); - cq = ERR_PTR(-EINVAL); - goto create_cq_exit3; - } - - for (counter = 0; counter < param.act_pages; counter++) { - vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue); - if (!vpage) { - ehca_err(device, "ipz_qpageit_get_inc() " - "returns NULL device=%p", device); - cq = ERR_PTR(-EAGAIN); - goto create_cq_exit4; - } - rpage = __pa(vpage); - - h_ret = hipz_h_register_rpage_cq(adapter_handle, - my_cq->ipz_cq_handle, - &my_cq->pf, - 0, - 0, - rpage, - 1, - my_cq->galpas. - kernel); - - if (h_ret < H_SUCCESS) { - ehca_err(device, "hipz_h_register_rpage_cq() failed " - "ehca_cq=%p cq_num=%x h_ret=%lli counter=%i " - "act_pages=%i", my_cq, my_cq->cq_number, - h_ret, counter, param.act_pages); - cq = ERR_PTR(-EINVAL); - goto create_cq_exit4; - } - - if (counter == (param.act_pages - 1)) { - vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue); - if ((h_ret != H_SUCCESS) || vpage) { - ehca_err(device, "Registration of pages not " - "complete ehca_cq=%p cq_num=%x " - "h_ret=%lli", my_cq, my_cq->cq_number, - h_ret); - cq = ERR_PTR(-EAGAIN); - goto create_cq_exit4; - } - } else { - if (h_ret != H_PAGE_REGISTERED) { - ehca_err(device, "Registration of page failed " - "ehca_cq=%p cq_num=%x h_ret=%lli " - "counter=%i act_pages=%i", - my_cq, my_cq->cq_number, - h_ret, counter, param.act_pages); - cq = ERR_PTR(-ENOMEM); - goto create_cq_exit4; - } - } - } - - ipz_qeit_reset(&my_cq->ipz_queue); - - gal = my_cq->galpas.kernel; - cqx_fec = hipz_galpa_load(gal, 
CQTEMM_OFFSET(cqx_fec)); - ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%llx", - my_cq, my_cq->cq_number, cqx_fec); - - my_cq->ib_cq.cqe = my_cq->nr_of_entries = - param.act_nr_of_entries - additional_cqe; - my_cq->cq_number = (my_cq->ipz_cq_handle.handle) & 0xffff; - - for (i = 0; i < QP_HASHTAB_LEN; i++) - INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]); - - INIT_LIST_HEAD(&my_cq->sqp_err_list); - INIT_LIST_HEAD(&my_cq->rqp_err_list); - - if (context) { - struct ipz_queue *ipz_queue = &my_cq->ipz_queue; - struct ehca_create_cq_resp resp; - memset(&resp, 0, sizeof(resp)); - resp.cq_number = my_cq->cq_number; - resp.token = my_cq->token; - resp.ipz_queue.qe_size = ipz_queue->qe_size; - resp.ipz_queue.act_nr_of_sg = ipz_queue->act_nr_of_sg; - resp.ipz_queue.queue_length = ipz_queue->queue_length; - resp.ipz_queue.pagesize = ipz_queue->pagesize; - resp.ipz_queue.toggle_state = ipz_queue->toggle_state; - resp.fw_handle_ofs = (u32) - (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1)); - if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { - ehca_err(device, "Copy to udata failed."); - cq = ERR_PTR(-EFAULT); - goto create_cq_exit4; - } - } - - return cq; - -create_cq_exit4: - ipz_queue_dtor(NULL, &my_cq->ipz_queue); - -create_cq_exit3: - h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); - if (h_ret != H_SUCCESS) - ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p " - "cq_num=%x h_ret=%lli", my_cq, my_cq->cq_number, h_ret); - -create_cq_exit2: - write_lock_irqsave(&ehca_cq_idr_lock, flags); - idr_remove(&ehca_cq_idr, my_cq->token); - write_unlock_irqrestore(&ehca_cq_idr_lock, flags); - -create_cq_exit1: - kmem_cache_free(cq_cache, my_cq); - - atomic_dec(&shca->num_cqs); - return cq; -} - -int ehca_destroy_cq(struct ib_cq *cq) -{ - u64 h_ret; - struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); - int cq_num = my_cq->cq_number; - struct ib_device *device = cq->device; - struct ehca_shca *shca = container_of(device, struct ehca_shca, - ib_device); - struct 
ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; - unsigned long flags; - - if (cq->uobject) { - if (my_cq->mm_count_galpa || my_cq->mm_count_queue) { - ehca_err(device, "Resources still referenced in " - "user space cq_num=%x", my_cq->cq_number); - return -EINVAL; - } - } - - /* - * remove the CQ from the idr first to make sure - * no more interrupt tasklets will touch this CQ - */ - write_lock_irqsave(&ehca_cq_idr_lock, flags); - idr_remove(&ehca_cq_idr, my_cq->token); - write_unlock_irqrestore(&ehca_cq_idr_lock, flags); - - /* now wait until all pending events have completed */ - wait_event(my_cq->wait_completion, !atomic_read(&my_cq->nr_events)); - - /* nobody's using our CQ any longer -- we can destroy it */ - h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0); - if (h_ret == H_R_STATE) { - /* cq in err: read err data and destroy it forcibly */ - ehca_dbg(device, "ehca_cq=%p cq_num=%x resource=%llx in err " - "state. Try to delete it forcibly.", - my_cq, cq_num, my_cq->ipz_cq_handle.handle); - ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle); - h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); - if (h_ret == H_SUCCESS) - ehca_dbg(device, "cq_num=%x deleted successfully.", - cq_num); - } - if (h_ret != H_SUCCESS) { - ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lli " - "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num); - return ehca2ib_return_code(h_ret); - } - ipz_queue_dtor(NULL, &my_cq->ipz_queue); - kmem_cache_free(cq_cache, my_cq); - - atomic_dec(&shca->num_cqs); - return 0; -} - -int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) -{ - /* TODO: proper resize needs to be done */ - ehca_err(cq->device, "not implemented yet"); - - return -EFAULT; -} - -int ehca_init_cq_cache(void) -{ - cq_cache = kmem_cache_create("ehca_cache_cq", - sizeof(struct ehca_cq), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!cq_cache) - return -ENOMEM; - return 0; -} - -void ehca_cleanup_cq_cache(void) -{ - if (cq_cache) - 
kmem_cache_destroy(cq_cache); -} diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c deleted file mode 100644 index 90da6747d395..000000000000 --- a/drivers/infiniband/hw/ehca/ehca_eq.c +++ /dev/null @@ -1,189 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Event queue handling - * - * Authors: Waleri Fomin - * Khadija Souissi - * Reinhard Ernst - * Heiko J Schick - * Hoang-Nam Nguyen - * - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "ehca_classes.h" -#include "ehca_irq.h" -#include "ehca_iverbs.h" -#include "ehca_qes.h" -#include "hcp_if.h" -#include "ipz_pt_fn.h" - -int ehca_create_eq(struct ehca_shca *shca, - struct ehca_eq *eq, - const enum ehca_eq_type type, const u32 length) -{ - int ret; - u64 h_ret; - u32 nr_pages; - u32 i; - void *vpage; - struct ib_device *ib_dev = &shca->ib_device; - - spin_lock_init(&eq->spinlock); - spin_lock_init(&eq->irq_spinlock); - eq->is_initialized = 0; - - if (type != EHCA_EQ && type != EHCA_NEQ) { - ehca_err(ib_dev, "Invalid EQ type %x. eq=%p", type, eq); - return -EINVAL; - } - if (!length) { - ehca_err(ib_dev, "EQ length must not be zero. eq=%p", eq); - return -EINVAL; - } - - h_ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle, - &eq->pf, - type, - length, - &eq->ipz_eq_handle, - &eq->length, - &nr_pages, &eq->ist); - - if (h_ret != H_SUCCESS) { - ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq); - return -EINVAL; - } - - ret = ipz_queue_ctor(NULL, &eq->ipz_queue, nr_pages, - EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0, 0); - if (!ret) { - ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq); - goto create_eq_exit1; - } - - for (i = 0; i < nr_pages; i++) { - u64 rpage; - - vpage = ipz_qpageit_get_inc(&eq->ipz_queue); - if (!vpage) - goto create_eq_exit2; - - rpage = __pa(vpage); - h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle, - eq->ipz_eq_handle, - &eq->pf, - 0, 0, rpage, 1); - - if (i == (nr_pages - 1)) { - /* last page */ - vpage = ipz_qpageit_get_inc(&eq->ipz_queue); - if (h_ret != H_SUCCESS || vpage) - goto create_eq_exit2; - } else { - if (h_ret != H_PAGE_REGISTERED) - goto create_eq_exit2; - } - } - - ipz_qeit_reset(&eq->ipz_queue); - - /* register interrupt handlers and initialize work queues */ - if (type == EHCA_EQ) { - tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca); - - ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq, - 0, "ehca_eq", - (void *)shca); - if (ret < 0) - ehca_err(ib_dev, "Can't 
map interrupt handler."); - } else if (type == EHCA_NEQ) { - tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca); - - ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq, - 0, "ehca_neq", - (void *)shca); - if (ret < 0) - ehca_err(ib_dev, "Can't map interrupt handler."); - } - - eq->is_initialized = 1; - - return 0; - -create_eq_exit2: - ipz_queue_dtor(NULL, &eq->ipz_queue); - -create_eq_exit1: - hipz_h_destroy_eq(shca->ipz_hca_handle, eq); - - return -EINVAL; -} - -void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq) -{ - unsigned long flags; - void *eqe; - - spin_lock_irqsave(&eq->spinlock, flags); - eqe = ipz_eqit_eq_get_inc_valid(&eq->ipz_queue); - spin_unlock_irqrestore(&eq->spinlock, flags); - - return eqe; -} - -int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq) -{ - unsigned long flags; - u64 h_ret; - - ibmebus_free_irq(eq->ist, (void *)shca); - - spin_lock_irqsave(&shca_list_lock, flags); - eq->is_initialized = 0; - spin_unlock_irqrestore(&shca_list_lock, flags); - - tasklet_kill(&eq->interrupt_task); - - h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq); - - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't free EQ resources."); - return -EINVAL; - } - ipz_queue_dtor(NULL, &eq->ipz_queue); - - return 0; -} diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c deleted file mode 100644 index e8b1bb65797a..000000000000 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ /dev/null @@ -1,414 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * HCA query functions - * - * Authors: Heiko J Schick - * Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. 
- * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include - -#include "ehca_tools.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" - -static unsigned int limit_uint(unsigned int value) -{ - return min_t(unsigned int, value, INT_MAX); -} - -int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props, - struct ib_udata *uhw) -{ - int i, ret = 0; - struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, - ib_device); - struct hipz_query_hca *rblock; - - static const u32 cap_mapping[] = { - IB_DEVICE_RESIZE_MAX_WR, HCA_CAP_WQE_RESIZE, - IB_DEVICE_BAD_PKEY_CNTR, HCA_CAP_BAD_P_KEY_CTR, - IB_DEVICE_BAD_QKEY_CNTR, HCA_CAP_Q_KEY_VIOL_CTR, - IB_DEVICE_RAW_MULTI, HCA_CAP_RAW_PACKET_MCAST, - IB_DEVICE_AUTO_PATH_MIG, HCA_CAP_AUTO_PATH_MIG, - IB_DEVICE_CHANGE_PHY_PORT, HCA_CAP_SQD_RTS_PORT_CHANGE, - IB_DEVICE_UD_AV_PORT_ENFORCE, HCA_CAP_AH_PORT_NR_CHECK, - IB_DEVICE_CURR_QP_STATE_MOD, HCA_CAP_CUR_QP_STATE_MOD, - IB_DEVICE_SHUTDOWN_PORT, HCA_CAP_SHUTDOWN_PORT, - IB_DEVICE_INIT_TYPE, HCA_CAP_INIT_TYPE, - IB_DEVICE_PORT_ACTIVE_EVENT, HCA_CAP_PORT_ACTIVE_EVENT, - }; - - if (uhw->inlen || uhw->outlen) - return -EINVAL; - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query device properties"); - ret = -EINVAL; - goto query_device1; - } - - memset(props, 0, sizeof(struct ib_device_attr)); - props->page_size_cap = shca->hca_cap_mr_pgsize; - props->fw_ver = rblock->hw_ver; - props->max_mr_size = rblock->max_mr_size; - props->vendor_id = rblock->vendor_id >> 8; - props->vendor_part_id = rblock->vendor_part_id >> 16; - props->hw_ver = rblock->hw_ver; - props->max_qp = limit_uint(rblock->max_qp); - props->max_qp_wr = limit_uint(rblock->max_wqes_wq); - props->max_sge = limit_uint(rblock->max_sge); - props->max_sge_rd = limit_uint(rblock->max_sge_rd); - props->max_cq = limit_uint(rblock->max_cq); - 
props->max_cqe = limit_uint(rblock->max_cqe); - props->max_mr = limit_uint(rblock->max_mr); - props->max_mw = limit_uint(rblock->max_mw); - props->max_pd = limit_uint(rblock->max_pd); - props->max_ah = limit_uint(rblock->max_ah); - props->max_ee = limit_uint(rblock->max_rd_ee_context); - props->max_rdd = limit_uint(rblock->max_rd_domain); - props->max_fmr = limit_uint(rblock->max_mr); - props->max_qp_rd_atom = limit_uint(rblock->max_rr_qp); - props->max_ee_rd_atom = limit_uint(rblock->max_rr_ee_context); - props->max_res_rd_atom = limit_uint(rblock->max_rr_hca); - props->max_qp_init_rd_atom = limit_uint(rblock->max_act_wqs_qp); - props->max_ee_init_rd_atom = limit_uint(rblock->max_act_wqs_ee_context); - - if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { - props->max_srq = limit_uint(props->max_qp); - props->max_srq_wr = limit_uint(props->max_qp_wr); - props->max_srq_sge = 3; - } - - props->max_pkeys = 16; - /* Some FW versions say 0 here; insert sensible value in that case */ - props->local_ca_ack_delay = rblock->local_ca_ack_delay ? 
- min_t(u8, rblock->local_ca_ack_delay, 255) : 12; - props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp); - props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp); - props->max_mcast_grp = limit_uint(rblock->max_mcast_grp); - props->max_mcast_qp_attach = limit_uint(rblock->max_mcast_qp_attach); - props->max_total_mcast_qp_attach - = limit_uint(rblock->max_total_mcast_qp_attach); - - /* translate device capabilities */ - props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID | - IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_N_NOTIFY_CQ; - for (i = 0; i < ARRAY_SIZE(cap_mapping); i += 2) - if (rblock->hca_cap_indicators & cap_mapping[i + 1]) - props->device_cap_flags |= cap_mapping[i]; - -query_device1: - ehca_free_fw_ctrlblock(rblock); - - return ret; -} - -static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu) -{ - switch (fw_mtu) { - case 0x1: - return IB_MTU_256; - case 0x2: - return IB_MTU_512; - case 0x3: - return IB_MTU_1024; - case 0x4: - return IB_MTU_2048; - case 0x5: - return IB_MTU_4096; - default: - ehca_err(&shca->ib_device, "Unknown MTU size: %x.", - fw_mtu); - return 0; - } -} - -static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap) -{ - switch (vl_cap) { - case 0x1: - return 1; - case 0x2: - return 2; - case 0x3: - return 4; - case 0x4: - return 8; - case 0x5: - return 15; - default: - ehca_err(&shca->ib_device, "invalid Vl Capability: %x.", - vl_cap); - return 0; - } -} - -int ehca_query_port(struct ib_device *ibdev, - u8 port, struct ib_port_attr *props) -{ - int ret = 0; - u64 h_ret; - struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, - ib_device); - struct hipz_query_port *rblock; - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query port properties"); - ret = -EINVAL; - goto 
query_port1; - } - - memset(props, 0, sizeof(struct ib_port_attr)); - - props->active_mtu = props->max_mtu = map_mtu(shca, rblock->max_mtu); - props->port_cap_flags = rblock->capability_mask; - props->gid_tbl_len = rblock->gid_tbl_len; - if (rblock->max_msg_sz) - props->max_msg_sz = rblock->max_msg_sz; - else - props->max_msg_sz = 0x1 << 31; - props->bad_pkey_cntr = rblock->bad_pkey_cntr; - props->qkey_viol_cntr = rblock->qkey_viol_cntr; - props->pkey_tbl_len = rblock->pkey_tbl_len; - props->lid = rblock->lid; - props->sm_lid = rblock->sm_lid; - props->lmc = rblock->lmc; - props->sm_sl = rblock->sm_sl; - props->subnet_timeout = rblock->subnet_timeout; - props->init_type_reply = rblock->init_type_reply; - props->max_vl_num = map_number_of_vls(shca, rblock->vl_cap); - - if (rblock->state && rblock->phys_width) { - props->phys_state = rblock->phys_pstate; - props->state = rblock->phys_state; - props->active_width = rblock->phys_width; - props->active_speed = rblock->phys_speed; - } else { - /* old firmware releases don't report physical - * port info, so use default values - */ - props->phys_state = 5; - props->state = rblock->state; - props->active_width = IB_WIDTH_12X; - props->active_speed = IB_SPEED_SDR; - } - -query_port1: - ehca_free_fw_ctrlblock(rblock); - - return ret; -} - -int ehca_query_sma_attr(struct ehca_shca *shca, - u8 port, struct ehca_sma_attr *attr) -{ - int ret = 0; - u64 h_ret; - struct hipz_query_port *rblock; - - rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query port properties"); - ret = -EINVAL; - goto query_sma_attr1; - } - - memset(attr, 0, sizeof(struct ehca_sma_attr)); - - attr->lid = rblock->lid; - attr->lmc = rblock->lmc; - attr->sm_sl = rblock->sm_sl; - attr->sm_lid = rblock->sm_lid; - - attr->pkey_tbl_len 
= rblock->pkey_tbl_len; - memcpy(attr->pkeys, rblock->pkey_entries, sizeof(attr->pkeys)); - -query_sma_attr1: - ehca_free_fw_ctrlblock(rblock); - - return ret; -} - -int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) -{ - int ret = 0; - u64 h_ret; - struct ehca_shca *shca; - struct hipz_query_port *rblock; - - shca = container_of(ibdev, struct ehca_shca, ib_device); - if (index > 16) { - ehca_err(&shca->ib_device, "Invalid index: %x.", index); - return -EINVAL; - } - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query port properties"); - ret = -EINVAL; - goto query_pkey1; - } - - memcpy(pkey, &rblock->pkey_entries + index, sizeof(u16)); - -query_pkey1: - ehca_free_fw_ctrlblock(rblock); - - return ret; -} - -int ehca_query_gid(struct ib_device *ibdev, u8 port, - int index, union ib_gid *gid) -{ - int ret = 0; - u64 h_ret; - struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, - ib_device); - struct hipz_query_port *rblock; - - if (index < 0 || index > 255) { - ehca_err(&shca->ib_device, "Invalid index: %x.", index); - return -EINVAL; - } - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query port properties"); - ret = -EINVAL; - goto query_gid1; - } - - memcpy(&gid->raw[0], &rblock->gid_prefix, sizeof(u64)); - memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64)); - -query_gid1: - ehca_free_fw_ctrlblock(rblock); - - return ret; -} - -static const u32 allowed_port_caps = ( - IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP | - IB_PORT_SNMP_TUNNEL_SUP | 
IB_PORT_DEVICE_MGMT_SUP | - IB_PORT_VENDOR_CLASS_SUP); - -int ehca_modify_port(struct ib_device *ibdev, - u8 port, int port_modify_mask, - struct ib_port_modify *props) -{ - int ret = 0; - struct ehca_shca *shca; - struct hipz_query_port *rblock; - u32 cap; - u64 hret; - - shca = container_of(ibdev, struct ehca_shca, ib_device); - if ((props->set_port_cap_mask | props->clr_port_cap_mask) - & ~allowed_port_caps) { - ehca_err(&shca->ib_device, "Non-changeable bits set in masks " - "set=%x clr=%x allowed=%x", props->set_port_cap_mask, - props->clr_port_cap_mask, allowed_port_caps); - return -EINVAL; - } - - if (mutex_lock_interruptible(&shca->modify_mutex)) - return -ERESTARTSYS; - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - ret = -ENOMEM; - goto modify_port1; - } - - hret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); - if (hret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query port properties"); - ret = -EINVAL; - goto modify_port2; - } - - cap = (rblock->capability_mask | props->set_port_cap_mask) - & ~props->clr_port_cap_mask; - - hret = hipz_h_modify_port(shca->ipz_hca_handle, port, - cap, props->init_type, port_modify_mask); - if (hret != H_SUCCESS) { - ehca_err(&shca->ib_device, "Modify port failed h_ret=%lli", - hret); - ret = -EINVAL; - } - -modify_port2: - ehca_free_fw_ctrlblock(rblock); - -modify_port1: - mutex_unlock(&shca->modify_mutex); - - return ret; -} diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c deleted file mode 100644 index 8615d7cf7e01..000000000000 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ /dev/null @@ -1,870 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Functions for EQs, NEQs and interrupts - * - * Authors: Heiko J Schick - * Khadija Souissi - * Hoang-Nam Nguyen - * Joachim Fenkes - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. 
- * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include -#include - -#include "ehca_classes.h" -#include "ehca_irq.h" -#include "ehca_iverbs.h" -#include "ehca_tools.h" -#include "hcp_if.h" -#include "hipz_fns.h" -#include "ipz_pt_fn.h" - -#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) -#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM( 8, 31) -#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM( 2, 7) -#define EQE_CQ_NUMBER EHCA_BMASK_IBM( 8, 31) -#define EQE_QP_NUMBER EHCA_BMASK_IBM( 8, 31) -#define EQE_QP_TOKEN EHCA_BMASK_IBM(32, 63) -#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32, 63) - -#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) -#define NEQE_EVENT_CODE EHCA_BMASK_IBM( 2, 7) -#define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15) -#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16) -#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16) -#define NEQE_SPECIFIC_EVENT EHCA_BMASK_IBM(16, 23) - -#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63) -#define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7) - -static void queue_comp_task(struct ehca_cq *__cq); - -static struct ehca_comp_pool *pool; - -static inline void comp_event_callback(struct ehca_cq *cq) -{ - if (!cq->ib_cq.comp_handler) - return; - - spin_lock(&cq->cb_lock); - cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context); - spin_unlock(&cq->cb_lock); - - return; -} - -static void print_error_data(struct ehca_shca *shca, void *data, - u64 *rblock, int length) -{ - u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]); - u64 resource = rblock[1]; - - switch (type) { - case 0x1: /* Queue Pair */ - { - struct ehca_qp *qp = (struct ehca_qp *)data; - - /* only print error data if AER is set */ - if (rblock[6] == 0) - return; - - ehca_err(&shca->ib_device, - "QP 0x%x (resource=%llx) has errors.", - qp->ib_qp.qp_num, resource); - break; - } - case 0x4: /* Completion Queue */ - { - struct ehca_cq *cq = (struct ehca_cq *)data; - - ehca_err(&shca->ib_device, - "CQ 0x%x (resource=%llx) has errors.", - cq->cq_number, resource); - break; - } - default: - ehca_err(&shca->ib_device, - "Unknown 
error type: %llx on %s.", - type, shca->ib_device.name); - break; - } - - ehca_err(&shca->ib_device, "Error data is available: %llx.", resource); - ehca_err(&shca->ib_device, "EHCA ----- error data begin " - "---------------------------------------------------"); - ehca_dmp(rblock, length, "resource=%llx", resource); - ehca_err(&shca->ib_device, "EHCA ----- error data end " - "----------------------------------------------------"); - - return; -} - -int ehca_error_data(struct ehca_shca *shca, void *data, - u64 resource) -{ - - unsigned long ret; - u64 *rblock; - unsigned long block_count; - - rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); - if (!rblock) { - ehca_err(&shca->ib_device, "Cannot allocate rblock memory."); - ret = -ENOMEM; - goto error_data1; - } - - /* rblock must be 4K aligned and should be 4K large */ - ret = hipz_h_error_data(shca->ipz_hca_handle, - resource, - rblock, - &block_count); - - if (ret == H_R_STATE) - ehca_err(&shca->ib_device, - "No error data is available: %llx.", resource); - else if (ret == H_SUCCESS) { - int length; - - length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]); - - if (length > EHCA_PAGESIZE) - length = EHCA_PAGESIZE; - - print_error_data(shca, data, rblock, length); - } else - ehca_err(&shca->ib_device, - "Error data could not be fetched: %llx", resource); - - ehca_free_fw_ctrlblock(rblock); - -error_data1: - return ret; - -} - -static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp, - enum ib_event_type event_type) -{ - struct ib_event event; - - /* PATH_MIG without the QP ever having been armed is false alarm */ - if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed) - return; - - event.device = &shca->ib_device; - event.event = event_type; - - if (qp->ext_type == EQPT_SRQ) { - if (!qp->ib_srq.event_handler) - return; - - event.element.srq = &qp->ib_srq; - qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context); - } else { - if (!qp->ib_qp.event_handler) - return; - - event.element.qp = &qp->ib_qp; 
- qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context); - } -} - -static void qp_event_callback(struct ehca_shca *shca, u64 eqe, - enum ib_event_type event_type, int fatal) -{ - struct ehca_qp *qp; - u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe); - - read_lock(&ehca_qp_idr_lock); - qp = idr_find(&ehca_qp_idr, token); - if (qp) - atomic_inc(&qp->nr_events); - read_unlock(&ehca_qp_idr_lock); - - if (!qp) - return; - - if (fatal) - ehca_error_data(shca, qp, qp->ipz_qp_handle.handle); - - dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ? - IB_EVENT_SRQ_ERR : event_type); - - /* - * eHCA only processes one WQE at a time for SRQ base QPs, - * so the last WQE has been processed as soon as the QP enters - * error state. - */ - if (fatal && qp->ext_type == EQPT_SRQBASE) - dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED); - - if (atomic_dec_and_test(&qp->nr_events)) - wake_up(&qp->wait_completion); - return; -} - -static void cq_event_callback(struct ehca_shca *shca, - u64 eqe) -{ - struct ehca_cq *cq; - u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe); - - read_lock(&ehca_cq_idr_lock); - cq = idr_find(&ehca_cq_idr, token); - if (cq) - atomic_inc(&cq->nr_events); - read_unlock(&ehca_cq_idr_lock); - - if (!cq) - return; - - ehca_error_data(shca, cq, cq->ipz_cq_handle.handle); - - if (atomic_dec_and_test(&cq->nr_events)) - wake_up(&cq->wait_completion); - - return; -} - -static void parse_identifier(struct ehca_shca *shca, u64 eqe) -{ - u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe); - - switch (identifier) { - case 0x02: /* path migrated */ - qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0); - break; - case 0x03: /* communication established */ - qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0); - break; - case 0x04: /* send queue drained */ - qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0); - break; - case 0x05: /* QP error */ - case 0x06: /* QP error */ - qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1); - break; - case 0x07: /* CQ 
error */ - case 0x08: /* CQ error */ - cq_event_callback(shca, eqe); - break; - case 0x09: /* MRMWPTE error */ - ehca_err(&shca->ib_device, "MRMWPTE error."); - break; - case 0x0A: /* port event */ - ehca_err(&shca->ib_device, "Port event."); - break; - case 0x0B: /* MR access error */ - ehca_err(&shca->ib_device, "MR access error."); - break; - case 0x0C: /* EQ error */ - ehca_err(&shca->ib_device, "EQ error."); - break; - case 0x0D: /* P/Q_Key mismatch */ - ehca_err(&shca->ib_device, "P/Q_Key mismatch."); - break; - case 0x10: /* sampling complete */ - ehca_err(&shca->ib_device, "Sampling complete."); - break; - case 0x11: /* unaffiliated access error */ - ehca_err(&shca->ib_device, "Unaffiliated access error."); - break; - case 0x12: /* path migrating */ - ehca_err(&shca->ib_device, "Path migrating."); - break; - case 0x13: /* interface trace stopped */ - ehca_err(&shca->ib_device, "Interface trace stopped."); - break; - case 0x14: /* first error capture info available */ - ehca_info(&shca->ib_device, "First error capture available"); - break; - case 0x15: /* SRQ limit reached */ - qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0); - break; - default: - ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.", - identifier, shca->ib_device.name); - break; - } - - return; -} - -static void dispatch_port_event(struct ehca_shca *shca, int port_num, - enum ib_event_type type, const char *msg) -{ - struct ib_event event; - - ehca_info(&shca->ib_device, "port %d %s.", port_num, msg); - event.device = &shca->ib_device; - event.event = type; - event.element.port_num = port_num; - ib_dispatch_event(&event); -} - -static void notify_port_conf_change(struct ehca_shca *shca, int port_num) -{ - struct ehca_sma_attr new_attr; - struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr; - - ehca_query_sma_attr(shca, port_num, &new_attr); - - if (new_attr.sm_sl != old_attr->sm_sl || - new_attr.sm_lid != old_attr->sm_lid) - dispatch_port_event(shca, 
port_num, IB_EVENT_SM_CHANGE, - "SM changed"); - - if (new_attr.lid != old_attr->lid || - new_attr.lmc != old_attr->lmc) - dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE, - "LID changed"); - - if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len || - memcmp(new_attr.pkeys, old_attr->pkeys, - sizeof(u16) * new_attr.pkey_tbl_len)) - dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE, - "P_Key changed"); - - *old_attr = new_attr; -} - -/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */ -static int replay_modify_qp(struct ehca_sport *sport) -{ - int aqp1_destroyed; - unsigned long flags; - - spin_lock_irqsave(&sport->mod_sqp_lock, flags); - - aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI]; - - if (sport->ibqp_sqp[IB_QPT_SMI]) - ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]); - if (!aqp1_destroyed) - ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]); - - spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); - - return aqp1_destroyed; -} - -static void parse_ec(struct ehca_shca *shca, u64 eqe) -{ - u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe); - u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); - u8 spec_event; - struct ehca_sport *sport = &shca->sport[port - 1]; - - switch (ec) { - case 0x30: /* port availability change */ - if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) { - /* only replay modify_qp calls in autodetect mode; - * if AQP1 was destroyed, the port is already down - * again and we can drop the event. 
- */ - if (ehca_nr_ports < 0) - if (replay_modify_qp(sport)) - break; - - sport->port_state = IB_PORT_ACTIVE; - dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, - "is active"); - ehca_query_sma_attr(shca, port, &sport->saved_attr); - } else { - sport->port_state = IB_PORT_DOWN; - dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, - "is inactive"); - } - break; - case 0x31: - /* port configuration change - * disruptive change is caused by - * LID, PKEY or SM change - */ - if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) { - ehca_warn(&shca->ib_device, "disruptive port " - "%d configuration change", port); - - sport->port_state = IB_PORT_DOWN; - dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, - "is inactive"); - - sport->port_state = IB_PORT_ACTIVE; - dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, - "is active"); - ehca_query_sma_attr(shca, port, - &sport->saved_attr); - } else - notify_port_conf_change(shca, port); - break; - case 0x32: /* adapter malfunction */ - ehca_err(&shca->ib_device, "Adapter malfunction."); - break; - case 0x33: /* trace stopped */ - ehca_err(&shca->ib_device, "Traced stopped."); - break; - case 0x34: /* util async event */ - spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe); - if (spec_event == 0x80) /* client reregister required */ - dispatch_port_event(shca, port, - IB_EVENT_CLIENT_REREGISTER, - "client reregister req."); - else - ehca_warn(&shca->ib_device, "Unknown util async " - "event %x on port %x", spec_event, port); - break; - default: - ehca_err(&shca->ib_device, "Unknown event code: %x on %s.", - ec, shca->ib_device.name); - break; - } - - return; -} - -static inline void reset_eq_pending(struct ehca_cq *cq) -{ - u64 CQx_EP; - struct h_galpa gal = cq->galpas.kernel; - - hipz_galpa_store_cq(gal, cqx_ep, 0x0); - CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep)); - - return; -} - -irqreturn_t ehca_interrupt_neq(int irq, void *dev_id) -{ - struct ehca_shca *shca = (struct ehca_shca*)dev_id; - - 
tasklet_hi_schedule(&shca->neq.interrupt_task); - - return IRQ_HANDLED; -} - -void ehca_tasklet_neq(unsigned long data) -{ - struct ehca_shca *shca = (struct ehca_shca*)data; - struct ehca_eqe *eqe; - u64 ret; - - eqe = ehca_poll_eq(shca, &shca->neq); - - while (eqe) { - if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry)) - parse_ec(shca, eqe->entry); - - eqe = ehca_poll_eq(shca, &shca->neq); - } - - ret = hipz_h_reset_event(shca->ipz_hca_handle, - shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL); - - if (ret != H_SUCCESS) - ehca_err(&shca->ib_device, "Can't clear notification events."); - - return; -} - -irqreturn_t ehca_interrupt_eq(int irq, void *dev_id) -{ - struct ehca_shca *shca = (struct ehca_shca*)dev_id; - - tasklet_hi_schedule(&shca->eq.interrupt_task); - - return IRQ_HANDLED; -} - - -static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe) -{ - u64 eqe_value; - u32 token; - struct ehca_cq *cq; - - eqe_value = eqe->entry; - ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value); - if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { - ehca_dbg(&shca->ib_device, "Got completion event"); - token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); - read_lock(&ehca_cq_idr_lock); - cq = idr_find(&ehca_cq_idr, token); - if (cq) - atomic_inc(&cq->nr_events); - read_unlock(&ehca_cq_idr_lock); - if (cq == NULL) { - ehca_err(&shca->ib_device, - "Invalid eqe for non-existing cq token=%x", - token); - return; - } - reset_eq_pending(cq); - if (ehca_scaling_code) - queue_comp_task(cq); - else { - comp_event_callback(cq); - if (atomic_dec_and_test(&cq->nr_events)) - wake_up(&cq->wait_completion); - } - } else { - ehca_dbg(&shca->ib_device, "Got non completion event"); - parse_identifier(shca, eqe_value); - } -} - -void ehca_process_eq(struct ehca_shca *shca, int is_irq) -{ - struct ehca_eq *eq = &shca->eq; - struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache; - u64 eqe_value, ret; - int eqe_cnt, i; - int eq_empty = 0; - - 
spin_lock(&eq->irq_spinlock); - if (is_irq) { - const int max_query_cnt = 100; - int query_cnt = 0; - int int_state = 1; - do { - int_state = hipz_h_query_int_state( - shca->ipz_hca_handle, eq->ist); - query_cnt++; - iosync(); - } while (int_state && query_cnt < max_query_cnt); - if (unlikely((query_cnt == max_query_cnt))) - ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x", - int_state, query_cnt); - } - - /* read out all eqes */ - eqe_cnt = 0; - do { - u32 token; - eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq); - if (!eqe_cache[eqe_cnt].eqe) - break; - eqe_value = eqe_cache[eqe_cnt].eqe->entry; - if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { - token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); - read_lock(&ehca_cq_idr_lock); - eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token); - if (eqe_cache[eqe_cnt].cq) - atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events); - read_unlock(&ehca_cq_idr_lock); - if (!eqe_cache[eqe_cnt].cq) { - ehca_err(&shca->ib_device, - "Invalid eqe for non-existing cq " - "token=%x", token); - continue; - } - } else - eqe_cache[eqe_cnt].cq = NULL; - eqe_cnt++; - } while (eqe_cnt < EHCA_EQE_CACHE_SIZE); - if (!eqe_cnt) { - if (is_irq) - ehca_dbg(&shca->ib_device, - "No eqe found for irq event"); - goto unlock_irq_spinlock; - } else if (!is_irq) { - ret = hipz_h_eoi(eq->ist); - if (ret != H_SUCCESS) - ehca_err(&shca->ib_device, - "bad return code EOI -rc = %lld\n", ret); - ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt); - } - if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE)) - ehca_dbg(&shca->ib_device, "too many eqes for one irq event"); - /* enable irq for new packets */ - for (i = 0; i < eqe_cnt; i++) { - if (eq->eqe_cache[i].cq) - reset_eq_pending(eq->eqe_cache[i].cq); - } - /* check eq */ - spin_lock(&eq->spinlock); - eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue)); - spin_unlock(&eq->spinlock); - /* call completion handler for cached eqes */ - for (i = 0; i < eqe_cnt; i++) - if (eq->eqe_cache[i].cq) { - if 
(ehca_scaling_code) - queue_comp_task(eq->eqe_cache[i].cq); - else { - struct ehca_cq *cq = eq->eqe_cache[i].cq; - comp_event_callback(cq); - if (atomic_dec_and_test(&cq->nr_events)) - wake_up(&cq->wait_completion); - } - } else { - ehca_dbg(&shca->ib_device, "Got non completion event"); - parse_identifier(shca, eq->eqe_cache[i].eqe->entry); - } - /* poll eq if not empty */ - if (eq_empty) - goto unlock_irq_spinlock; - do { - struct ehca_eqe *eqe; - eqe = ehca_poll_eq(shca, &shca->eq); - if (!eqe) - break; - process_eqe(shca, eqe); - } while (1); - -unlock_irq_spinlock: - spin_unlock(&eq->irq_spinlock); -} - -void ehca_tasklet_eq(unsigned long data) -{ - ehca_process_eq((struct ehca_shca*)data, 1); -} - -static int find_next_online_cpu(struct ehca_comp_pool *pool) -{ - int cpu; - unsigned long flags; - - WARN_ON_ONCE(!in_interrupt()); - if (ehca_debug_level >= 3) - ehca_dmp(cpu_online_mask, cpumask_size(), ""); - - spin_lock_irqsave(&pool->last_cpu_lock, flags); - do { - cpu = cpumask_next(pool->last_cpu, cpu_online_mask); - if (cpu >= nr_cpu_ids) - cpu = cpumask_first(cpu_online_mask); - pool->last_cpu = cpu; - } while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active); - spin_unlock_irqrestore(&pool->last_cpu_lock, flags); - - return cpu; -} - -static void __queue_comp_task(struct ehca_cq *__cq, - struct ehca_cpu_comp_task *cct, - struct task_struct *thread) -{ - unsigned long flags; - - spin_lock_irqsave(&cct->task_lock, flags); - spin_lock(&__cq->task_lock); - - if (__cq->nr_callbacks == 0) { - __cq->nr_callbacks++; - list_add_tail(&__cq->entry, &cct->cq_list); - cct->cq_jobs++; - wake_up_process(thread); - } else - __cq->nr_callbacks++; - - spin_unlock(&__cq->task_lock); - spin_unlock_irqrestore(&cct->task_lock, flags); -} - -static void queue_comp_task(struct ehca_cq *__cq) -{ - int cpu_id; - struct ehca_cpu_comp_task *cct; - struct task_struct *thread; - int cq_jobs; - unsigned long flags; - - cpu_id = find_next_online_cpu(pool); - 
BUG_ON(!cpu_online(cpu_id)); - - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); - thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); - BUG_ON(!cct || !thread); - - spin_lock_irqsave(&cct->task_lock, flags); - cq_jobs = cct->cq_jobs; - spin_unlock_irqrestore(&cct->task_lock, flags); - if (cq_jobs > 0) { - cpu_id = find_next_online_cpu(pool); - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); - thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); - BUG_ON(!cct || !thread); - } - __queue_comp_task(__cq, cct, thread); -} - -static void run_comp_task(struct ehca_cpu_comp_task *cct) -{ - struct ehca_cq *cq; - - while (!list_empty(&cct->cq_list)) { - cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); - spin_unlock_irq(&cct->task_lock); - - comp_event_callback(cq); - if (atomic_dec_and_test(&cq->nr_events)) - wake_up(&cq->wait_completion); - - spin_lock_irq(&cct->task_lock); - spin_lock(&cq->task_lock); - cq->nr_callbacks--; - if (!cq->nr_callbacks) { - list_del_init(cct->cq_list.next); - cct->cq_jobs--; - } - spin_unlock(&cq->task_lock); - } -} - -static void comp_task_park(unsigned int cpu) -{ - struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - struct ehca_cpu_comp_task *target; - struct task_struct *thread; - struct ehca_cq *cq, *tmp; - LIST_HEAD(list); - - spin_lock_irq(&cct->task_lock); - cct->cq_jobs = 0; - cct->active = 0; - list_splice_init(&cct->cq_list, &list); - spin_unlock_irq(&cct->task_lock); - - cpu = find_next_online_cpu(pool); - target = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu); - spin_lock_irq(&target->task_lock); - list_for_each_entry_safe(cq, tmp, &list, entry) { - list_del(&cq->entry); - __queue_comp_task(cq, target, thread); - } - spin_unlock_irq(&target->task_lock); -} - -static void comp_task_stop(unsigned int cpu, bool online) -{ - struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - - spin_lock_irq(&cct->task_lock); - cct->cq_jobs = 
0; - cct->active = 0; - WARN_ON(!list_empty(&cct->cq_list)); - spin_unlock_irq(&cct->task_lock); -} - -static int comp_task_should_run(unsigned int cpu) -{ - struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - - return cct->cq_jobs; -} - -static void comp_task(unsigned int cpu) -{ - struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks); - int cql_empty; - - spin_lock_irq(&cct->task_lock); - cql_empty = list_empty(&cct->cq_list); - if (!cql_empty) { - __set_current_state(TASK_RUNNING); - run_comp_task(cct); - } - spin_unlock_irq(&cct->task_lock); -} - -static struct smp_hotplug_thread comp_pool_threads = { - .thread_should_run = comp_task_should_run, - .thread_fn = comp_task, - .thread_comm = "ehca_comp/%u", - .cleanup = comp_task_stop, - .park = comp_task_park, -}; - -int ehca_create_comp_pool(void) -{ - int cpu, ret = -ENOMEM; - - if (!ehca_scaling_code) - return 0; - - pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL); - if (pool == NULL) - return -ENOMEM; - - spin_lock_init(&pool->last_cpu_lock); - pool->last_cpu = cpumask_any(cpu_online_mask); - - pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); - if (!pool->cpu_comp_tasks) - goto out_pool; - - pool->cpu_comp_threads = alloc_percpu(struct task_struct *); - if (!pool->cpu_comp_threads) - goto out_tasks; - - for_each_present_cpu(cpu) { - struct ehca_cpu_comp_task *cct; - - cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); - spin_lock_init(&cct->task_lock); - INIT_LIST_HEAD(&cct->cq_list); - } - - comp_pool_threads.store = pool->cpu_comp_threads; - ret = smpboot_register_percpu_thread(&comp_pool_threads); - if (ret) - goto out_threads; - - pr_info("eHCA scaling code enabled\n"); - return ret; - -out_threads: - free_percpu(pool->cpu_comp_threads); -out_tasks: - free_percpu(pool->cpu_comp_tasks); -out_pool: - kfree(pool); - return ret; -} - -void ehca_destroy_comp_pool(void) -{ - if (!ehca_scaling_code) - return; - - 
smpboot_unregister_percpu_thread(&comp_pool_threads); - - free_percpu(pool->cpu_comp_threads); - free_percpu(pool->cpu_comp_tasks); - kfree(pool); -} diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h deleted file mode 100644 index 5370199f08c7..000000000000 --- a/drivers/infiniband/hw/ehca/ehca_irq.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Function definitions and structs for EQs, NEQs and interrupts - * - * Authors: Heiko J Schick - * Khadija Souissi - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __EHCA_IRQ_H -#define __EHCA_IRQ_H - - -struct ehca_shca; - -#include -#include - -int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource); - -irqreturn_t ehca_interrupt_neq(int irq, void *dev_id); -void ehca_tasklet_neq(unsigned long data); - -irqreturn_t ehca_interrupt_eq(int irq, void *dev_id); -void ehca_tasklet_eq(unsigned long data); -void ehca_process_eq(struct ehca_shca *shca, int is_irq); - -struct ehca_cpu_comp_task { - struct list_head cq_list; - spinlock_t task_lock; - int cq_jobs; - int active; -}; - -struct ehca_comp_pool { - struct ehca_cpu_comp_task __percpu *cpu_comp_tasks; - struct task_struct * __percpu *cpu_comp_threads; - int last_cpu; - spinlock_t last_cpu_lock; -}; - -int ehca_create_comp_pool(void); -void ehca_destroy_comp_pool(void); - -#endif diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h deleted file mode 100644 index 80e6a3d5df3e..000000000000 --- a/drivers/infiniband/hw/ehca/ehca_iverbs.h +++ /dev/null @@ -1,218 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Function definitions for internal functions - * - * Authors: Heiko J Schick - * Dietmar Decker - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. 
- * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __EHCA_IVERBS_H__ -#define __EHCA_IVERBS_H__ - -#include "ehca_classes.h" - -int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props, - struct ib_udata *uhw); - -int ehca_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props); - -enum rdma_protocol_type -ehca_query_protocol(struct ib_device *device, u8 port_num); - -int ehca_query_sma_attr(struct ehca_shca *shca, u8 port, - struct ehca_sma_attr *attr); - -int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey); - -int ehca_query_gid(struct ib_device *ibdev, u8 port, int index, - union ib_gid *gid); - -int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask, - struct ib_port_modify *props); - -struct ib_pd *ehca_alloc_pd(struct ib_device *device, - struct ib_ucontext *context, - struct ib_udata *udata); - -int ehca_dealloc_pd(struct ib_pd *pd); - -struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); - -int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); - -int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); - -int ehca_destroy_ah(struct ib_ah *ah); - -struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags); - -struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, u64 *iova_start); - -struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt, int mr_access_flags, - struct ib_udata *udata); - -int ehca_rereg_phys_mr(struct ib_mr *mr, - int mr_rereg_mask, - struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, int mr_access_flags, u64 *iova_start); - -int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); - -int ehca_dereg_mr(struct ib_mr *mr); - -struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); - -int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw, - struct ib_mw_bind *mw_bind); - -int ehca_dealloc_mw(struct 
ib_mw *mw); - -struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, - int mr_access_flags, - struct ib_fmr_attr *fmr_attr); - -int ehca_map_phys_fmr(struct ib_fmr *fmr, - u64 *page_list, int list_len, u64 iova); - -int ehca_unmap_fmr(struct list_head *fmr_list); - -int ehca_dealloc_fmr(struct ib_fmr *fmr); - -enum ehca_eq_type { - EHCA_EQ = 0, /* Event Queue */ - EHCA_NEQ /* Notification Event Queue */ -}; - -int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq, - enum ehca_eq_type type, const u32 length); - -int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq); - -void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq); - - -struct ib_cq *ehca_create_cq(struct ib_device *device, - const struct ib_cq_init_attr *attr, - struct ib_ucontext *context, - struct ib_udata *udata); - -int ehca_destroy_cq(struct ib_cq *cq); - -int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata); - -int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc); - -int ehca_peek_cq(struct ib_cq *cq, int wc_cnt); - -int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags); - -struct ib_qp *ehca_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata); - -int ehca_destroy_qp(struct ib_qp *qp); - -int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, - struct ib_udata *udata); - -int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, - int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); - -int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr); - -int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr); - -int ehca_post_srq_recv(struct ib_srq *srq, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr); - -struct ib_srq *ehca_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata); - -int ehca_modify_srq(struct ib_srq 
*srq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); - -int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); - -int ehca_destroy_srq(struct ib_srq *srq); - -u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp, - struct ib_qp_init_attr *qp_init_attr); - -int ehca_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); - -int ehca_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); - -struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device, - struct ib_udata *udata); - -int ehca_dealloc_ucontext(struct ib_ucontext *context); - -int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); - -int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index); - -void ehca_poll_eqs(unsigned long data); - -int ehca_calc_ipd(struct ehca_shca *shca, int port, - enum ib_rate path_rate, u32 *ipd); - -void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq); - -#ifdef CONFIG_PPC_64K_PAGES -void *ehca_alloc_fw_ctrlblock(gfp_t flags); -void ehca_free_fw_ctrlblock(void *ptr); -#else -#define ehca_alloc_fw_ctrlblock(flags) ((void *)get_zeroed_page(flags)) -#define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr)) -#endif - -void ehca_recover_sqp(struct ib_qp *sqp); - -#endif diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c deleted file mode 100644 index 8246418cd4e0..000000000000 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ /dev/null @@ -1,1123 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * module start stop, hca detection - * - * Authors: Heiko J Schick - * Hoang-Nam Nguyen - * Joachim Fenkes - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. 
- * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifdef CONFIG_PPC_64K_PAGES -#include -#endif - -#include -#include -#include -#include "ehca_classes.h" -#include "ehca_iverbs.h" -#include "ehca_mrmw.h" -#include "ehca_tools.h" -#include "hcp_if.h" - -#define HCAD_VERSION "0029" - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Christoph Raisch "); -MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); -MODULE_VERSION(HCAD_VERSION); - -static bool ehca_open_aqp1 = 0; -static int ehca_hw_level = 0; -static bool ehca_poll_all_eqs = 1; - -int ehca_debug_level = 0; -int ehca_nr_ports = -1; -bool ehca_use_hp_mr = 0; -int ehca_port_act_time = 30; -int ehca_static_rate = -1; -bool ehca_scaling_code = 0; -int ehca_lock_hcalls = -1; -int ehca_max_cq = -1; -int ehca_max_qp = -1; - -module_param_named(open_aqp1, ehca_open_aqp1, bool, S_IRUGO); -module_param_named(debug_level, ehca_debug_level, int, S_IRUGO); -module_param_named(hw_level, ehca_hw_level, int, S_IRUGO); -module_param_named(nr_ports, ehca_nr_ports, int, S_IRUGO); -module_param_named(use_hp_mr, ehca_use_hp_mr, bool, S_IRUGO); -module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO); -module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO); -module_param_named(static_rate, ehca_static_rate, int, S_IRUGO); -module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO); -module_param_named(lock_hcalls, ehca_lock_hcalls, bint, S_IRUGO); -module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO); -module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO); - -MODULE_PARM_DESC(open_aqp1, - "Open AQP1 on startup (default: no)"); -MODULE_PARM_DESC(debug_level, - "Amount of debug output (0: none (default), 1: traces, " - "2: some dumps, 3: lots)"); -MODULE_PARM_DESC(hw_level, - "Hardware level (0: autosensing (default), " - "0x10..0x14: eHCA, 0x20..0x23: eHCA2)"); -MODULE_PARM_DESC(nr_ports, - "number of connected ports (-1: autodetect (default), " - "1: port one only, 2: two ports)"); -MODULE_PARM_DESC(use_hp_mr, - 
"Use high performance MRs (default: no)"); -MODULE_PARM_DESC(port_act_time, - "Time to wait for port activation (default: 30 sec)"); -MODULE_PARM_DESC(poll_all_eqs, - "Poll all event queues periodically (default: yes)"); -MODULE_PARM_DESC(static_rate, - "Set permanent static rate (default: no static rate)"); -MODULE_PARM_DESC(scaling_code, - "Enable scaling code (default: no)"); -MODULE_PARM_DESC(lock_hcalls, - "Serialize all hCalls made by the driver " - "(default: autodetect)"); -MODULE_PARM_DESC(number_of_cqs, - "Max number of CQs which can be allocated " - "(default: autodetect)"); -MODULE_PARM_DESC(number_of_qps, - "Max number of QPs which can be allocated " - "(default: autodetect)"); - -DEFINE_RWLOCK(ehca_qp_idr_lock); -DEFINE_RWLOCK(ehca_cq_idr_lock); -DEFINE_IDR(ehca_qp_idr); -DEFINE_IDR(ehca_cq_idr); - -static LIST_HEAD(shca_list); /* list of all registered ehcas */ -DEFINE_SPINLOCK(shca_list_lock); - -static struct timer_list poll_eqs_timer; - -#ifdef CONFIG_PPC_64K_PAGES -static struct kmem_cache *ctblk_cache; - -void *ehca_alloc_fw_ctrlblock(gfp_t flags) -{ - void *ret = kmem_cache_zalloc(ctblk_cache, flags); - if (!ret) - ehca_gen_err("Out of memory for ctblk"); - return ret; -} - -void ehca_free_fw_ctrlblock(void *ptr) -{ - if (ptr) - kmem_cache_free(ctblk_cache, ptr); - -} -#endif - -int ehca2ib_return_code(u64 ehca_rc) -{ - switch (ehca_rc) { - case H_SUCCESS: - return 0; - case H_RESOURCE: /* Resource in use */ - case H_BUSY: - return -EBUSY; - case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ - case H_CONSTRAINED: /* resource constraint */ - case H_NO_MEM: - return -ENOMEM; - default: - return -EINVAL; - } -} - -static int ehca_create_slab_caches(void) -{ - int ret; - - ret = ehca_init_pd_cache(); - if (ret) { - ehca_gen_err("Cannot create PD SLAB cache."); - return ret; - } - - ret = ehca_init_cq_cache(); - if (ret) { - ehca_gen_err("Cannot create CQ SLAB cache."); - goto create_slab_caches2; - } - - ret = ehca_init_qp_cache(); - if 
(ret) { - ehca_gen_err("Cannot create QP SLAB cache."); - goto create_slab_caches3; - } - - ret = ehca_init_av_cache(); - if (ret) { - ehca_gen_err("Cannot create AV SLAB cache."); - goto create_slab_caches4; - } - - ret = ehca_init_mrmw_cache(); - if (ret) { - ehca_gen_err("Cannot create MR&MW SLAB cache."); - goto create_slab_caches5; - } - - ret = ehca_init_small_qp_cache(); - if (ret) { - ehca_gen_err("Cannot create small queue SLAB cache."); - goto create_slab_caches6; - } - -#ifdef CONFIG_PPC_64K_PAGES - ctblk_cache = kmem_cache_create("ehca_cache_ctblk", - EHCA_PAGESIZE, H_CB_ALIGNMENT, - SLAB_HWCACHE_ALIGN, - NULL); - if (!ctblk_cache) { - ehca_gen_err("Cannot create ctblk SLAB cache."); - ehca_cleanup_small_qp_cache(); - ret = -ENOMEM; - goto create_slab_caches6; - } -#endif - return 0; - -create_slab_caches6: - ehca_cleanup_mrmw_cache(); - -create_slab_caches5: - ehca_cleanup_av_cache(); - -create_slab_caches4: - ehca_cleanup_qp_cache(); - -create_slab_caches3: - ehca_cleanup_cq_cache(); - -create_slab_caches2: - ehca_cleanup_pd_cache(); - - return ret; -} - -static void ehca_destroy_slab_caches(void) -{ - ehca_cleanup_small_qp_cache(); - ehca_cleanup_mrmw_cache(); - ehca_cleanup_av_cache(); - ehca_cleanup_qp_cache(); - ehca_cleanup_cq_cache(); - ehca_cleanup_pd_cache(); -#ifdef CONFIG_PPC_64K_PAGES - if (ctblk_cache) - kmem_cache_destroy(ctblk_cache); -#endif -} - -#define EHCA_HCAAVER EHCA_BMASK_IBM(32, 39) -#define EHCA_REVID EHCA_BMASK_IBM(40, 63) - -static struct cap_descr { - u64 mask; - char *descr; -} hca_cap_descr[] = { - { HCA_CAP_AH_PORT_NR_CHECK, "HCA_CAP_AH_PORT_NR_CHECK" }, - { HCA_CAP_ATOMIC, "HCA_CAP_ATOMIC" }, - { HCA_CAP_AUTO_PATH_MIG, "HCA_CAP_AUTO_PATH_MIG" }, - { HCA_CAP_BAD_P_KEY_CTR, "HCA_CAP_BAD_P_KEY_CTR" }, - { HCA_CAP_SQD_RTS_PORT_CHANGE, "HCA_CAP_SQD_RTS_PORT_CHANGE" }, - { HCA_CAP_CUR_QP_STATE_MOD, "HCA_CAP_CUR_QP_STATE_MOD" }, - { HCA_CAP_INIT_TYPE, "HCA_CAP_INIT_TYPE" }, - { HCA_CAP_PORT_ACTIVE_EVENT, 
"HCA_CAP_PORT_ACTIVE_EVENT" }, - { HCA_CAP_Q_KEY_VIOL_CTR, "HCA_CAP_Q_KEY_VIOL_CTR" }, - { HCA_CAP_WQE_RESIZE, "HCA_CAP_WQE_RESIZE" }, - { HCA_CAP_RAW_PACKET_MCAST, "HCA_CAP_RAW_PACKET_MCAST" }, - { HCA_CAP_SHUTDOWN_PORT, "HCA_CAP_SHUTDOWN_PORT" }, - { HCA_CAP_RC_LL_QP, "HCA_CAP_RC_LL_QP" }, - { HCA_CAP_SRQ, "HCA_CAP_SRQ" }, - { HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" }, - { HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" }, - { HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" }, - { HCA_CAP_H_ALLOC_RES_SYNC, "HCA_CAP_H_ALLOC_RES_SYNC" }, -}; - -static int ehca_sense_attributes(struct ehca_shca *shca) -{ - int i, ret = 0; - u64 h_ret; - struct hipz_query_hca *rblock; - struct hipz_query_port *port; - const char *loc_code; - - static const u32 pgsize_map[] = { - HCA_CAP_MR_PGSIZE_4K, 0x1000, - HCA_CAP_MR_PGSIZE_64K, 0x10000, - HCA_CAP_MR_PGSIZE_1M, 0x100000, - HCA_CAP_MR_PGSIZE_16M, 0x1000000, - }; - - ehca_gen_dbg("Probing adapter %s...", - shca->ofdev->dev.of_node->full_name); - loc_code = of_get_property(shca->ofdev->dev.of_node, "ibm,loc-code", - NULL); - if (loc_code) - ehca_gen_dbg(" ... location lode=%s", loc_code); - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_gen_err("Cannot allocate rblock memory."); - return -ENOMEM; - } - - h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock); - if (h_ret != H_SUCCESS) { - ehca_gen_err("Cannot query device properties. h_ret=%lli", - h_ret); - ret = -EPERM; - goto sense_attributes1; - } - - if (ehca_nr_ports == 1) - shca->num_ports = 1; - else - shca->num_ports = (u8)rblock->num_ports; - - ehca_gen_dbg(" ... found %x ports", rblock->num_ports); - - if (ehca_hw_level == 0) { - u32 hcaaver; - u32 revid; - - hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver); - revid = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver); - - ehca_gen_dbg(" ... 
hardware version=%x:%x", hcaaver, revid); - - if (hcaaver == 1) { - if (revid <= 3) - shca->hw_level = 0x10 | (revid + 1); - else - shca->hw_level = 0x14; - } else if (hcaaver == 2) { - if (revid == 0) - shca->hw_level = 0x21; - else if (revid == 0x10) - shca->hw_level = 0x22; - else if (revid == 0x20 || revid == 0x21) - shca->hw_level = 0x23; - } - - if (!shca->hw_level) { - ehca_gen_warn("unknown hardware version" - " - assuming default level"); - shca->hw_level = 0x22; - } - } else - shca->hw_level = ehca_hw_level; - ehca_gen_dbg(" ... hardware level=%x", shca->hw_level); - - shca->hca_cap = rblock->hca_cap_indicators; - ehca_gen_dbg(" ... HCA capabilities:"); - for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++) - if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap)) - ehca_gen_dbg(" %s", hca_cap_descr[i].descr); - - /* Autodetect hCall locking -- the "H_ALLOC_RESOURCE synced" flag is - * a firmware property, so it's valid across all adapters - */ - if (ehca_lock_hcalls == -1) - ehca_lock_hcalls = !EHCA_BMASK_GET(HCA_CAP_H_ALLOC_RES_SYNC, - shca->hca_cap); - - /* translate supported MR page sizes; always support 4K */ - shca->hca_cap_mr_pgsize = EHCA_PAGESIZE; - for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2) - if (rblock->memory_page_size_supported & pgsize_map[i]) - shca->hca_cap_mr_pgsize |= pgsize_map[i + 1]; - - /* Set maximum number of CQs and QPs to calculate EQ size */ - if (shca->max_num_qps == -1) - shca->max_num_qps = min_t(int, rblock->max_qp, - EHCA_MAX_NUM_QUEUES); - else if (shca->max_num_qps < 1 || shca->max_num_qps > rblock->max_qp) { - ehca_gen_warn("The requested number of QPs is out of range " - "(1 - %i) specified by HW. 
Value is set to %i", - rblock->max_qp, rblock->max_qp); - shca->max_num_qps = rblock->max_qp; - } - - if (shca->max_num_cqs == -1) - shca->max_num_cqs = min_t(int, rblock->max_cq, - EHCA_MAX_NUM_QUEUES); - else if (shca->max_num_cqs < 1 || shca->max_num_cqs > rblock->max_cq) { - ehca_gen_warn("The requested number of CQs is out of range " - "(1 - %i) specified by HW. Value is set to %i", - rblock->max_cq, rblock->max_cq); - } - - /* query max MTU from first port -- it's the same for all ports */ - port = (struct hipz_query_port *)rblock; - h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); - if (h_ret != H_SUCCESS) { - ehca_gen_err("Cannot query port properties. h_ret=%lli", - h_ret); - ret = -EPERM; - goto sense_attributes1; - } - - shca->max_mtu = port->max_mtu; - -sense_attributes1: - ehca_free_fw_ctrlblock(rblock); - return ret; -} - -static int init_node_guid(struct ehca_shca *shca) -{ - int ret = 0; - struct hipz_query_hca *rblock; - - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!rblock) { - ehca_err(&shca->ib_device, "Can't allocate rblock memory."); - return -ENOMEM; - } - - if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { - ehca_err(&shca->ib_device, "Can't query device properties"); - ret = -EINVAL; - goto init_node_guid1; - } - - memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64)); - -init_node_guid1: - ehca_free_fw_ctrlblock(rblock); - return ret; -} - -static int ehca_port_immutable(struct ib_device *ibdev, u8 port_num, - struct ib_port_immutable *immutable) -{ - struct ib_port_attr attr; - int err; - - err = ehca_query_port(ibdev, port_num, &attr); - if (err) - return err; - - immutable->pkey_tbl_len = attr.pkey_tbl_len; - immutable->gid_tbl_len = attr.gid_tbl_len; - immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; - immutable->max_mad_size = IB_MGMT_MAD_SIZE; - - return 0; -} - -static int ehca_init_device(struct ehca_shca *shca) -{ - int ret; - - ret = init_node_guid(shca); - if (ret) - return ret; 
- - strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX); - shca->ib_device.owner = THIS_MODULE; - - shca->ib_device.uverbs_abi_ver = 8; - shca->ib_device.uverbs_cmd_mask = - (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | - (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | - (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | - (1ull << IB_USER_VERBS_CMD_REG_MR) | - (1ull << IB_USER_VERBS_CMD_DEREG_MR) | - (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | - (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | - (1ull << IB_USER_VERBS_CMD_CREATE_QP) | - (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_CMD_QUERY_QP) | - (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | - (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | - (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); - - shca->ib_device.node_type = RDMA_NODE_IB_CA; - shca->ib_device.phys_port_cnt = shca->num_ports; - shca->ib_device.num_comp_vectors = 1; - shca->ib_device.dma_device = &shca->ofdev->dev; - shca->ib_device.query_device = ehca_query_device; - shca->ib_device.query_port = ehca_query_port; - shca->ib_device.query_gid = ehca_query_gid; - shca->ib_device.query_pkey = ehca_query_pkey; - /* shca->in_device.modify_device = ehca_modify_device */ - shca->ib_device.modify_port = ehca_modify_port; - shca->ib_device.alloc_ucontext = ehca_alloc_ucontext; - shca->ib_device.dealloc_ucontext = ehca_dealloc_ucontext; - shca->ib_device.alloc_pd = ehca_alloc_pd; - shca->ib_device.dealloc_pd = ehca_dealloc_pd; - shca->ib_device.create_ah = ehca_create_ah; - /* shca->ib_device.modify_ah = ehca_modify_ah; */ - shca->ib_device.query_ah = ehca_query_ah; - shca->ib_device.destroy_ah = ehca_destroy_ah; - shca->ib_device.create_qp = ehca_create_qp; - shca->ib_device.modify_qp = ehca_modify_qp; - shca->ib_device.query_qp = ehca_query_qp; - shca->ib_device.destroy_qp = ehca_destroy_qp; - shca->ib_device.post_send = ehca_post_send; - 
shca->ib_device.post_recv = ehca_post_recv; - shca->ib_device.create_cq = ehca_create_cq; - shca->ib_device.destroy_cq = ehca_destroy_cq; - shca->ib_device.resize_cq = ehca_resize_cq; - shca->ib_device.poll_cq = ehca_poll_cq; - /* shca->ib_device.peek_cq = ehca_peek_cq; */ - shca->ib_device.req_notify_cq = ehca_req_notify_cq; - /* shca->ib_device.req_ncomp_notif = ehca_req_ncomp_notif; */ - shca->ib_device.get_dma_mr = ehca_get_dma_mr; - shca->ib_device.reg_phys_mr = ehca_reg_phys_mr; - shca->ib_device.reg_user_mr = ehca_reg_user_mr; - shca->ib_device.query_mr = ehca_query_mr; - shca->ib_device.dereg_mr = ehca_dereg_mr; - shca->ib_device.rereg_phys_mr = ehca_rereg_phys_mr; - shca->ib_device.alloc_mw = ehca_alloc_mw; - shca->ib_device.bind_mw = ehca_bind_mw; - shca->ib_device.dealloc_mw = ehca_dealloc_mw; - shca->ib_device.alloc_fmr = ehca_alloc_fmr; - shca->ib_device.map_phys_fmr = ehca_map_phys_fmr; - shca->ib_device.unmap_fmr = ehca_unmap_fmr; - shca->ib_device.dealloc_fmr = ehca_dealloc_fmr; - shca->ib_device.attach_mcast = ehca_attach_mcast; - shca->ib_device.detach_mcast = ehca_detach_mcast; - shca->ib_device.process_mad = ehca_process_mad; - shca->ib_device.mmap = ehca_mmap; - shca->ib_device.dma_ops = &ehca_dma_mapping_ops; - shca->ib_device.get_port_immutable = ehca_port_immutable; - - if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { - shca->ib_device.uverbs_cmd_mask |= - (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | - (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | - (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | - (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); - - shca->ib_device.create_srq = ehca_create_srq; - shca->ib_device.modify_srq = ehca_modify_srq; - shca->ib_device.query_srq = ehca_query_srq; - shca->ib_device.destroy_srq = ehca_destroy_srq; - shca->ib_device.post_srq_recv = ehca_post_srq_recv; - } - - return ret; -} - -static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) -{ - struct ehca_sport *sport = &shca->sport[port - 1]; - struct ib_cq *ibcq; - struct 
ib_qp *ibqp; - struct ib_qp_init_attr qp_init_attr; - struct ib_cq_init_attr cq_attr = {}; - int ret; - - if (sport->ibcq_aqp1) { - ehca_err(&shca->ib_device, "AQP1 CQ is already created."); - return -EPERM; - } - - cq_attr.cqe = 10; - ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1), - &cq_attr); - if (IS_ERR(ibcq)) { - ehca_err(&shca->ib_device, "Cannot create AQP1 CQ."); - return PTR_ERR(ibcq); - } - sport->ibcq_aqp1 = ibcq; - - if (sport->ibqp_sqp[IB_QPT_GSI]) { - ehca_err(&shca->ib_device, "AQP1 QP is already created."); - ret = -EPERM; - goto create_aqp1; - } - - memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr)); - qp_init_attr.send_cq = ibcq; - qp_init_attr.recv_cq = ibcq; - qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; - qp_init_attr.cap.max_send_wr = 100; - qp_init_attr.cap.max_recv_wr = 100; - qp_init_attr.cap.max_send_sge = 2; - qp_init_attr.cap.max_recv_sge = 1; - qp_init_attr.qp_type = IB_QPT_GSI; - qp_init_attr.port_num = port; - qp_init_attr.qp_context = NULL; - qp_init_attr.event_handler = NULL; - qp_init_attr.srq = NULL; - - ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr); - if (IS_ERR(ibqp)) { - ehca_err(&shca->ib_device, "Cannot create AQP1 QP."); - ret = PTR_ERR(ibqp); - goto create_aqp1; - } - sport->ibqp_sqp[IB_QPT_GSI] = ibqp; - - return 0; - -create_aqp1: - ib_destroy_cq(sport->ibcq_aqp1); - return ret; -} - -static int ehca_destroy_aqp1(struct ehca_sport *sport) -{ - int ret; - - ret = ib_destroy_qp(sport->ibqp_sqp[IB_QPT_GSI]); - if (ret) { - ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret); - return ret; - } - - ret = ib_destroy_cq(sport->ibcq_aqp1); - if (ret) - ehca_gen_err("Cannot destroy AQP1 CQ. 
ret=%i", ret); - - return ret; -} - -static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%d\n", ehca_debug_level); -} - -static ssize_t ehca_store_debug_level(struct device_driver *ddp, - const char *buf, size_t count) -{ - int value = (*buf) - '0'; - if (value >= 0 && value <= 9) - ehca_debug_level = value; - return 1; -} - -static DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR, - ehca_show_debug_level, ehca_store_debug_level); - -static struct attribute *ehca_drv_attrs[] = { - &driver_attr_debug_level.attr, - NULL -}; - -static struct attribute_group ehca_drv_attr_grp = { - .attrs = ehca_drv_attrs -}; - -static const struct attribute_group *ehca_drv_attr_groups[] = { - &ehca_drv_attr_grp, - NULL, -}; - -#define EHCA_RESOURCE_ATTR(name) \ -static ssize_t ehca_show_##name(struct device *dev, \ - struct device_attribute *attr, \ - char *buf) \ -{ \ - struct ehca_shca *shca; \ - struct hipz_query_hca *rblock; \ - int data; \ - \ - shca = dev_get_drvdata(dev); \ - \ - rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); \ - if (!rblock) { \ - dev_err(dev, "Can't allocate rblock memory.\n"); \ - return 0; \ - } \ - \ - if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \ - dev_err(dev, "Can't query device properties\n"); \ - ehca_free_fw_ctrlblock(rblock); \ - return 0; \ - } \ - \ - data = rblock->name; \ - ehca_free_fw_ctrlblock(rblock); \ - \ - if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1)) \ - return snprintf(buf, 256, "1\n"); \ - else \ - return snprintf(buf, 256, "%d\n", data); \ - \ -} \ -static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL); - -EHCA_RESOURCE_ATTR(num_ports); -EHCA_RESOURCE_ATTR(hw_ver); -EHCA_RESOURCE_ATTR(max_eq); -EHCA_RESOURCE_ATTR(cur_eq); -EHCA_RESOURCE_ATTR(max_cq); -EHCA_RESOURCE_ATTR(cur_cq); -EHCA_RESOURCE_ATTR(max_qp); -EHCA_RESOURCE_ATTR(cur_qp); -EHCA_RESOURCE_ATTR(max_mr); -EHCA_RESOURCE_ATTR(cur_mr); -EHCA_RESOURCE_ATTR(max_mw); 
-EHCA_RESOURCE_ATTR(cur_mw); -EHCA_RESOURCE_ATTR(max_pd); -EHCA_RESOURCE_ATTR(max_ah); - -static ssize_t ehca_show_adapter_handle(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct ehca_shca *shca = dev_get_drvdata(dev); - - return sprintf(buf, "%llx\n", shca->ipz_hca_handle.handle); - -} -static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL); - -static struct attribute *ehca_dev_attrs[] = { - &dev_attr_adapter_handle.attr, - &dev_attr_num_ports.attr, - &dev_attr_hw_ver.attr, - &dev_attr_max_eq.attr, - &dev_attr_cur_eq.attr, - &dev_attr_max_cq.attr, - &dev_attr_cur_cq.attr, - &dev_attr_max_qp.attr, - &dev_attr_cur_qp.attr, - &dev_attr_max_mr.attr, - &dev_attr_cur_mr.attr, - &dev_attr_max_mw.attr, - &dev_attr_cur_mw.attr, - &dev_attr_max_pd.attr, - &dev_attr_max_ah.attr, - NULL -}; - -static struct attribute_group ehca_dev_attr_grp = { - .attrs = ehca_dev_attrs -}; - -static int ehca_probe(struct platform_device *dev) -{ - struct ehca_shca *shca; - const u64 *handle; - struct ib_pd *ibpd; - int ret, i, eq_size; - unsigned long flags; - - handle = of_get_property(dev->dev.of_node, "ibm,hca-handle", NULL); - if (!handle) { - ehca_gen_err("Cannot get eHCA handle for adapter: %s.", - dev->dev.of_node->full_name); - return -ENODEV; - } - - if (!(*handle)) { - ehca_gen_err("Wrong eHCA handle for adapter: %s.", - dev->dev.of_node->full_name); - return -ENODEV; - } - - shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca)); - if (!shca) { - ehca_gen_err("Cannot allocate shca memory."); - return -ENOMEM; - } - - mutex_init(&shca->modify_mutex); - atomic_set(&shca->num_cqs, 0); - atomic_set(&shca->num_qps, 0); - shca->max_num_qps = ehca_max_qp; - shca->max_num_cqs = ehca_max_cq; - - for (i = 0; i < ARRAY_SIZE(shca->sport); i++) - spin_lock_init(&shca->sport[i].mod_sqp_lock); - - shca->ofdev = dev; - shca->ipz_hca_handle.handle = *handle; - dev_set_drvdata(&dev->dev, shca); - - ret = ehca_sense_attributes(shca); - if (ret < 
0) { - ehca_gen_err("Cannot sense eHCA attributes."); - goto probe1; - } - - ret = ehca_init_device(shca); - if (ret) { - ehca_gen_err("Cannot init ehca device struct"); - goto probe1; - } - - eq_size = 2 * shca->max_num_cqs + 4 * shca->max_num_qps; - /* create event queues */ - ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size); - if (ret) { - ehca_err(&shca->ib_device, "Cannot create EQ."); - goto probe1; - } - - ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513); - if (ret) { - ehca_err(&shca->ib_device, "Cannot create NEQ."); - goto probe3; - } - - /* create internal protection domain */ - ibpd = ehca_alloc_pd(&shca->ib_device, (void *)(-1), NULL); - if (IS_ERR(ibpd)) { - ehca_err(&shca->ib_device, "Cannot create internal PD."); - ret = PTR_ERR(ibpd); - goto probe4; - } - - shca->pd = container_of(ibpd, struct ehca_pd, ib_pd); - shca->pd->ib_pd.device = &shca->ib_device; - - /* create internal max MR */ - ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr); - - if (ret) { - ehca_err(&shca->ib_device, "Cannot create internal MR ret=%i", - ret); - goto probe5; - } - - ret = ib_register_device(&shca->ib_device, NULL); - if (ret) { - ehca_err(&shca->ib_device, - "ib_register_device() failed ret=%i", ret); - goto probe6; - } - - /* create AQP1 for port 1 */ - if (ehca_open_aqp1 == 1) { - shca->sport[0].port_state = IB_PORT_DOWN; - ret = ehca_create_aqp1(shca, 1); - if (ret) { - ehca_err(&shca->ib_device, - "Cannot create AQP1 for port 1."); - goto probe7; - } - } - - /* create AQP1 for port 2 */ - if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) { - shca->sport[1].port_state = IB_PORT_DOWN; - ret = ehca_create_aqp1(shca, 2); - if (ret) { - ehca_err(&shca->ib_device, - "Cannot create AQP1 for port 2."); - goto probe8; - } - } - - ret = sysfs_create_group(&dev->dev.kobj, &ehca_dev_attr_grp); - if (ret) /* only complain; we can live without attributes */ - ehca_err(&shca->ib_device, - "Cannot create device attributes ret=%d", ret); - - 
spin_lock_irqsave(&shca_list_lock, flags); - list_add(&shca->shca_list, &shca_list); - spin_unlock_irqrestore(&shca_list_lock, flags); - - return 0; - -probe8: - ret = ehca_destroy_aqp1(&shca->sport[0]); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy AQP1 for port 1. ret=%i", ret); - -probe7: - ib_unregister_device(&shca->ib_device); - -probe6: - ret = ehca_dereg_internal_maxmr(shca); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy internal MR. ret=%x", ret); - -probe5: - ret = ehca_dealloc_pd(&shca->pd->ib_pd); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy internal PD. ret=%x", ret); - -probe4: - ret = ehca_destroy_eq(shca, &shca->neq); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy NEQ. ret=%x", ret); - -probe3: - ret = ehca_destroy_eq(shca, &shca->eq); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy EQ. ret=%x", ret); - -probe1: - ib_dealloc_device(&shca->ib_device); - - return -EINVAL; -} - -static int ehca_remove(struct platform_device *dev) -{ - struct ehca_shca *shca = dev_get_drvdata(&dev->dev); - unsigned long flags; - int ret; - - sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp); - - if (ehca_open_aqp1 == 1) { - int i; - for (i = 0; i < shca->num_ports; i++) { - ret = ehca_destroy_aqp1(&shca->sport[i]); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy AQP1 for port %x " - "ret=%i", ret, i); - } - } - - ib_unregister_device(&shca->ib_device); - - ret = ehca_dereg_internal_maxmr(shca); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy internal MR. ret=%i", ret); - - ret = ehca_dealloc_pd(&shca->pd->ib_pd); - if (ret) - ehca_err(&shca->ib_device, - "Cannot destroy internal PD. ret=%i", ret); - - ret = ehca_destroy_eq(shca, &shca->eq); - if (ret) - ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%i", ret); - - ret = ehca_destroy_eq(shca, &shca->neq); - if (ret) - ehca_err(&shca->ib_device, "Canot destroy NEQ. 
ret=%i", ret); - - ib_dealloc_device(&shca->ib_device); - - spin_lock_irqsave(&shca_list_lock, flags); - list_del(&shca->shca_list); - spin_unlock_irqrestore(&shca_list_lock, flags); - - return ret; -} - -static struct of_device_id ehca_device_table[] = -{ - { - .name = "lhca", - .compatible = "IBM,lhca", - }, - {}, -}; -MODULE_DEVICE_TABLE(of, ehca_device_table); - -static struct platform_driver ehca_driver = { - .probe = ehca_probe, - .remove = ehca_remove, - .driver = { - .name = "ehca", - .owner = THIS_MODULE, - .groups = ehca_drv_attr_groups, - .of_match_table = ehca_device_table, - }, -}; - -void ehca_poll_eqs(unsigned long data) -{ - struct ehca_shca *shca; - - spin_lock(&shca_list_lock); - list_for_each_entry(shca, &shca_list, shca_list) { - if (shca->eq.is_initialized) { - /* call deadman proc only if eq ptr does not change */ - struct ehca_eq *eq = &shca->eq; - int max = 3; - volatile u64 q_ofs, q_ofs2; - unsigned long flags; - spin_lock_irqsave(&eq->spinlock, flags); - q_ofs = eq->ipz_queue.current_q_offset; - spin_unlock_irqrestore(&eq->spinlock, flags); - do { - spin_lock_irqsave(&eq->spinlock, flags); - q_ofs2 = eq->ipz_queue.current_q_offset; - spin_unlock_irqrestore(&eq->spinlock, flags); - max--; - } while (q_ofs == q_ofs2 && max > 0); - if (q_ofs == q_ofs2) - ehca_process_eq(shca, 0); - } - } - mod_timer(&poll_eqs_timer, round_jiffies(jiffies + HZ)); - spin_unlock(&shca_list_lock); -} - -static int ehca_mem_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - static unsigned long ehca_dmem_warn_time; - unsigned long flags; - - switch (action) { - case MEM_CANCEL_OFFLINE: - case MEM_CANCEL_ONLINE: - case MEM_ONLINE: - case MEM_OFFLINE: - return NOTIFY_OK; - case MEM_GOING_ONLINE: - case MEM_GOING_OFFLINE: - /* only ok if no hca is attached to the lpar */ - spin_lock_irqsave(&shca_list_lock, flags); - if (list_empty(&shca_list)) { - spin_unlock_irqrestore(&shca_list_lock, flags); - return NOTIFY_OK; - } else { - 
spin_unlock_irqrestore(&shca_list_lock, flags); - if (printk_timed_ratelimit(&ehca_dmem_warn_time, - 30 * 1000)) - ehca_gen_err("DMEM operations are not allowed" - "in conjunction with eHCA"); - return NOTIFY_BAD; - } - } - return NOTIFY_OK; -} - -static struct notifier_block ehca_mem_nb = { - .notifier_call = ehca_mem_notifier, -}; - -static int __init ehca_module_init(void) -{ - int ret; - - printk(KERN_INFO "eHCA Infiniband Device Driver " - "(Version " HCAD_VERSION ")\n"); - - ret = ehca_create_comp_pool(); - if (ret) { - ehca_gen_err("Cannot create comp pool."); - return ret; - } - - ret = ehca_create_slab_caches(); - if (ret) { - ehca_gen_err("Cannot create SLAB caches"); - ret = -ENOMEM; - goto module_init1; - } - - ret = ehca_create_busmap(); - if (ret) { - ehca_gen_err("Cannot create busmap."); - goto module_init2; - } - - ret = ibmebus_register_driver(&ehca_driver); - if (ret) { - ehca_gen_err("Cannot register eHCA device driver"); - ret = -EINVAL; - goto module_init3; - } - - ret = register_memory_notifier(&ehca_mem_nb); - if (ret) { - ehca_gen_err("Failed registering memory add/remove notifier"); - goto module_init4; - } - - if (ehca_poll_all_eqs != 1) { - ehca_gen_err("WARNING!!!"); - ehca_gen_err("It is possible to lose interrupts."); - } else { - init_timer(&poll_eqs_timer); - poll_eqs_timer.function = ehca_poll_eqs; - poll_eqs_timer.expires = jiffies + HZ; - add_timer(&poll_eqs_timer); - } - - return 0; - -module_init4: - ibmebus_unregister_driver(&ehca_driver); - -module_init3: - ehca_destroy_busmap(); - -module_init2: - ehca_destroy_slab_caches(); - -module_init1: - ehca_destroy_comp_pool(); - return ret; -}; - -static void __exit ehca_module_exit(void) -{ - if (ehca_poll_all_eqs == 1) - del_timer_sync(&poll_eqs_timer); - - ibmebus_unregister_driver(&ehca_driver); - - unregister_memory_notifier(&ehca_mem_nb); - - ehca_destroy_busmap(); - - ehca_destroy_slab_caches(); - - ehca_destroy_comp_pool(); - - idr_destroy(&ehca_cq_idr); - 
idr_destroy(&ehca_qp_idr); -}; - -module_init(ehca_module_init); -module_exit(ehca_module_exit); diff --git a/drivers/infiniband/hw/ehca/ehca_mcast.c b/drivers/infiniband/hw/ehca/ehca_mcast.c deleted file mode 100644 index cec181532924..000000000000 --- a/drivers/infiniband/hw/ehca/ehca_mcast.c +++ /dev/null @@ -1,131 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * mcast functions - * - * Authors: Khadija Souissi - * Waleri Fomin - * Reinhard Ernst - * Hoang-Nam Nguyen - * Heiko J Schick - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include "ehca_classes.h" -#include "ehca_tools.h" -#include "ehca_qes.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" - -#define MAX_MC_LID 0xFFFE -#define MIN_MC_LID 0xC000 /* Multicast limits */ -#define EHCA_VALID_MULTICAST_GID(gid) ((gid)[0] == 0xFF) -#define EHCA_VALID_MULTICAST_LID(lid) \ - (((lid) >= MIN_MC_LID) && ((lid) <= MAX_MC_LID)) - -int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); - struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, - ib_device); - union ib_gid my_gid; - u64 subnet_prefix, interface_id, h_ret; - - if (ibqp->qp_type != IB_QPT_UD) { - ehca_err(ibqp->device, "invalid qp_type=%x", ibqp->qp_type); - return -EINVAL; - } - - if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) { - ehca_err(ibqp->device, "invalid mulitcast gid"); - return -EINVAL; - } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) { - ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid); - return -EINVAL; - } - - memcpy(&my_gid, gid->raw, sizeof(union ib_gid)); - - subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix); - interface_id = be64_to_cpu(my_gid.global.interface_id); - h_ret = hipz_h_attach_mcqp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - my_qp->galpas.kernel, - lid, subnet_prefix, interface_id); - if (h_ret != H_SUCCESS) - ehca_err(ibqp->device, - "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() 
failed " - "h_ret=%lli", my_qp, ibqp->qp_num, h_ret); - - return ehca2ib_return_code(h_ret); -} - -int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); - struct ehca_shca *shca = container_of(ibqp->pd->device, - struct ehca_shca, ib_device); - union ib_gid my_gid; - u64 subnet_prefix, interface_id, h_ret; - - if (ibqp->qp_type != IB_QPT_UD) { - ehca_err(ibqp->device, "invalid qp_type %x", ibqp->qp_type); - return -EINVAL; - } - - if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) { - ehca_err(ibqp->device, "invalid mulitcast gid"); - return -EINVAL; - } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) { - ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid); - return -EINVAL; - } - - memcpy(&my_gid, gid->raw, sizeof(union ib_gid)); - - subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix); - interface_id = be64_to_cpu(my_gid.global.interface_id); - h_ret = hipz_h_detach_mcqp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - my_qp->galpas.kernel, - lid, subnet_prefix, interface_id); - if (h_ret != H_SUCCESS) - ehca_err(ibqp->device, - "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed " - "h_ret=%lli", my_qp, ibqp->qp_num, h_ret); - - return ehca2ib_return_code(h_ret); -} diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c deleted file mode 100644 index f914b30999f8..000000000000 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ /dev/null @@ -1,2593 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * MR/MW functions - * - * Authors: Dietmar Decker - * Christoph Raisch - * Hoang-Nam Nguyen - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. 
- * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include -#include - -#include "ehca_iverbs.h" -#include "ehca_mrmw.h" -#include "hcp_if.h" -#include "hipz_hw.h" - -#define NUM_CHUNKS(length, chunk_size) \ - (((length) + (chunk_size - 1)) / (chunk_size)) - -/* max number of rpages (per hcall register_rpages) */ -#define MAX_RPAGES 512 - -/* DMEM toleration management */ -#define EHCA_SECTSHIFT SECTION_SIZE_BITS -#define EHCA_SECTSIZE (1UL << EHCA_SECTSHIFT) -#define EHCA_HUGEPAGESHIFT 34 -#define EHCA_HUGEPAGE_SIZE (1UL << EHCA_HUGEPAGESHIFT) -#define EHCA_HUGEPAGE_PFN_MASK ((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT) -#define EHCA_INVAL_ADDR 0xFFFFFFFFFFFFFFFFULL -#define EHCA_DIR_INDEX_SHIFT 13 /* 8k Entries in 64k block */ -#define EHCA_TOP_INDEX_SHIFT (EHCA_DIR_INDEX_SHIFT * 2) -#define EHCA_MAP_ENTRIES (1 << EHCA_DIR_INDEX_SHIFT) -#define EHCA_TOP_MAP_SIZE (0x10000) /* currently fixed map size */ -#define EHCA_DIR_MAP_SIZE (0x10000) -#define EHCA_ENT_MAP_SIZE (0x10000) -#define EHCA_INDEX_MASK (EHCA_MAP_ENTRIES - 1) - -static unsigned long ehca_mr_len; - -/* - * Memory map data structures - */ -struct ehca_dir_bmap { - u64 ent[EHCA_MAP_ENTRIES]; -}; -struct ehca_top_bmap { - struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES]; -}; -struct ehca_bmap { - struct ehca_top_bmap *top[EHCA_MAP_ENTRIES]; -}; - -static struct ehca_bmap *ehca_bmap; - -static struct kmem_cache *mr_cache; -static struct kmem_cache *mw_cache; - -enum ehca_mr_pgsize { - EHCA_MR_PGSIZE4K = 0x1000L, - EHCA_MR_PGSIZE64K = 0x10000L, - EHCA_MR_PGSIZE1M = 0x100000L, - EHCA_MR_PGSIZE16M = 0x1000000L -}; - -#define EHCA_MR_PGSHIFT4K 12 -#define EHCA_MR_PGSHIFT64K 16 -#define EHCA_MR_PGSHIFT1M 20 -#define EHCA_MR_PGSHIFT16M 24 - -static u64 ehca_map_vaddr(void *caddr); - -static u32 ehca_encode_hwpage_size(u32 pgsize) -{ - int log = ilog2(pgsize); - WARN_ON(log < 12 || log > 24 || log & 3); - return (log - 12) / 4; -} - -static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca) -{ - return rounddown_pow_of_two(shca->hca_cap_mr_pgsize); -} - -static 
struct ehca_mr *ehca_mr_new(void) -{ - struct ehca_mr *me; - - me = kmem_cache_zalloc(mr_cache, GFP_KERNEL); - if (me) - spin_lock_init(&me->mrlock); - else - ehca_gen_err("alloc failed"); - - return me; -} - -static void ehca_mr_delete(struct ehca_mr *me) -{ - kmem_cache_free(mr_cache, me); -} - -static struct ehca_mw *ehca_mw_new(void) -{ - struct ehca_mw *me; - - me = kmem_cache_zalloc(mw_cache, GFP_KERNEL); - if (me) - spin_lock_init(&me->mwlock); - else - ehca_gen_err("alloc failed"); - - return me; -} - -static void ehca_mw_delete(struct ehca_mw *me) -{ - kmem_cache_free(mw_cache, me); -} - -/*----------------------------------------------------------------------*/ - -struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags) -{ - struct ib_mr *ib_mr; - int ret; - struct ehca_mr *e_maxmr; - struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_shca *shca = - container_of(pd->device, struct ehca_shca, ib_device); - - if (shca->maxmr) { - e_maxmr = ehca_mr_new(); - if (!e_maxmr) { - ehca_err(&shca->ib_device, "out of memory"); - ib_mr = ERR_PTR(-ENOMEM); - goto get_dma_mr_exit0; - } - - ret = ehca_reg_maxmr(shca, e_maxmr, - (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)), - mr_access_flags, e_pd, - &e_maxmr->ib.ib_mr.lkey, - &e_maxmr->ib.ib_mr.rkey); - if (ret) { - ehca_mr_delete(e_maxmr); - ib_mr = ERR_PTR(ret); - goto get_dma_mr_exit0; - } - ib_mr = &e_maxmr->ib.ib_mr; - } else { - ehca_err(&shca->ib_device, "no internal max-MR exist!"); - ib_mr = ERR_PTR(-EINVAL); - goto get_dma_mr_exit0; - } - -get_dma_mr_exit0: - if (IS_ERR(ib_mr)) - ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x", - PTR_ERR(ib_mr), pd, mr_access_flags); - return ib_mr; -} /* end ehca_get_dma_mr() */ - -/*----------------------------------------------------------------------*/ - -struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, - u64 *iova_start) 
-{ - struct ib_mr *ib_mr; - int ret; - struct ehca_mr *e_mr; - struct ehca_shca *shca = - container_of(pd->device, struct ehca_shca, ib_device); - struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - - u64 size; - - if ((num_phys_buf <= 0) || !phys_buf_array) { - ehca_err(pd->device, "bad input values: num_phys_buf=%x " - "phys_buf_array=%p", num_phys_buf, phys_buf_array); - ib_mr = ERR_PTR(-EINVAL); - goto reg_phys_mr_exit0; - } - if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || - ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { - /* - * Remote Write Access requires Local Write Access - * Remote Atomic Access requires Local Write Access - */ - ehca_err(pd->device, "bad input values: mr_access_flags=%x", - mr_access_flags); - ib_mr = ERR_PTR(-EINVAL); - goto reg_phys_mr_exit0; - } - - /* check physical buffer list and calculate size */ - ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf, - iova_start, &size); - if (ret) { - ib_mr = ERR_PTR(ret); - goto reg_phys_mr_exit0; - } - if ((size == 0) || - (((u64)iova_start + size) < (u64)iova_start)) { - ehca_err(pd->device, "bad input values: size=%llx iova_start=%p", - size, iova_start); - ib_mr = ERR_PTR(-EINVAL); - goto reg_phys_mr_exit0; - } - - e_mr = ehca_mr_new(); - if (!e_mr) { - ehca_err(pd->device, "out of memory"); - ib_mr = ERR_PTR(-ENOMEM); - goto reg_phys_mr_exit0; - } - - /* register MR on HCA */ - if (ehca_mr_is_maxmr(size, iova_start)) { - e_mr->flags |= EHCA_MR_FLAG_MAXMR; - ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags, - e_pd, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey); - if (ret) { - ib_mr = ERR_PTR(ret); - goto reg_phys_mr_exit1; - } - } else { - struct ehca_mr_pginfo pginfo; - u32 num_kpages; - u32 num_hwpages; - u64 hw_pgsize; - - num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size, - PAGE_SIZE); - /* for kernel space we try most possible pgsize 
*/ - hw_pgsize = ehca_get_max_hwpage_size(shca); - num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size, - hw_pgsize); - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = EHCA_MR_PGI_PHYS; - pginfo.num_kpages = num_kpages; - pginfo.hwpage_size = hw_pgsize; - pginfo.num_hwpages = num_hwpages; - pginfo.u.phy.num_phys_buf = num_phys_buf; - pginfo.u.phy.phys_buf_array = phys_buf_array; - pginfo.next_hwpage = - ((u64)iova_start & ~PAGE_MASK) / hw_pgsize; - - ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags, - e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); - if (ret) { - ib_mr = ERR_PTR(ret); - goto reg_phys_mr_exit1; - } - } - - /* successful registration of all pages */ - return &e_mr->ib.ib_mr; - -reg_phys_mr_exit1: - ehca_mr_delete(e_mr); -reg_phys_mr_exit0: - if (IS_ERR(ib_mr)) - ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p " - "num_phys_buf=%x mr_access_flags=%x iova_start=%p", - PTR_ERR(ib_mr), pd, phys_buf_array, - num_phys_buf, mr_access_flags, iova_start); - return ib_mr; -} /* end ehca_reg_phys_mr() */ - -/*----------------------------------------------------------------------*/ - -struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, - u64 virt, int mr_access_flags, - struct ib_udata *udata) -{ - struct ib_mr *ib_mr; - struct ehca_mr *e_mr; - struct ehca_shca *shca = - container_of(pd->device, struct ehca_shca, ib_device); - struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_mr_pginfo pginfo; - int ret, page_shift; - u32 num_kpages; - u32 num_hwpages; - u64 hwpage_size; - - if (!pd) { - ehca_gen_err("bad pd=%p", pd); - return ERR_PTR(-EFAULT); - } - - if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || - ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { - /* - * Remote Write Access requires Local Write Access - * Remote Atomic Access requires Local Write 
Access - */ - ehca_err(pd->device, "bad input values: mr_access_flags=%x", - mr_access_flags); - ib_mr = ERR_PTR(-EINVAL); - goto reg_user_mr_exit0; - } - - if (length == 0 || virt + length < virt) { - ehca_err(pd->device, "bad input values: length=%llx " - "virt_base=%llx", length, virt); - ib_mr = ERR_PTR(-EINVAL); - goto reg_user_mr_exit0; - } - - e_mr = ehca_mr_new(); - if (!e_mr) { - ehca_err(pd->device, "out of memory"); - ib_mr = ERR_PTR(-ENOMEM); - goto reg_user_mr_exit0; - } - - e_mr->umem = ib_umem_get(pd->uobject->context, start, length, - mr_access_flags, 0); - if (IS_ERR(e_mr->umem)) { - ib_mr = (void *)e_mr->umem; - goto reg_user_mr_exit1; - } - - if (e_mr->umem->page_size != PAGE_SIZE) { - ehca_err(pd->device, "page size not supported, " - "e_mr->umem->page_size=%x", e_mr->umem->page_size); - ib_mr = ERR_PTR(-EINVAL); - goto reg_user_mr_exit2; - } - - /* determine number of MR pages */ - num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE); - /* select proper hw_pgsize */ - page_shift = PAGE_SHIFT; - if (e_mr->umem->hugetlb) { - /* determine page_shift, clamp between 4K and 16M */ - page_shift = (fls64(length - 1) + 3) & ~3; - page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K), - EHCA_MR_PGSHIFT16M); - } - hwpage_size = 1UL << page_shift; - - /* now that we have the desired page size, shift until it's - * supported, too. 4K is always supported, so this terminates. 
- */ - while (!(hwpage_size & shca->hca_cap_mr_pgsize)) - hwpage_size >>= 4; - -reg_user_mr_fallback: - num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size); - /* register MR on HCA */ - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = EHCA_MR_PGI_USER; - pginfo.hwpage_size = hwpage_size; - pginfo.num_kpages = num_kpages; - pginfo.num_hwpages = num_hwpages; - pginfo.u.usr.region = e_mr->umem; - pginfo.next_hwpage = ib_umem_offset(e_mr->umem) / hwpage_size; - pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl; - ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags, - e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); - if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) { - ehca_warn(pd->device, "failed to register mr " - "with hwpage_size=%llx", hwpage_size); - ehca_info(pd->device, "try to register mr with " - "kpage_size=%lx", PAGE_SIZE); - /* - * this means kpages are not contiguous for a hw page - * try kernel page size as fallback solution - */ - hwpage_size = PAGE_SIZE; - goto reg_user_mr_fallback; - } - if (ret) { - ib_mr = ERR_PTR(ret); - goto reg_user_mr_exit2; - } - - /* successful registration of all pages */ - return &e_mr->ib.ib_mr; - -reg_user_mr_exit2: - ib_umem_release(e_mr->umem); -reg_user_mr_exit1: - ehca_mr_delete(e_mr); -reg_user_mr_exit0: - if (IS_ERR(ib_mr)) - ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p", - PTR_ERR(ib_mr), pd, mr_access_flags, udata); - return ib_mr; -} /* end ehca_reg_user_mr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_rereg_phys_mr(struct ib_mr *mr, - int mr_rereg_mask, - struct ib_pd *pd, - struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - int mr_access_flags, - u64 *iova_start) -{ - int ret; - - struct ehca_shca *shca = - container_of(mr->device, struct ehca_shca, ib_device); - struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); - u64 new_size; - u64 *new_start; - u32 
new_acl; - struct ehca_pd *new_pd; - u32 tmp_lkey, tmp_rkey; - unsigned long sl_flags; - u32 num_kpages = 0; - u32 num_hwpages = 0; - struct ehca_mr_pginfo pginfo; - - if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) { - /* TODO not supported, because PHYP rereg hCall needs pages */ - ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not " - "supported yet, mr_rereg_mask=%x", mr_rereg_mask); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - - if (mr_rereg_mask & IB_MR_REREG_PD) { - if (!pd) { - ehca_err(mr->device, "rereg with bad pd, pd=%p " - "mr_rereg_mask=%x", pd, mr_rereg_mask); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - } - - if ((mr_rereg_mask & - ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) || - (mr_rereg_mask == 0)) { - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - - /* check other parameters */ - if (e_mr == shca->maxmr) { - /* should be impossible, however reject to be sure */ - ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p " - "shca->maxmr=%p mr->lkey=%x", - mr, shca->maxmr, mr->lkey); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. 
addr/size */ - if (e_mr->flags & EHCA_MR_FLAG_FMR) { - ehca_err(mr->device, "not supported for FMR, mr=%p " - "flags=%x", mr, e_mr->flags); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - if (!phys_buf_array || num_phys_buf <= 0) { - ehca_err(mr->device, "bad input values mr_rereg_mask=%x" - " phys_buf_array=%p num_phys_buf=%x", - mr_rereg_mask, phys_buf_array, num_phys_buf); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - } - if ((mr_rereg_mask & IB_MR_REREG_ACCESS) && /* change ACL */ - (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || - ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) { - /* - * Remote Write Access requires Local Write Access - * Remote Atomic Access requires Local Write Access - */ - ehca_err(mr->device, "bad input values: mr_rereg_mask=%x " - "mr_access_flags=%x", mr_rereg_mask, mr_access_flags); - ret = -EINVAL; - goto rereg_phys_mr_exit0; - } - - /* set requested values dependent on rereg request */ - spin_lock_irqsave(&e_mr->mrlock, sl_flags); - new_start = e_mr->start; - new_size = e_mr->size; - new_acl = e_mr->acl; - new_pd = container_of(mr->pd, struct ehca_pd, ib_pd); - - if (mr_rereg_mask & IB_MR_REREG_TRANS) { - u64 hw_pgsize = ehca_get_max_hwpage_size(shca); - - new_start = iova_start; /* change address */ - /* check physical buffer list and calculate size */ - ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, - num_phys_buf, iova_start, - &new_size); - if (ret) - goto rereg_phys_mr_exit1; - if ((new_size == 0) || - (((u64)iova_start + new_size) < (u64)iova_start)) { - ehca_err(mr->device, "bad input values: new_size=%llx " - "iova_start=%p", new_size, iova_start); - ret = -EINVAL; - goto rereg_phys_mr_exit1; - } - num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) + - new_size, PAGE_SIZE); - num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) + - new_size, hw_pgsize); - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = 
EHCA_MR_PGI_PHYS; - pginfo.num_kpages = num_kpages; - pginfo.hwpage_size = hw_pgsize; - pginfo.num_hwpages = num_hwpages; - pginfo.u.phy.num_phys_buf = num_phys_buf; - pginfo.u.phy.phys_buf_array = phys_buf_array; - pginfo.next_hwpage = - ((u64)iova_start & ~PAGE_MASK) / hw_pgsize; - } - if (mr_rereg_mask & IB_MR_REREG_ACCESS) - new_acl = mr_access_flags; - if (mr_rereg_mask & IB_MR_REREG_PD) - new_pd = container_of(pd, struct ehca_pd, ib_pd); - - ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl, - new_pd, &pginfo, &tmp_lkey, &tmp_rkey); - if (ret) - goto rereg_phys_mr_exit1; - - /* successful reregistration */ - if (mr_rereg_mask & IB_MR_REREG_PD) - mr->pd = pd; - mr->lkey = tmp_lkey; - mr->rkey = tmp_rkey; - -rereg_phys_mr_exit1: - spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); -rereg_phys_mr_exit0: - if (ret) - ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p " - "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x " - "iova_start=%p", - ret, mr, mr_rereg_mask, pd, phys_buf_array, - num_phys_buf, mr_access_flags, iova_start); - return ret; -} /* end ehca_rereg_phys_mr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) -{ - int ret = 0; - u64 h_ret; - struct ehca_shca *shca = - container_of(mr->device, struct ehca_shca, ib_device); - struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); - unsigned long sl_flags; - struct ehca_mr_hipzout_parms hipzout; - - if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { - ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p " - "e_mr->flags=%x", mr, e_mr, e_mr->flags); - ret = -EINVAL; - goto query_mr_exit0; - } - - memset(mr_attr, 0, sizeof(struct ib_mr_attr)); - spin_lock_irqsave(&e_mr->mrlock, sl_flags); - - h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout); - if (h_ret != H_SUCCESS) { - ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p " - "hca_hndl=%llx mr_hndl=%llx 
lkey=%x", - h_ret, mr, shca->ipz_hca_handle.handle, - e_mr->ipz_mr_handle.handle, mr->lkey); - ret = ehca2ib_return_code(h_ret); - goto query_mr_exit1; - } - mr_attr->pd = mr->pd; - mr_attr->device_virt_addr = hipzout.vaddr; - mr_attr->size = hipzout.len; - mr_attr->lkey = hipzout.lkey; - mr_attr->rkey = hipzout.rkey; - ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags); - -query_mr_exit1: - spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); -query_mr_exit0: - if (ret) - ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p", - ret, mr, mr_attr); - return ret; -} /* end ehca_query_mr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_dereg_mr(struct ib_mr *mr) -{ - int ret = 0; - u64 h_ret; - struct ehca_shca *shca = - container_of(mr->device, struct ehca_shca, ib_device); - struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); - - if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { - ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p " - "e_mr->flags=%x", mr, e_mr, e_mr->flags); - ret = -EINVAL; - goto dereg_mr_exit0; - } else if (e_mr == shca->maxmr) { - /* should be impossible, however reject to be sure */ - ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p " - "shca->maxmr=%p mr->lkey=%x", - mr, shca->maxmr, mr->lkey); - ret = -EINVAL; - goto dereg_mr_exit0; - } - - /* TODO: BUSY: MR still has bound window(s) */ - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); - if (h_ret != H_SUCCESS) { - ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lli shca=%p " - "e_mr=%p hca_hndl=%llx mr_hndl=%llx mr->lkey=%x", - h_ret, shca, e_mr, shca->ipz_hca_handle.handle, - e_mr->ipz_mr_handle.handle, mr->lkey); - ret = ehca2ib_return_code(h_ret); - goto dereg_mr_exit0; - } - - if (e_mr->umem) - ib_umem_release(e_mr->umem); - - /* successful deregistration */ - ehca_mr_delete(e_mr); - -dereg_mr_exit0: - if (ret) - ehca_err(mr->device, "ret=%i mr=%p", ret, mr); - return ret; -} /* end 
ehca_dereg_mr() */ - -/*----------------------------------------------------------------------*/ - -struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) -{ - struct ib_mw *ib_mw; - u64 h_ret; - struct ehca_mw *e_mw; - struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_shca *shca = - container_of(pd->device, struct ehca_shca, ib_device); - struct ehca_mw_hipzout_parms hipzout; - - if (type != IB_MW_TYPE_1) - return ERR_PTR(-EINVAL); - - e_mw = ehca_mw_new(); - if (!e_mw) { - ib_mw = ERR_PTR(-ENOMEM); - goto alloc_mw_exit0; - } - - h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw, - e_pd->fw_pd, &hipzout); - if (h_ret != H_SUCCESS) { - ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lli " - "shca=%p hca_hndl=%llx mw=%p", - h_ret, shca, shca->ipz_hca_handle.handle, e_mw); - ib_mw = ERR_PTR(ehca2ib_return_code(h_ret)); - goto alloc_mw_exit1; - } - /* successful MW allocation */ - e_mw->ipz_mw_handle = hipzout.handle; - e_mw->ib_mw.rkey = hipzout.rkey; - return &e_mw->ib_mw; - -alloc_mw_exit1: - ehca_mw_delete(e_mw); -alloc_mw_exit0: - if (IS_ERR(ib_mw)) - ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd); - return ib_mw; -} /* end ehca_alloc_mw() */ - -/*----------------------------------------------------------------------*/ - -int ehca_bind_mw(struct ib_qp *qp, - struct ib_mw *mw, - struct ib_mw_bind *mw_bind) -{ - /* TODO: not supported up to now */ - ehca_gen_err("bind MW currently not supported by HCAD"); - - return -EPERM; -} /* end ehca_bind_mw() */ - -/*----------------------------------------------------------------------*/ - -int ehca_dealloc_mw(struct ib_mw *mw) -{ - u64 h_ret; - struct ehca_shca *shca = - container_of(mw->device, struct ehca_shca, ib_device); - struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw); - - h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw); - if (h_ret != H_SUCCESS) { - ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lli shca=%p " - "mw=%p 
rkey=%x hca_hndl=%llx mw_hndl=%llx", - h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle, - e_mw->ipz_mw_handle.handle); - return ehca2ib_return_code(h_ret); - } - /* successful deallocation */ - ehca_mw_delete(e_mw); - return 0; -} /* end ehca_dealloc_mw() */ - -/*----------------------------------------------------------------------*/ - -struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, - int mr_access_flags, - struct ib_fmr_attr *fmr_attr) -{ - struct ib_fmr *ib_fmr; - struct ehca_shca *shca = - container_of(pd->device, struct ehca_shca, ib_device); - struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_mr *e_fmr; - int ret; - u32 tmp_lkey, tmp_rkey; - struct ehca_mr_pginfo pginfo; - u64 hw_pgsize; - - /* check other parameters */ - if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || - ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && - !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { - /* - * Remote Write Access requires Local Write Access - * Remote Atomic Access requires Local Write Access - */ - ehca_err(pd->device, "bad input values: mr_access_flags=%x", - mr_access_flags); - ib_fmr = ERR_PTR(-EINVAL); - goto alloc_fmr_exit0; - } - if (mr_access_flags & IB_ACCESS_MW_BIND) { - ehca_err(pd->device, "bad input values: mr_access_flags=%x", - mr_access_flags); - ib_fmr = ERR_PTR(-EINVAL); - goto alloc_fmr_exit0; - } - if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) { - ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x " - "fmr_attr->max_maps=%x fmr_attr->page_shift=%x", - fmr_attr->max_pages, fmr_attr->max_maps, - fmr_attr->page_shift); - ib_fmr = ERR_PTR(-EINVAL); - goto alloc_fmr_exit0; - } - - hw_pgsize = 1 << fmr_attr->page_shift; - if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) { - ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x", - fmr_attr->page_shift); - ib_fmr = ERR_PTR(-EINVAL); - goto alloc_fmr_exit0; - } - - e_fmr = ehca_mr_new(); - if (!e_fmr) { 
- ib_fmr = ERR_PTR(-ENOMEM); - goto alloc_fmr_exit0; - } - e_fmr->flags |= EHCA_MR_FLAG_FMR; - - /* register MR on HCA */ - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.hwpage_size = hw_pgsize; - /* - * pginfo.num_hwpages==0, ie register_rpages() will not be called - * but deferred to map_phys_fmr() - */ - ret = ehca_reg_mr(shca, e_fmr, NULL, - fmr_attr->max_pages * (1 << fmr_attr->page_shift), - mr_access_flags, e_pd, &pginfo, - &tmp_lkey, &tmp_rkey, EHCA_REG_MR); - if (ret) { - ib_fmr = ERR_PTR(ret); - goto alloc_fmr_exit1; - } - - /* successful */ - e_fmr->hwpage_size = hw_pgsize; - e_fmr->fmr_page_size = 1 << fmr_attr->page_shift; - e_fmr->fmr_max_pages = fmr_attr->max_pages; - e_fmr->fmr_max_maps = fmr_attr->max_maps; - e_fmr->fmr_map_cnt = 0; - return &e_fmr->ib.ib_fmr; - -alloc_fmr_exit1: - ehca_mr_delete(e_fmr); -alloc_fmr_exit0: - return ib_fmr; -} /* end ehca_alloc_fmr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_map_phys_fmr(struct ib_fmr *fmr, - u64 *page_list, - int list_len, - u64 iova) -{ - int ret; - struct ehca_shca *shca = - container_of(fmr->device, struct ehca_shca, ib_device); - struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); - struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd); - struct ehca_mr_pginfo pginfo; - u32 tmp_lkey, tmp_rkey; - - if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { - ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x", - e_fmr, e_fmr->flags); - ret = -EINVAL; - goto map_phys_fmr_exit0; - } - ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len); - if (ret) - goto map_phys_fmr_exit0; - if (iova % e_fmr->fmr_page_size) { - /* only whole-numbered pages */ - ehca_err(fmr->device, "bad iova, iova=%llx fmr_page_size=%x", - iova, e_fmr->fmr_page_size); - ret = -EINVAL; - goto map_phys_fmr_exit0; - } - if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) { - /* HCAD does not limit the maps, however trace this anyway */ - 
ehca_info(fmr->device, "map limit exceeded, fmr=%p " - "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x", - fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps); - } - - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = EHCA_MR_PGI_FMR; - pginfo.num_kpages = list_len; - pginfo.hwpage_size = e_fmr->hwpage_size; - pginfo.num_hwpages = - list_len * e_fmr->fmr_page_size / pginfo.hwpage_size; - pginfo.u.fmr.page_list = page_list; - pginfo.next_hwpage = - (iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size; - pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size; - - ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova, - list_len * e_fmr->fmr_page_size, - e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey); - if (ret) - goto map_phys_fmr_exit0; - - /* successful reregistration */ - e_fmr->fmr_map_cnt++; - e_fmr->ib.ib_fmr.lkey = tmp_lkey; - e_fmr->ib.ib_fmr.rkey = tmp_rkey; - return 0; - -map_phys_fmr_exit0: - if (ret) - ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x " - "iova=%llx", ret, fmr, page_list, list_len, iova); - return ret; -} /* end ehca_map_phys_fmr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_unmap_fmr(struct list_head *fmr_list) -{ - int ret = 0; - struct ib_fmr *ib_fmr; - struct ehca_shca *shca = NULL; - struct ehca_shca *prev_shca; - struct ehca_mr *e_fmr; - u32 num_fmr = 0; - u32 unmap_fmr_cnt = 0; - - /* check all FMR belong to same SHCA, and check internal flag */ - list_for_each_entry(ib_fmr, fmr_list, list) { - prev_shca = shca; - shca = container_of(ib_fmr->device, struct ehca_shca, - ib_device); - e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); - if ((shca != prev_shca) && prev_shca) { - ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p " - "prev_shca=%p e_fmr=%p", - shca, prev_shca, e_fmr); - ret = -EINVAL; - goto unmap_fmr_exit0; - } - if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { - ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p " - "e_fmr->flags=%x", e_fmr, e_fmr->flags); - ret = -EINVAL; - goto 
unmap_fmr_exit0; - } - num_fmr++; - } - - /* loop over all FMRs to unmap */ - list_for_each_entry(ib_fmr, fmr_list, list) { - unmap_fmr_cnt++; - e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); - shca = container_of(ib_fmr->device, struct ehca_shca, - ib_device); - ret = ehca_unmap_one_fmr(shca, e_fmr); - if (ret) { - /* unmap failed, stop unmapping of rest of FMRs */ - ehca_err(&shca->ib_device, "unmap of one FMR failed, " - "stop rest, e_fmr=%p num_fmr=%x " - "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr, - unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey); - goto unmap_fmr_exit0; - } - } - -unmap_fmr_exit0: - if (ret) - ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x", - ret, fmr_list, num_fmr, unmap_fmr_cnt); - return ret; -} /* end ehca_unmap_fmr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_dealloc_fmr(struct ib_fmr *fmr) -{ - int ret; - u64 h_ret; - struct ehca_shca *shca = - container_of(fmr->device, struct ehca_shca, ib_device); - struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); - - if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { - ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x", - e_fmr, e_fmr->flags); - ret = -EINVAL; - goto free_fmr_exit0; - } - - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); - if (h_ret != H_SUCCESS) { - ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lli e_fmr=%p " - "hca_hndl=%llx fmr_hndl=%llx fmr->lkey=%x", - h_ret, e_fmr, shca->ipz_hca_handle.handle, - e_fmr->ipz_mr_handle.handle, fmr->lkey); - ret = ehca2ib_return_code(h_ret); - goto free_fmr_exit0; - } - /* successful deregistration */ - ehca_mr_delete(e_fmr); - return 0; - -free_fmr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr); - return ret; -} /* end ehca_dealloc_fmr() */ - -/*----------------------------------------------------------------------*/ - -static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca, - struct ehca_mr *e_mr, - struct 
ehca_mr_pginfo *pginfo); - -int ehca_reg_mr(struct ehca_shca *shca, - struct ehca_mr *e_mr, - u64 *iova_start, - u64 size, - int acl, - struct ehca_pd *e_pd, - struct ehca_mr_pginfo *pginfo, - u32 *lkey, /*OUT*/ - u32 *rkey, /*OUT*/ - enum ehca_reg_type reg_type) -{ - int ret; - u64 h_ret; - u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout; - - ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl); - if (ehca_use_hp_mr == 1) - hipz_acl |= 0x00000001; - - h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr, - (u64)iova_start, size, hipz_acl, - e_pd->fw_pd, &hipzout); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lli " - "hca_hndl=%llx", h_ret, shca->ipz_hca_handle.handle); - ret = ehca2ib_return_code(h_ret); - goto ehca_reg_mr_exit0; - } - - e_mr->ipz_mr_handle = hipzout.handle; - - if (reg_type == EHCA_REG_BUSMAP_MR) - ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo); - else if (reg_type == EHCA_REG_MR) - ret = ehca_reg_mr_rpages(shca, e_mr, pginfo); - else - ret = -EINVAL; - - if (ret) - goto ehca_reg_mr_exit1; - - /* successful registration */ - e_mr->num_kpages = pginfo->num_kpages; - e_mr->num_hwpages = pginfo->num_hwpages; - e_mr->hwpage_size = pginfo->hwpage_size; - e_mr->start = iova_start; - e_mr->size = size; - e_mr->acl = acl; - *lkey = hipzout.lkey; - *rkey = hipzout.rkey; - return 0; - -ehca_reg_mr_exit1: - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "h_ret=%lli shca=%p e_mr=%p " - "iova_start=%p size=%llx acl=%x e_pd=%p lkey=%x " - "pginfo=%p num_kpages=%llx num_hwpages=%llx ret=%i", - h_ret, shca, e_mr, iova_start, size, acl, e_pd, - hipzout.lkey, pginfo, pginfo->num_kpages, - pginfo->num_hwpages, ret); - ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, " - "not recoverable"); - } -ehca_reg_mr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p " - 
"iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p " - "num_kpages=%llx num_hwpages=%llx", - ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo, - pginfo->num_kpages, pginfo->num_hwpages); - return ret; -} /* end ehca_reg_mr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_reg_mr_rpages(struct ehca_shca *shca, - struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo) -{ - int ret = 0; - u64 h_ret; - u32 rnum; - u64 rpage; - u32 i; - u64 *kpage; - - if (!pginfo->num_hwpages) /* in case of fmr */ - return 0; - - kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!kpage) { - ehca_err(&shca->ib_device, "kpage alloc failed"); - ret = -ENOMEM; - goto ehca_reg_mr_rpages_exit0; - } - - /* max MAX_RPAGES ehca mr pages per register call */ - for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) { - - if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { - rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */ - if (rnum == 0) - rnum = MAX_RPAGES; /* last shot is full */ - } else - rnum = MAX_RPAGES; - - ret = ehca_set_pagebuf(pginfo, rnum, kpage); - if (ret) { - ehca_err(&shca->ib_device, "ehca_set_pagebuf " - "bad rc, ret=%i rnum=%x kpage=%p", - ret, rnum, kpage); - goto ehca_reg_mr_rpages_exit1; - } - - if (rnum > 1) { - rpage = __pa(kpage); - if (!rpage) { - ehca_err(&shca->ib_device, "kpage=%p i=%x", - kpage, i); - ret = -EFAULT; - goto ehca_reg_mr_rpages_exit1; - } - } else - rpage = *kpage; - - h_ret = hipz_h_register_rpage_mr( - shca->ipz_hca_handle, e_mr, - ehca_encode_hwpage_size(pginfo->hwpage_size), - 0, rpage, rnum); - - if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { - /* - * check for 'registration complete'==H_SUCCESS - * and for 'page registered'==H_PAGE_REGISTERED - */ - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "last " - "hipz_reg_rpage_mr failed, h_ret=%lli " - "e_mr=%p i=%x hca_hndl=%llx mr_hndl=%llx" - " lkey=%x", h_ret, e_mr, i, - shca->ipz_hca_handle.handle, - 
e_mr->ipz_mr_handle.handle, - e_mr->ib.ib_mr.lkey); - ret = ehca2ib_return_code(h_ret); - break; - } else - ret = 0; - } else if (h_ret != H_PAGE_REGISTERED) { - ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, " - "h_ret=%lli e_mr=%p i=%x lkey=%x hca_hndl=%llx " - "mr_hndl=%llx", h_ret, e_mr, i, - e_mr->ib.ib_mr.lkey, - shca->ipz_hca_handle.handle, - e_mr->ipz_mr_handle.handle); - ret = ehca2ib_return_code(h_ret); - break; - } else - ret = 0; - } /* end for(i) */ - - -ehca_reg_mr_rpages_exit1: - ehca_free_fw_ctrlblock(kpage); -ehca_reg_mr_rpages_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p " - "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr, - pginfo, pginfo->num_kpages, pginfo->num_hwpages); - return ret; -} /* end ehca_reg_mr_rpages() */ - -/*----------------------------------------------------------------------*/ - -inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, - struct ehca_mr *e_mr, - u64 *iova_start, - u64 size, - u32 acl, - struct ehca_pd *e_pd, - struct ehca_mr_pginfo *pginfo, - u32 *lkey, /*OUT*/ - u32 *rkey) /*OUT*/ -{ - int ret; - u64 h_ret; - u32 hipz_acl; - u64 *kpage; - u64 rpage; - struct ehca_mr_pginfo pginfo_save; - struct ehca_mr_hipzout_parms hipzout; - - ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl); - - kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!kpage) { - ehca_err(&shca->ib_device, "kpage alloc failed"); - ret = -ENOMEM; - goto ehca_rereg_mr_rereg1_exit0; - } - - pginfo_save = *pginfo; - ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage); - if (ret) { - ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p " - "pginfo=%p type=%x num_kpages=%llx num_hwpages=%llx " - "kpage=%p", e_mr, pginfo, pginfo->type, - pginfo->num_kpages, pginfo->num_hwpages, kpage); - goto ehca_rereg_mr_rereg1_exit1; - } - rpage = __pa(kpage); - if (!rpage) { - ehca_err(&shca->ib_device, "kpage=%p", kpage); - ret = -EFAULT; - goto 
ehca_rereg_mr_rereg1_exit1; - } - h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr, - (u64)iova_start, size, hipz_acl, - e_pd->fw_pd, rpage, &hipzout); - if (h_ret != H_SUCCESS) { - /* - * reregistration unsuccessful, try it again with the 3 hCalls, - * e.g. this is required in case H_MR_CONDITION - * (MW bound or MR is shared) - */ - ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed " - "(Rereg1), h_ret=%lli e_mr=%p", h_ret, e_mr); - *pginfo = pginfo_save; - ret = -EAGAIN; - } else if ((u64 *)hipzout.vaddr != iova_start) { - ehca_err(&shca->ib_device, "PHYP changed iova_start in " - "rereg_pmr, iova_start=%p iova_start_out=%llx e_mr=%p " - "mr_handle=%llx lkey=%x lkey_out=%x", iova_start, - hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle, - e_mr->ib.ib_mr.lkey, hipzout.lkey); - ret = -EFAULT; - } else { - /* - * successful reregistration - * note: start and start_out are identical for eServer HCAs - */ - e_mr->num_kpages = pginfo->num_kpages; - e_mr->num_hwpages = pginfo->num_hwpages; - e_mr->hwpage_size = pginfo->hwpage_size; - e_mr->start = iova_start; - e_mr->size = size; - e_mr->acl = acl; - *lkey = hipzout.lkey; - *rkey = hipzout.rkey; - } - -ehca_rereg_mr_rereg1_exit1: - ehca_free_fw_ctrlblock(kpage); -ehca_rereg_mr_rereg1_exit0: - if ( ret && (ret != -EAGAIN) ) - ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x " - "pginfo=%p num_kpages=%llx num_hwpages=%llx", - ret, *lkey, *rkey, pginfo, pginfo->num_kpages, - pginfo->num_hwpages); - return ret; -} /* end ehca_rereg_mr_rereg1() */ - -/*----------------------------------------------------------------------*/ - -int ehca_rereg_mr(struct ehca_shca *shca, - struct ehca_mr *e_mr, - u64 *iova_start, - u64 size, - int acl, - struct ehca_pd *e_pd, - struct ehca_mr_pginfo *pginfo, - u32 *lkey, - u32 *rkey) -{ - int ret = 0; - u64 h_ret; - int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */ - int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */ - - /* first determine 
reregistration hCall(s) */ - if ((pginfo->num_hwpages > MAX_RPAGES) || - (e_mr->num_hwpages > MAX_RPAGES) || - (pginfo->num_hwpages > e_mr->num_hwpages)) { - ehca_dbg(&shca->ib_device, "Rereg3 case, " - "pginfo->num_hwpages=%llx e_mr->num_hwpages=%x", - pginfo->num_hwpages, e_mr->num_hwpages); - rereg_1_hcall = 0; - rereg_3_hcall = 1; - } - - if (e_mr->flags & EHCA_MR_FLAG_MAXMR) { /* check for max-MR */ - rereg_1_hcall = 0; - rereg_3_hcall = 1; - e_mr->flags &= ~EHCA_MR_FLAG_MAXMR; - ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p", - e_mr); - } - - if (rereg_1_hcall) { - ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size, - acl, e_pd, pginfo, lkey, rkey); - if (ret) { - if (ret == -EAGAIN) - rereg_3_hcall = 1; - else - goto ehca_rereg_mr_exit0; - } - } - - if (rereg_3_hcall) { - struct ehca_mr save_mr; - - /* first deregister old MR */ - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_free_mr failed, " - "h_ret=%lli e_mr=%p hca_hndl=%llx mr_hndl=%llx " - "mr->lkey=%x", - h_ret, e_mr, shca->ipz_hca_handle.handle, - e_mr->ipz_mr_handle.handle, - e_mr->ib.ib_mr.lkey); - ret = ehca2ib_return_code(h_ret); - goto ehca_rereg_mr_exit0; - } - /* clean ehca_mr_t, without changing struct ib_mr and lock */ - save_mr = *e_mr; - ehca_mr_deletenew(e_mr); - - /* set some MR values */ - e_mr->flags = save_mr.flags; - e_mr->hwpage_size = save_mr.hwpage_size; - e_mr->fmr_page_size = save_mr.fmr_page_size; - e_mr->fmr_max_pages = save_mr.fmr_max_pages; - e_mr->fmr_max_maps = save_mr.fmr_max_maps; - e_mr->fmr_map_cnt = save_mr.fmr_map_cnt; - - ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl, - e_pd, pginfo, lkey, rkey, EHCA_REG_MR); - if (ret) { - u32 offset = (u64)(&e_mr->flags) - (u64)e_mr; - memcpy(&e_mr->flags, &(save_mr.flags), - sizeof(struct ehca_mr) - offset); - goto ehca_rereg_mr_exit0; - } - } - -ehca_rereg_mr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p " 
- "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p " - "num_kpages=%llx lkey=%x rkey=%x rereg_1_hcall=%x " - "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size, - acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey, - rereg_1_hcall, rereg_3_hcall); - return ret; -} /* end ehca_rereg_mr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_unmap_one_fmr(struct ehca_shca *shca, - struct ehca_mr *e_fmr) -{ - int ret = 0; - u64 h_ret; - struct ehca_pd *e_pd = - container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd); - struct ehca_mr save_fmr; - u32 tmp_lkey, tmp_rkey; - struct ehca_mr_pginfo pginfo; - struct ehca_mr_hipzout_parms hipzout; - struct ehca_mr save_mr; - - if (e_fmr->fmr_max_pages <= MAX_RPAGES) { - /* - * note: after using rereg hcall with len=0, - * rereg hcall must be used again for registering pages - */ - h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0, - 0, 0, e_pd->fw_pd, 0, &hipzout); - if (h_ret == H_SUCCESS) { - /* successful reregistration */ - e_fmr->start = NULL; - e_fmr->size = 0; - tmp_lkey = hipzout.lkey; - tmp_rkey = hipzout.rkey; - return 0; - } - /* - * should not happen, because length checked above, - * FMRs are not shared and no MW bound to FMRs - */ - ehca_err(&shca->ib_device, "hipz_reregister_pmr failed " - "(Rereg1), h_ret=%lli e_fmr=%p hca_hndl=%llx " - "mr_hndl=%llx lkey=%x lkey_out=%x", - h_ret, e_fmr, shca->ipz_hca_handle.handle, - e_fmr->ipz_mr_handle.handle, - e_fmr->ib.ib_fmr.lkey, hipzout.lkey); - /* try free and rereg */ - } - - /* first free old FMR */ - h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_free_mr failed, " - "h_ret=%lli e_fmr=%p hca_hndl=%llx mr_hndl=%llx " - "lkey=%x", - h_ret, e_fmr, shca->ipz_hca_handle.handle, - e_fmr->ipz_mr_handle.handle, - e_fmr->ib.ib_fmr.lkey); - ret = ehca2ib_return_code(h_ret); - goto ehca_unmap_one_fmr_exit0; - } - /* clean ehca_mr_t, without 
changing lock */ - save_fmr = *e_fmr; - ehca_mr_deletenew(e_fmr); - - /* set some MR values */ - e_fmr->flags = save_fmr.flags; - e_fmr->hwpage_size = save_fmr.hwpage_size; - e_fmr->fmr_page_size = save_fmr.fmr_page_size; - e_fmr->fmr_max_pages = save_fmr.fmr_max_pages; - e_fmr->fmr_max_maps = save_fmr.fmr_max_maps; - e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt; - e_fmr->acl = save_fmr.acl; - - memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = EHCA_MR_PGI_FMR; - ret = ehca_reg_mr(shca, e_fmr, NULL, - (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), - e_fmr->acl, e_pd, &pginfo, &tmp_lkey, - &tmp_rkey, EHCA_REG_MR); - if (ret) { - u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; - memcpy(&e_fmr->flags, &(save_mr.flags), - sizeof(struct ehca_mr) - offset); - } - -ehca_unmap_one_fmr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x " - "fmr_max_pages=%x", - ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages); - return ret; -} /* end ehca_unmap_one_fmr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_reg_smr(struct ehca_shca *shca, - struct ehca_mr *e_origmr, - struct ehca_mr *e_newmr, - u64 *iova_start, - int acl, - struct ehca_pd *e_pd, - u32 *lkey, /*OUT*/ - u32 *rkey) /*OUT*/ -{ - int ret = 0; - u64 h_ret; - u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout; - - ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl); - - h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, - (u64)iova_start, hipz_acl, e_pd->fw_pd, - &hipzout); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli " - "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x " - "e_pd=%p hca_hndl=%llx mr_hndl=%llx lkey=%x", - h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd, - shca->ipz_hca_handle.handle, - e_origmr->ipz_mr_handle.handle, - e_origmr->ib.ib_mr.lkey); - ret = ehca2ib_return_code(h_ret); - goto ehca_reg_smr_exit0; - } - 
/* successful registration */ - e_newmr->num_kpages = e_origmr->num_kpages; - e_newmr->num_hwpages = e_origmr->num_hwpages; - e_newmr->hwpage_size = e_origmr->hwpage_size; - e_newmr->start = iova_start; - e_newmr->size = e_origmr->size; - e_newmr->acl = acl; - e_newmr->ipz_mr_handle = hipzout.handle; - *lkey = hipzout.lkey; - *rkey = hipzout.rkey; - return 0; - -ehca_reg_smr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p " - "e_newmr=%p iova_start=%p acl=%x e_pd=%p", - ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd); - return ret; -} /* end ehca_reg_smr() */ - -/*----------------------------------------------------------------------*/ -static inline void *ehca_calc_sectbase(int top, int dir, int idx) -{ - unsigned long ret = idx; - ret |= dir << EHCA_DIR_INDEX_SHIFT; - ret |= top << EHCA_TOP_INDEX_SHIFT; - return __va(ret << SECTION_SIZE_BITS); -} - -#define ehca_bmap_valid(entry) \ - ((u64)entry != (u64)EHCA_INVAL_ADDR) - -static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage, - struct ehca_shca *shca, struct ehca_mr *mr, - struct ehca_mr_pginfo *pginfo) -{ - u64 h_ret = 0; - unsigned long page = 0; - u64 rpage = __pa(kpage); - int page_count; - - void *sectbase = ehca_calc_sectbase(top, dir, idx); - if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) { - ehca_err(&shca->ib_device, "reg_mr_section will probably fail:" - "hwpage_size does not fit to " - "section start address"); - } - page_count = EHCA_SECTSIZE / pginfo->hwpage_size; - - while (page < page_count) { - u64 rnum; - for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count); - rnum++) { - void *pg = sectbase + ((page++) * pginfo->hwpage_size); - kpage[rnum] = __pa(pg); - } - - h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr, - ehca_encode_hwpage_size(pginfo->hwpage_size), - 0, rpage, rnum); - - if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) { - ehca_err(&shca->ib_device, "register_rpage_mr failed"); - return h_ret; - } - } - 
return h_ret; -} - -static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage, - struct ehca_shca *shca, struct ehca_mr *mr, - struct ehca_mr_pginfo *pginfo) -{ - u64 hret = H_SUCCESS; - int idx; - - for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) { - if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx])) - continue; - - hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr, - pginfo); - if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) - return hret; - } - return hret; -} - -static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca, - struct ehca_mr *mr, - struct ehca_mr_pginfo *pginfo) -{ - u64 hret = H_SUCCESS; - int dir; - - for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) { - if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) - continue; - - hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo); - if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) - return hret; - } - return hret; -} - -/* register internal max-MR to internal SHCA */ -int ehca_reg_internal_maxmr( - struct ehca_shca *shca, - struct ehca_pd *e_pd, - struct ehca_mr **e_maxmr) /*OUT*/ -{ - int ret; - struct ehca_mr *e_mr; - u64 *iova_start; - u64 size_maxmr; - struct ehca_mr_pginfo pginfo; - struct ib_phys_buf ib_pbuf; - u32 num_kpages; - u32 num_hwpages; - u64 hw_pgsize; - - if (!ehca_bmap) { - ret = -EFAULT; - goto ehca_reg_internal_maxmr_exit0; - } - - e_mr = ehca_mr_new(); - if (!e_mr) { - ehca_err(&shca->ib_device, "out of memory"); - ret = -ENOMEM; - goto ehca_reg_internal_maxmr_exit0; - } - e_mr->flags |= EHCA_MR_FLAG_MAXMR; - - /* register internal max-MR on HCA */ - size_maxmr = ehca_mr_len; - iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)); - ib_pbuf.addr = 0; - ib_pbuf.size = size_maxmr; - num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr, - PAGE_SIZE); - hw_pgsize = ehca_get_max_hwpage_size(shca); - num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr, - hw_pgsize); - - 
memset(&pginfo, 0, sizeof(pginfo)); - pginfo.type = EHCA_MR_PGI_PHYS; - pginfo.num_kpages = num_kpages; - pginfo.num_hwpages = num_hwpages; - pginfo.hwpage_size = hw_pgsize; - pginfo.u.phy.num_phys_buf = 1; - pginfo.u.phy.phys_buf_array = &ib_pbuf; - - ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd, - &pginfo, &e_mr->ib.ib_mr.lkey, - &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR); - if (ret) { - ehca_err(&shca->ib_device, "reg of internal max MR failed, " - "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x " - "num_hwpages=%x", e_mr, iova_start, size_maxmr, - num_kpages, num_hwpages); - goto ehca_reg_internal_maxmr_exit1; - } - - /* successful registration of all pages */ - e_mr->ib.ib_mr.device = e_pd->ib_pd.device; - e_mr->ib.ib_mr.pd = &e_pd->ib_pd; - e_mr->ib.ib_mr.uobject = NULL; - atomic_inc(&(e_pd->ib_pd.usecnt)); - atomic_set(&(e_mr->ib.ib_mr.usecnt), 0); - *e_maxmr = e_mr; - return 0; - -ehca_reg_internal_maxmr_exit1: - ehca_mr_delete(e_mr); -ehca_reg_internal_maxmr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p", - ret, shca, e_pd, e_maxmr); - return ret; -} /* end ehca_reg_internal_maxmr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_reg_maxmr(struct ehca_shca *shca, - struct ehca_mr *e_newmr, - u64 *iova_start, - int acl, - struct ehca_pd *e_pd, - u32 *lkey, - u32 *rkey) -{ - u64 h_ret; - struct ehca_mr *e_origmr = shca->maxmr; - u32 hipz_acl; - struct ehca_mr_hipzout_parms hipzout; - - ehca_mrmw_map_acl(acl, &hipz_acl); - ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl); - - h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, - (u64)iova_start, hipz_acl, e_pd->fw_pd, - &hipzout); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli " - "e_origmr=%p hca_hndl=%llx mr_hndl=%llx lkey=%x", - h_ret, e_origmr, shca->ipz_hca_handle.handle, - e_origmr->ipz_mr_handle.handle, - 
e_origmr->ib.ib_mr.lkey); - return ehca2ib_return_code(h_ret); - } - /* successful registration */ - e_newmr->num_kpages = e_origmr->num_kpages; - e_newmr->num_hwpages = e_origmr->num_hwpages; - e_newmr->hwpage_size = e_origmr->hwpage_size; - e_newmr->start = iova_start; - e_newmr->size = e_origmr->size; - e_newmr->acl = acl; - e_newmr->ipz_mr_handle = hipzout.handle; - *lkey = hipzout.lkey; - *rkey = hipzout.rkey; - return 0; -} /* end ehca_reg_maxmr() */ - -/*----------------------------------------------------------------------*/ - -int ehca_dereg_internal_maxmr(struct ehca_shca *shca) -{ - int ret; - struct ehca_mr *e_maxmr; - struct ib_pd *ib_pd; - - if (!shca->maxmr) { - ehca_err(&shca->ib_device, "bad call, shca=%p", shca); - ret = -EINVAL; - goto ehca_dereg_internal_maxmr_exit0; - } - - e_maxmr = shca->maxmr; - ib_pd = e_maxmr->ib.ib_mr.pd; - shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */ - - ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr); - if (ret) { - ehca_err(&shca->ib_device, "dereg internal max-MR failed, " - "ret=%i e_maxmr=%p shca=%p lkey=%x", - ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey); - shca->maxmr = e_maxmr; - goto ehca_dereg_internal_maxmr_exit0; - } - - atomic_dec(&ib_pd->usecnt); - -ehca_dereg_internal_maxmr_exit0: - if (ret) - ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p", - ret, shca, shca->maxmr); - return ret; -} /* end ehca_dereg_internal_maxmr() */ - -/*----------------------------------------------------------------------*/ - -/* - * check physical buffer array of MR verbs for validness and - * calculates MR size - */ -int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - u64 *iova_start, - u64 *size) -{ - struct ib_phys_buf *pbuf = phys_buf_array; - u64 size_count = 0; - u32 i; - - if (num_phys_buf == 0) { - ehca_gen_err("bad phys buf array len, num_phys_buf=0"); - return -EINVAL; - } - /* check first buffer */ - if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & 
~PAGE_MASK)) { - ehca_gen_err("iova_start/addr mismatch, iova_start=%p " - "pbuf->addr=%llx pbuf->size=%llx", - iova_start, pbuf->addr, pbuf->size); - return -EINVAL; - } - if (((pbuf->addr + pbuf->size) % PAGE_SIZE) && - (num_phys_buf > 1)) { - ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx " - "pbuf->size=%llx", pbuf->addr, pbuf->size); - return -EINVAL; - } - - for (i = 0; i < num_phys_buf; i++) { - if ((i > 0) && (pbuf->addr % PAGE_SIZE)) { - ehca_gen_err("bad address, i=%x pbuf->addr=%llx " - "pbuf->size=%llx", - i, pbuf->addr, pbuf->size); - return -EINVAL; - } - if (((i > 0) && /* not 1st */ - (i < (num_phys_buf - 1)) && /* not last */ - (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) { - ehca_gen_err("bad size, i=%x pbuf->size=%llx", - i, pbuf->size); - return -EINVAL; - } - size_count += pbuf->size; - pbuf++; - } - - *size = size_count; - return 0; -} /* end ehca_mr_chk_buf_and_calc_size() */ - -/*----------------------------------------------------------------------*/ - -/* check page list of map FMR verb for validness */ -int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, - u64 *page_list, - int list_len) -{ - u32 i; - u64 *page; - - if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) { - ehca_gen_err("bad list_len, list_len=%x " - "e_fmr->fmr_max_pages=%x fmr=%p", - list_len, e_fmr->fmr_max_pages, e_fmr); - return -EINVAL; - } - - /* each page must be aligned */ - page = page_list; - for (i = 0; i < list_len; i++) { - if (*page % e_fmr->fmr_page_size) { - ehca_gen_err("bad page, i=%x *page=%llx page=%p fmr=%p " - "fmr_page_size=%x", i, *page, page, e_fmr, - e_fmr->fmr_page_size); - return -EINVAL; - } - page++; - } - - return 0; -} /* end ehca_fmr_check_page_list() */ - -/*----------------------------------------------------------------------*/ - -/* PAGE_SIZE >= pginfo->hwpage_size */ -static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, - u32 number, - u64 *kpage) -{ - int ret = 0; - u64 pgaddr; - u32 j = 0; - int 
hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size; - struct scatterlist **sg = &pginfo->u.usr.next_sg; - - while (*sg != NULL) { - pgaddr = page_to_pfn(sg_page(*sg)) - << PAGE_SHIFT; - *kpage = pgaddr + (pginfo->next_hwpage * - pginfo->hwpage_size); - if (!(*kpage)) { - ehca_gen_err("pgaddr=%llx " - "sg_dma_address=%llx " - "entry=%llx next_hwpage=%llx", - pgaddr, (u64)sg_dma_address(*sg), - pginfo->u.usr.next_nmap, - pginfo->next_hwpage); - return -EFAULT; - } - (pginfo->hwpage_cnt)++; - (pginfo->next_hwpage)++; - kpage++; - if (pginfo->next_hwpage % hwpages_per_kpage == 0) { - (pginfo->kpage_cnt)++; - (pginfo->u.usr.next_nmap)++; - pginfo->next_hwpage = 0; - *sg = sg_next(*sg); - } - j++; - if (j >= number) - break; - } - - return ret; -} - -/* - * check given pages for contiguous layout - * last page addr is returned in prev_pgaddr for further check - */ -static int ehca_check_kpages_per_ate(struct scatterlist **sg, - int num_pages, - u64 *prev_pgaddr) -{ - for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) { - u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT; - if (ehca_debug_level >= 3) - ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr, - *(u64 *)__va(pgaddr)); - if (pgaddr - PAGE_SIZE != *prev_pgaddr) { - ehca_gen_err("uncontiguous page found pgaddr=%llx " - "prev_pgaddr=%llx entries_left_in_hwpage=%x", - pgaddr, *prev_pgaddr, num_pages); - return -EINVAL; - } - *prev_pgaddr = pgaddr; - } - return 0; -} - -/* PAGE_SIZE < pginfo->hwpage_size */ -static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo, - u32 number, - u64 *kpage) -{ - int ret = 0; - u64 pgaddr, prev_pgaddr; - u32 j = 0; - int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE; - int nr_kpages = kpages_per_hwpage; - struct scatterlist **sg = &pginfo->u.usr.next_sg; - - while (*sg != NULL) { - - if (nr_kpages == kpages_per_hwpage) { - pgaddr = (page_to_pfn(sg_page(*sg)) - << PAGE_SHIFT); - *kpage = pgaddr; - if (!(*kpage)) { - ehca_gen_err("pgaddr=%llx entry=%llx", 
- pgaddr, pginfo->u.usr.next_nmap); - ret = -EFAULT; - return ret; - } - /* - * The first page in a hwpage must be aligned; - * the first MR page is exempt from this rule. - */ - if (pgaddr & (pginfo->hwpage_size - 1)) { - if (pginfo->hwpage_cnt) { - ehca_gen_err( - "invalid alignment " - "pgaddr=%llx entry=%llx " - "mr_pgsize=%llx", - pgaddr, pginfo->u.usr.next_nmap, - pginfo->hwpage_size); - ret = -EFAULT; - return ret; - } - /* first MR page */ - pginfo->kpage_cnt = - (pgaddr & - (pginfo->hwpage_size - 1)) >> - PAGE_SHIFT; - nr_kpages -= pginfo->kpage_cnt; - *kpage = pgaddr & - ~(pginfo->hwpage_size - 1); - } - if (ehca_debug_level >= 3) { - u64 val = *(u64 *)__va(pgaddr); - ehca_gen_dbg("kpage=%llx page=%llx " - "value=%016llx", - *kpage, pgaddr, val); - } - prev_pgaddr = pgaddr; - *sg = sg_next(*sg); - pginfo->kpage_cnt++; - pginfo->u.usr.next_nmap++; - nr_kpages--; - if (!nr_kpages) - goto next_kpage; - continue; - } - - ret = ehca_check_kpages_per_ate(sg, nr_kpages, - &prev_pgaddr); - if (ret) - return ret; - pginfo->kpage_cnt += nr_kpages; - pginfo->u.usr.next_nmap += nr_kpages; - -next_kpage: - nr_kpages = kpages_per_hwpage; - (pginfo->hwpage_cnt)++; - kpage++; - j++; - if (j >= number) - break; - } - - return ret; -} - -static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo, - u32 number, u64 *kpage) -{ - int ret = 0; - struct ib_phys_buf *pbuf; - u64 num_hw, offs_hw; - u32 i = 0; - - /* loop over desired phys_buf_array entries */ - while (i < number) { - pbuf = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf; - num_hw = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) + - pbuf->size, pginfo->hwpage_size); - offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) / - pginfo->hwpage_size; - while (pginfo->next_hwpage < offs_hw + num_hw) { - /* sanity check */ - if ((pginfo->kpage_cnt >= pginfo->num_kpages) || - (pginfo->hwpage_cnt >= pginfo->num_hwpages)) { - ehca_gen_err("kpage_cnt >= num_kpages, " - "kpage_cnt=%llx num_kpages=%llx " - 
"hwpage_cnt=%llx " - "num_hwpages=%llx i=%x", - pginfo->kpage_cnt, - pginfo->num_kpages, - pginfo->hwpage_cnt, - pginfo->num_hwpages, i); - return -EFAULT; - } - *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) + - (pginfo->next_hwpage * pginfo->hwpage_size); - if ( !(*kpage) && pbuf->addr ) { - ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx " - "next_hwpage=%llx", pbuf->addr, - pbuf->size, pginfo->next_hwpage); - return -EFAULT; - } - (pginfo->hwpage_cnt)++; - (pginfo->next_hwpage)++; - if (PAGE_SIZE >= pginfo->hwpage_size) { - if (pginfo->next_hwpage % - (PAGE_SIZE / pginfo->hwpage_size) == 0) - (pginfo->kpage_cnt)++; - } else - pginfo->kpage_cnt += pginfo->hwpage_size / - PAGE_SIZE; - kpage++; - i++; - if (i >= number) break; - } - if (pginfo->next_hwpage >= offs_hw + num_hw) { - (pginfo->u.phy.next_buf)++; - pginfo->next_hwpage = 0; - } - } - return ret; -} - -static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, - u32 number, u64 *kpage) -{ - int ret = 0; - u64 *fmrlist; - u32 i; - - /* loop over desired page_list entries */ - fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem; - for (i = 0; i < number; i++) { - *kpage = (*fmrlist & ~(pginfo->hwpage_size - 1)) + - pginfo->next_hwpage * pginfo->hwpage_size; - if ( !(*kpage) ) { - ehca_gen_err("*fmrlist=%llx fmrlist=%p " - "next_listelem=%llx next_hwpage=%llx", - *fmrlist, fmrlist, - pginfo->u.fmr.next_listelem, - pginfo->next_hwpage); - return -EFAULT; - } - (pginfo->hwpage_cnt)++; - if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) { - if (pginfo->next_hwpage % - (pginfo->u.fmr.fmr_pgsize / - pginfo->hwpage_size) == 0) { - (pginfo->kpage_cnt)++; - (pginfo->u.fmr.next_listelem)++; - fmrlist++; - pginfo->next_hwpage = 0; - } else - (pginfo->next_hwpage)++; - } else { - unsigned int cnt_per_hwpage = pginfo->hwpage_size / - pginfo->u.fmr.fmr_pgsize; - unsigned int j; - u64 prev = *kpage; - /* check if adrs are contiguous */ - for (j = 1; j < cnt_per_hwpage; j++) { - u64 p = fmrlist[j] & 
~(pginfo->hwpage_size - 1); - if (prev + pginfo->u.fmr.fmr_pgsize != p) { - ehca_gen_err("uncontiguous fmr pages " - "found prev=%llx p=%llx " - "idx=%x", prev, p, i + j); - return -EINVAL; - } - prev = p; - } - pginfo->kpage_cnt += cnt_per_hwpage; - pginfo->u.fmr.next_listelem += cnt_per_hwpage; - fmrlist += cnt_per_hwpage; - } - kpage++; - } - return ret; -} - -/* setup page buffer from page info */ -int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, - u32 number, - u64 *kpage) -{ - int ret; - - switch (pginfo->type) { - case EHCA_MR_PGI_PHYS: - ret = ehca_set_pagebuf_phys(pginfo, number, kpage); - break; - case EHCA_MR_PGI_USER: - ret = PAGE_SIZE >= pginfo->hwpage_size ? - ehca_set_pagebuf_user1(pginfo, number, kpage) : - ehca_set_pagebuf_user2(pginfo, number, kpage); - break; - case EHCA_MR_PGI_FMR: - ret = ehca_set_pagebuf_fmr(pginfo, number, kpage); - break; - default: - ehca_gen_err("bad pginfo->type=%x", pginfo->type); - ret = -EFAULT; - break; - } - return ret; -} /* end ehca_set_pagebuf() */ - -/*----------------------------------------------------------------------*/ - -/* - * check MR if it is a max-MR, i.e. uses whole memory - * in case it's a max-MR 1 is returned, else 0 - */ -int ehca_mr_is_maxmr(u64 size, - u64 *iova_start) -{ - /* a MR is treated as max-MR only if it fits following: */ - if ((size == ehca_mr_len) && - (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) { - ehca_gen_dbg("this is a max-MR"); - return 1; - } else - return 0; -} /* end ehca_mr_is_maxmr() */ - -/*----------------------------------------------------------------------*/ - -/* map access control for MR/MW. This routine is used for MR and MW. 
*/ -void ehca_mrmw_map_acl(int ib_acl, - u32 *hipz_acl) -{ - *hipz_acl = 0; - if (ib_acl & IB_ACCESS_REMOTE_READ) - *hipz_acl |= HIPZ_ACCESSCTRL_R_READ; - if (ib_acl & IB_ACCESS_REMOTE_WRITE) - *hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE; - if (ib_acl & IB_ACCESS_REMOTE_ATOMIC) - *hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC; - if (ib_acl & IB_ACCESS_LOCAL_WRITE) - *hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE; - if (ib_acl & IB_ACCESS_MW_BIND) - *hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND; -} /* end ehca_mrmw_map_acl() */ - -/*----------------------------------------------------------------------*/ - -/* sets page size in hipz access control for MR/MW. */ -void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/ -{ - *hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24); -} /* end ehca_mrmw_set_pgsize_hipz_acl() */ - -/*----------------------------------------------------------------------*/ - -/* - * reverse map access control for MR/MW. - * This routine is used for MR and MW. - */ -void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, - int *ib_acl) /*OUT*/ -{ - *ib_acl = 0; - if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ) - *ib_acl |= IB_ACCESS_REMOTE_READ; - if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE) - *ib_acl |= IB_ACCESS_REMOTE_WRITE; - if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC) - *ib_acl |= IB_ACCESS_REMOTE_ATOMIC; - if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE) - *ib_acl |= IB_ACCESS_LOCAL_WRITE; - if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND) - *ib_acl |= IB_ACCESS_MW_BIND; -} /* end ehca_mrmw_reverse_map_acl() */ - - -/*----------------------------------------------------------------------*/ - -/* - * MR destructor and constructor - * used in Reregister MR verb, sets all fields in ehca_mr_t to 0, - * except struct ib_mr and spinlock - */ -void ehca_mr_deletenew(struct ehca_mr *mr) -{ - mr->flags = 0; - mr->num_kpages = 0; - mr->num_hwpages = 0; - mr->acl = 0; - mr->start = NULL; - mr->fmr_page_size = 0; - mr->fmr_max_pages = 0; - mr->fmr_max_maps = 0; - mr->fmr_map_cnt = 0; - 
memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle)); - memset(&mr->galpas, 0, sizeof(mr->galpas)); -} /* end ehca_mr_deletenew() */ - -int ehca_init_mrmw_cache(void) -{ - mr_cache = kmem_cache_create("ehca_cache_mr", - sizeof(struct ehca_mr), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!mr_cache) - return -ENOMEM; - mw_cache = kmem_cache_create("ehca_cache_mw", - sizeof(struct ehca_mw), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!mw_cache) { - kmem_cache_destroy(mr_cache); - mr_cache = NULL; - return -ENOMEM; - } - return 0; -} - -void ehca_cleanup_mrmw_cache(void) -{ - if (mr_cache) - kmem_cache_destroy(mr_cache); - if (mw_cache) - kmem_cache_destroy(mw_cache); -} - -static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap, - int dir) -{ - if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) { - ehca_top_bmap->dir[dir] = - kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL); - if (!ehca_top_bmap->dir[dir]) - return -ENOMEM; - /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ - memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE); - } - return 0; -} - -static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir) -{ - if (!ehca_bmap_valid(ehca_bmap->top[top])) { - ehca_bmap->top[top] = - kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL); - if (!ehca_bmap->top[top]) - return -ENOMEM; - /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ - memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE); - } - return ehca_init_top_bmap(ehca_bmap->top[top], dir); -} - -static inline int ehca_calc_index(unsigned long i, unsigned long s) -{ - return (i >> s) & EHCA_INDEX_MASK; -} - -void ehca_destroy_busmap(void) -{ - int top, dir; - - if (!ehca_bmap) - return; - - for (top = 0; top < EHCA_MAP_ENTRIES; top++) { - if (!ehca_bmap_valid(ehca_bmap->top[top])) - continue; - for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) { - if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) - continue; - - kfree(ehca_bmap->top[top]->dir[dir]); - } - - 
kfree(ehca_bmap->top[top]); - } - - kfree(ehca_bmap); - ehca_bmap = NULL; -} - -static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages) -{ - unsigned long i, start_section, end_section; - int top, dir, idx; - - if (!nr_pages) - return 0; - - if (!ehca_bmap) { - ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL); - if (!ehca_bmap) - return -ENOMEM; - /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ - memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE); - } - - start_section = (pfn * PAGE_SIZE) / EHCA_SECTSIZE; - end_section = ((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE; - for (i = start_section; i < end_section; i++) { - int ret; - top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT); - dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT); - idx = i & EHCA_INDEX_MASK; - - ret = ehca_init_bmap(ehca_bmap, top, dir); - if (ret) { - ehca_destroy_busmap(); - return ret; - } - ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len; - ehca_mr_len += EHCA_SECTSIZE; - } - return 0; -} - -static int ehca_is_hugepage(unsigned long pfn) -{ - int page_order; - - if (pfn & EHCA_HUGEPAGE_PFN_MASK) - return 0; - - page_order = compound_order(pfn_to_page(pfn)); - if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT) - return 0; - - return 1; -} - -static int ehca_create_busmap_callback(unsigned long initial_pfn, - unsigned long total_nr_pages, void *arg) -{ - int ret; - unsigned long pfn, start_pfn, end_pfn, nr_pages; - - if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE) - return ehca_update_busmap(initial_pfn, total_nr_pages); - - /* Given chunk is >= 16GB -> check for hugepages */ - start_pfn = initial_pfn; - end_pfn = initial_pfn + total_nr_pages; - pfn = start_pfn; - - while (pfn < end_pfn) { - if (ehca_is_hugepage(pfn)) { - /* Add mem found in front of the hugepage */ - nr_pages = pfn - start_pfn; - ret = ehca_update_busmap(start_pfn, nr_pages); - if (ret) - return ret; - /* Skip the hugepage */ - pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE); - start_pfn = pfn; - } 
else - pfn += (EHCA_SECTSIZE / PAGE_SIZE); - } - - /* Add mem found behind the hugepage(s) */ - nr_pages = pfn - start_pfn; - return ehca_update_busmap(start_pfn, nr_pages); -} - -int ehca_create_busmap(void) -{ - int ret; - - ehca_mr_len = 0; - ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL, - ehca_create_busmap_callback); - return ret; -} - -static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca, - struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo) -{ - int top; - u64 hret, *kpage; - - kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!kpage) { - ehca_err(&shca->ib_device, "kpage alloc failed"); - return -ENOMEM; - } - for (top = 0; top < EHCA_MAP_ENTRIES; top++) { - if (!ehca_bmap_valid(ehca_bmap->top[top])) - continue; - hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo); - if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS)) - break; - } - - ehca_free_fw_ctrlblock(kpage); - - if (hret == H_SUCCESS) - return 0; /* Everything is fine */ - else { - ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, " - "h_ret=%lli e_mr=%p top=%x lkey=%x " - "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top, - e_mr->ib.ib_mr.lkey, - shca->ipz_hca_handle.handle, - e_mr->ipz_mr_handle.handle); - return ehca2ib_return_code(hret); - } -} - -static u64 ehca_map_vaddr(void *caddr) -{ - int top, dir, idx; - unsigned long abs_addr, offset; - u64 entry; - - if (!ehca_bmap) - return EHCA_INVAL_ADDR; - - abs_addr = __pa(caddr); - top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT); - if (!ehca_bmap_valid(ehca_bmap->top[top])) - return EHCA_INVAL_ADDR; - - dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT); - if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) - return EHCA_INVAL_ADDR; - - idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT); - - entry = ehca_bmap->top[top]->dir[dir]->ent[idx]; - if (ehca_bmap_valid(entry)) { - offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1); - return entry | offset; - } else - 
return EHCA_INVAL_ADDR; -} - -static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr) -{ - return dma_addr == EHCA_INVAL_ADDR; -} - -static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr, - size_t size, enum dma_data_direction direction) -{ - if (cpu_addr) - return ehca_map_vaddr(cpu_addr); - else - return EHCA_INVAL_ADDR; -} - -static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, - enum dma_data_direction direction) -{ - /* This is only a stub; nothing to be done here */ -} - -static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction) -{ - u64 addr; - - if (offset + size > PAGE_SIZE) - return EHCA_INVAL_ADDR; - - addr = ehca_map_vaddr(page_address(page)); - if (!ehca_dma_mapping_error(dev, addr)) - addr += offset; - - return addr; -} - -static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, - enum dma_data_direction direction) -{ - /* This is only a stub; nothing to be done here */ -} - -static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction direction) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) { - u64 addr; - addr = ehca_map_vaddr(sg_virt(sg)); - if (ehca_dma_mapping_error(dev, addr)) - return 0; - - sg->dma_address = addr; - sg->dma_length = sg->length; - } - return nents; -} - -static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction direction) -{ - /* This is only a stub; nothing to be done here */ -} - -static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr, - size_t size, - enum dma_data_direction dir) -{ - dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); -} - -static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr, - size_t size, - enum dma_data_direction dir) -{ - 
dma_sync_single_for_device(dev->dma_device, addr, size, dir); -} - -static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size, - u64 *dma_handle, gfp_t flag) -{ - struct page *p; - void *addr = NULL; - u64 dma_addr; - - p = alloc_pages(flag, get_order(size)); - if (p) { - addr = page_address(p); - dma_addr = ehca_map_vaddr(addr); - if (ehca_dma_mapping_error(dev, dma_addr)) { - free_pages((unsigned long)addr, get_order(size)); - return NULL; - } - if (dma_handle) - *dma_handle = dma_addr; - return addr; - } - return NULL; -} - -static void ehca_dma_free_coherent(struct ib_device *dev, size_t size, - void *cpu_addr, u64 dma_handle) -{ - if (cpu_addr && size) - free_pages((unsigned long)cpu_addr, get_order(size)); -} - - -struct ib_dma_mapping_ops ehca_dma_mapping_ops = { - .mapping_error = ehca_dma_mapping_error, - .map_single = ehca_dma_map_single, - .unmap_single = ehca_dma_unmap_single, - .map_page = ehca_dma_map_page, - .unmap_page = ehca_dma_unmap_page, - .map_sg = ehca_dma_map_sg, - .unmap_sg = ehca_dma_unmap_sg, - .sync_single_for_cpu = ehca_dma_sync_single_for_cpu, - .sync_single_for_device = ehca_dma_sync_single_for_device, - .alloc_coherent = ehca_dma_alloc_coherent, - .free_coherent = ehca_dma_free_coherent, -}; diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.h b/drivers/infiniband/hw/ehca/ehca_mrmw.h deleted file mode 100644 index 50d8b51306dd..000000000000 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * MR/MW declarations and inline functions - * - * Authors: Dietmar Decker - * Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. 
- * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef _EHCA_MRMW_H_ -#define _EHCA_MRMW_H_ - -enum ehca_reg_type { - EHCA_REG_MR, - EHCA_REG_BUSMAP_MR -}; - -int ehca_reg_mr(struct ehca_shca *shca, - struct ehca_mr *e_mr, - u64 *iova_start, - u64 size, - int acl, - struct ehca_pd *e_pd, - struct ehca_mr_pginfo *pginfo, - u32 *lkey, - u32 *rkey, - enum ehca_reg_type reg_type); - -int ehca_reg_mr_rpages(struct ehca_shca *shca, - struct ehca_mr *e_mr, - struct ehca_mr_pginfo *pginfo); - -int ehca_rereg_mr(struct ehca_shca *shca, - struct ehca_mr *e_mr, - u64 *iova_start, - u64 size, - int mr_access_flags, - struct ehca_pd *e_pd, - struct ehca_mr_pginfo *pginfo, - u32 *lkey, - u32 *rkey); - -int ehca_unmap_one_fmr(struct ehca_shca *shca, - struct ehca_mr *e_fmr); - -int ehca_reg_smr(struct ehca_shca *shca, - struct ehca_mr *e_origmr, - struct ehca_mr *e_newmr, - u64 *iova_start, - int acl, - struct ehca_pd *e_pd, - u32 *lkey, - u32 *rkey); - -int ehca_reg_internal_maxmr(struct ehca_shca *shca, - struct ehca_pd *e_pd, - struct ehca_mr **maxmr); - -int ehca_reg_maxmr(struct ehca_shca *shca, - struct ehca_mr *e_newmr, - u64 *iova_start, - int acl, - struct ehca_pd *e_pd, - u32 *lkey, - u32 *rkey); - -int ehca_dereg_internal_maxmr(struct ehca_shca *shca); - -int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, - int num_phys_buf, - u64 *iova_start, - u64 *size); - -int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, - u64 *page_list, - int list_len); - -int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, - u32 number, - u64 *kpage); - -int ehca_mr_is_maxmr(u64 size, - u64 *iova_start); - -void ehca_mrmw_map_acl(int ib_acl, - u32 *hipz_acl); - -void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl); - -void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, - int *ib_acl); - -void ehca_mr_deletenew(struct ehca_mr *mr); - -int ehca_create_busmap(void); - -void ehca_destroy_busmap(void); - -extern struct ib_dma_mapping_ops ehca_dma_mapping_ops; -#endif /*_EHCA_MRMW_H_*/ diff --git 
a/drivers/infiniband/hw/ehca/ehca_pd.c b/drivers/infiniband/hw/ehca/ehca_pd.c deleted file mode 100644 index 351577a6670a..000000000000 --- a/drivers/infiniband/hw/ehca/ehca_pd.c +++ /dev/null @@ -1,124 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * PD functions - * - * Authors: Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include - -#include "ehca_tools.h" -#include "ehca_iverbs.h" - -static struct kmem_cache *pd_cache; - -struct ib_pd *ehca_alloc_pd(struct ib_device *device, - struct ib_ucontext *context, struct ib_udata *udata) -{ - struct ehca_pd *pd; - int i; - - pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL); - if (!pd) { - ehca_err(device, "device=%p context=%p out of memory", - device, context); - return ERR_PTR(-ENOMEM); - } - - for (i = 0; i < 2; i++) { - INIT_LIST_HEAD(&pd->free[i]); - INIT_LIST_HEAD(&pd->full[i]); - } - mutex_init(&pd->lock); - - /* - * Kernel PD: when device = -1, 0 - * User PD: when context != -1 - */ - if (!context) { - /* - * Kernel PDs after init reuses always - * the one created in ehca_shca_reopen() - */ - struct ehca_shca *shca = container_of(device, struct ehca_shca, - ib_device); - pd->fw_pd.value = shca->pd->fw_pd.value; - } else - pd->fw_pd.value = (u64)pd; - - return &pd->ib_pd; -} - -int ehca_dealloc_pd(struct ib_pd *pd) -{ - struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); - int i, leftovers = 0; - struct ipz_small_queue_page *page, *tmp; - - for (i = 0; i < 2; i++) { - list_splice(&my_pd->full[i], &my_pd->free[i]); - list_for_each_entry_safe(page, tmp, &my_pd->free[i], list) { - leftovers = 1; - free_page(page->page); - kmem_cache_free(small_qp_cache, page); - } - } - - if (leftovers) - ehca_warn(pd->device, - "Some small queue pages were not freed"); - - kmem_cache_free(pd_cache, my_pd); - - return 0; -} - -int ehca_init_pd_cache(void) -{ - pd_cache = kmem_cache_create("ehca_cache_pd", - sizeof(struct ehca_pd), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!pd_cache) - return -ENOMEM; - return 0; -} - -void ehca_cleanup_pd_cache(void) -{ - if (pd_cache) - kmem_cache_destroy(pd_cache); -} diff --git a/drivers/infiniband/hw/ehca/ehca_qes.h b/drivers/infiniband/hw/ehca/ehca_qes.h deleted file mode 100644 index 90c4efa67586..000000000000 --- a/drivers/infiniband/hw/ehca/ehca_qes.h +++ /dev/null @@ -1,260 +0,0 @@ -/* - * IBM 
eServer eHCA Infiniband device driver for Linux on POWER - * - * Hardware request structures - * - * Authors: Waleri Fomin - * Reinhard Ernst - * Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#ifndef _EHCA_QES_H_ -#define _EHCA_QES_H_ - -#include "ehca_tools.h" - -/* virtual scatter gather entry to specify remote addresses with length */ -struct ehca_vsgentry { - u64 vaddr; - u32 lkey; - u32 length; -}; - -#define GRH_FLAG_MASK EHCA_BMASK_IBM( 7, 7) -#define GRH_IPVERSION_MASK EHCA_BMASK_IBM( 0, 3) -#define GRH_TCLASS_MASK EHCA_BMASK_IBM( 4, 12) -#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13, 31) -#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32, 47) -#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48, 55) -#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56, 63) - -/* - * Unreliable Datagram Address Vector Format - * see IBTA Vol1 chapter 8.3 Global Routing Header - */ -struct ehca_ud_av { - u8 sl; - u8 lnh; - u16 dlid; - u8 reserved1; - u8 reserved2; - u8 reserved3; - u8 slid_path_bits; - u8 reserved4; - u8 ipd; - u8 reserved5; - u8 pmtu; - u32 reserved6; - u64 reserved7; - union { - struct { - u64 word_0; /* always set to 6 */ - /*should be 0x1B for IB transport */ - u64 word_1; - u64 word_2; - u64 word_3; - u64 word_4; - } grh; - struct { - u32 wd_0; - u32 wd_1; - /* DWord_1 --> SGID */ - - u32 sgid_wd3; - u32 sgid_wd2; - - u32 sgid_wd1; - u32 sgid_wd0; - /* DWord_3 --> DGID */ - - u32 dgid_wd3; - u32 dgid_wd2; - - u32 dgid_wd1; - u32 dgid_wd0; - } grh_l; - }; -}; - -/* maximum number of sg entries allowed in a WQE */ -#define MAX_WQE_SG_ENTRIES 252 - -#define WQE_OPTYPE_SEND 0x80 -#define WQE_OPTYPE_RDMAREAD 0x40 -#define WQE_OPTYPE_RDMAWRITE 0x20 -#define WQE_OPTYPE_CMPSWAP 0x10 -#define WQE_OPTYPE_FETCHADD 0x08 -#define WQE_OPTYPE_BIND 0x04 - -#define WQE_WRFLAG_REQ_SIGNAL_COM 0x80 -#define WQE_WRFLAG_FENCE 0x40 -#define WQE_WRFLAG_IMM_DATA_PRESENT 0x20 -#define WQE_WRFLAG_SOLIC_EVENT 0x10 - -#define WQEF_CACHE_HINT 0x80 -#define WQEF_CACHE_HINT_RD_WR 0x40 -#define WQEF_TIMED_WQE 0x20 -#define WQEF_PURGE 0x08 -#define WQEF_HIGH_NIBBLE 0xF0 - -#define MW_BIND_ACCESSCTRL_R_WRITE 0x40 -#define MW_BIND_ACCESSCTRL_R_READ 0x20 -#define MW_BIND_ACCESSCTRL_R_ATOMIC 
0x10 - -struct ehca_wqe { - u64 work_request_id; - u8 optype; - u8 wr_flag; - u16 pkeyi; - u8 wqef; - u8 nr_of_data_seg; - u16 wqe_provided_slid; - u32 destination_qp_number; - u32 resync_psn_sqp; - u32 local_ee_context_qkey; - u32 immediate_data; - union { - struct { - u64 remote_virtual_address; - u32 rkey; - u32 reserved; - u64 atomic_1st_op_dma_len; - u64 atomic_2nd_op; - struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; - - } nud; - struct { - u64 ehca_ud_av_ptr; - u64 reserved1; - u64 reserved2; - u64 reserved3; - struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; - } ud_avp; - struct { - struct ehca_ud_av ud_av; - struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES - - 2]; - } ud_av; - struct { - u64 reserved0; - u64 reserved1; - u64 reserved2; - u64 reserved3; - struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; - } all_rcv; - - struct { - u64 reserved; - u32 rkey; - u32 old_rkey; - u64 reserved1; - u64 reserved2; - u64 virtual_address; - u32 reserved3; - u32 length; - u32 reserved4; - u16 reserved5; - u8 reserved6; - u8 lr_ctl; - u32 lkey; - u32 reserved7; - u64 reserved8; - u64 reserved9; - u64 reserved10; - u64 reserved11; - } bind; - struct { - u64 reserved12; - u64 reserved13; - u32 size; - u32 start; - } inline_data; - } u; - -}; - -#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0, 0) -#define WC_IMM_DATA EHCA_BMASK_IBM(1, 1) -#define WC_GRH_PRESENT EHCA_BMASK_IBM(2, 2) -#define WC_SE_BIT EHCA_BMASK_IBM(3, 3) -#define WC_STATUS_ERROR_BIT 0x80000000 -#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800 -#define WC_STATUS_PURGE_BIT 0x10 -#define WC_SEND_RECEIVE_BIT 0x80 - -struct ehca_cqe { - u64 work_request_id; - u8 optype; - u8 w_completion_flags; - u16 reserved1; - u32 nr_bytes_transferred; - u32 immediate_data; - u32 local_qp_number; - u8 freed_resource_count; - u8 service_level; - u16 wqe_count; - u32 qp_token; - u32 qkey_ee_token; - u32 remote_qp_number; - u16 dlid; - u16 rlid; - u16 reserved2; - u16 pkey_index; - u32 cqe_timestamp; - u32 wqe_timestamp; - u8 
wqe_timestamp_valid; - u8 reserved3; - u8 reserved4; - u8 cqe_flags; - u32 status; -}; - -struct ehca_eqe { - u64 entry; -}; - -struct ehca_mrte { - u64 starting_va; - u64 length; /* length of memory region in bytes*/ - u32 pd; - u8 key_instance; - u8 pagesize; - u8 mr_control; - u8 local_remote_access_ctrl; - u8 reserved[0x20 - 0x18]; - u64 at_pointer[4]; -}; -#endif /*_EHCA_QES_H_*/ diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c deleted file mode 100644 index 2e89356c46fa..000000000000 --- a/drivers/infiniband/hw/ehca/ehca_qp.c +++ /dev/null @@ -1,2257 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * QP functions - * - * Authors: Joachim Fenkes - * Stefan Roscher - * Waleri Fomin - * Hoang-Nam Nguyen - * Reinhard Ernst - * Heiko J Schick - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include - -#include "ehca_classes.h" -#include "ehca_tools.h" -#include "ehca_qes.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" -#include "hipz_fns.h" - -static struct kmem_cache *qp_cache; - -/* - * attributes not supported by query qp - */ -#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS | \ - IB_QP_EN_SQD_ASYNC_NOTIFY) - -/* - * ehca (internal) qp state values - */ -enum ehca_qp_state { - EHCA_QPS_RESET = 1, - EHCA_QPS_INIT = 2, - EHCA_QPS_RTR = 3, - EHCA_QPS_RTS = 5, - EHCA_QPS_SQD = 6, - EHCA_QPS_SQE = 8, - EHCA_QPS_ERR = 128 -}; - -/* - * qp state transitions as defined by IB Arch Rel 1.1 page 431 - */ -enum ib_qp_statetrans { - IB_QPST_ANY2RESET, - IB_QPST_ANY2ERR, - IB_QPST_RESET2INIT, - IB_QPST_INIT2RTR, - IB_QPST_INIT2INIT, - IB_QPST_RTR2RTS, - IB_QPST_RTS2SQD, - IB_QPST_RTS2RTS, - IB_QPST_SQD2RTS, - IB_QPST_SQE2RTS, - IB_QPST_SQD2SQD, - IB_QPST_MAX /* nr of transitions, this must be last!!! 
*/ -}; - -/* - * ib2ehca_qp_state maps IB to ehca qp_state - * returns ehca qp state corresponding to given ib qp state - */ -static inline enum ehca_qp_state ib2ehca_qp_state(enum ib_qp_state ib_qp_state) -{ - switch (ib_qp_state) { - case IB_QPS_RESET: - return EHCA_QPS_RESET; - case IB_QPS_INIT: - return EHCA_QPS_INIT; - case IB_QPS_RTR: - return EHCA_QPS_RTR; - case IB_QPS_RTS: - return EHCA_QPS_RTS; - case IB_QPS_SQD: - return EHCA_QPS_SQD; - case IB_QPS_SQE: - return EHCA_QPS_SQE; - case IB_QPS_ERR: - return EHCA_QPS_ERR; - default: - ehca_gen_err("invalid ib_qp_state=%x", ib_qp_state); - return -EINVAL; - } -} - -/* - * ehca2ib_qp_state maps ehca to IB qp_state - * returns ib qp state corresponding to given ehca qp state - */ -static inline enum ib_qp_state ehca2ib_qp_state(enum ehca_qp_state - ehca_qp_state) -{ - switch (ehca_qp_state) { - case EHCA_QPS_RESET: - return IB_QPS_RESET; - case EHCA_QPS_INIT: - return IB_QPS_INIT; - case EHCA_QPS_RTR: - return IB_QPS_RTR; - case EHCA_QPS_RTS: - return IB_QPS_RTS; - case EHCA_QPS_SQD: - return IB_QPS_SQD; - case EHCA_QPS_SQE: - return IB_QPS_SQE; - case EHCA_QPS_ERR: - return IB_QPS_ERR; - default: - ehca_gen_err("invalid ehca_qp_state=%x", ehca_qp_state); - return -EINVAL; - } -} - -/* - * ehca_qp_type used as index for req_attr and opt_attr of - * struct ehca_modqp_statetrans - */ -enum ehca_qp_type { - QPT_RC = 0, - QPT_UC = 1, - QPT_UD = 2, - QPT_SQP = 3, - QPT_MAX -}; - -/* - * ib2ehcaqptype maps Ib to ehca qp_type - * returns ehca qp type corresponding to ib qp type - */ -static inline enum ehca_qp_type ib2ehcaqptype(enum ib_qp_type ibqptype) -{ - switch (ibqptype) { - case IB_QPT_SMI: - case IB_QPT_GSI: - return QPT_SQP; - case IB_QPT_RC: - return QPT_RC; - case IB_QPT_UC: - return QPT_UC; - case IB_QPT_UD: - return QPT_UD; - default: - ehca_gen_err("Invalid ibqptype=%x", ibqptype); - return -EINVAL; - } -} - -static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate, - int ib_tostate) -{ - 
int index = -EINVAL; - switch (ib_tostate) { - case IB_QPS_RESET: - index = IB_QPST_ANY2RESET; - break; - case IB_QPS_INIT: - switch (ib_fromstate) { - case IB_QPS_RESET: - index = IB_QPST_RESET2INIT; - break; - case IB_QPS_INIT: - index = IB_QPST_INIT2INIT; - break; - } - break; - case IB_QPS_RTR: - if (ib_fromstate == IB_QPS_INIT) - index = IB_QPST_INIT2RTR; - break; - case IB_QPS_RTS: - switch (ib_fromstate) { - case IB_QPS_RTR: - index = IB_QPST_RTR2RTS; - break; - case IB_QPS_RTS: - index = IB_QPST_RTS2RTS; - break; - case IB_QPS_SQD: - index = IB_QPST_SQD2RTS; - break; - case IB_QPS_SQE: - index = IB_QPST_SQE2RTS; - break; - } - break; - case IB_QPS_SQD: - if (ib_fromstate == IB_QPS_RTS) - index = IB_QPST_RTS2SQD; - break; - case IB_QPS_SQE: - break; - case IB_QPS_ERR: - index = IB_QPST_ANY2ERR; - break; - default: - break; - } - return index; -} - -/* - * ibqptype2servicetype returns hcp service type corresponding to given - * ib qp type used by create_qp() - */ -static inline int ibqptype2servicetype(enum ib_qp_type ibqptype) -{ - switch (ibqptype) { - case IB_QPT_SMI: - case IB_QPT_GSI: - return ST_UD; - case IB_QPT_RC: - return ST_RC; - case IB_QPT_UC: - return ST_UC; - case IB_QPT_UD: - return ST_UD; - case IB_QPT_RAW_IPV6: - return -EINVAL; - case IB_QPT_RAW_ETHERTYPE: - return -EINVAL; - default: - ehca_gen_err("Invalid ibqptype=%x", ibqptype); - return -EINVAL; - } -} - -/* - * init userspace queue info from ipz_queue data - */ -static inline void queue2resp(struct ipzu_queue_resp *resp, - struct ipz_queue *queue) -{ - resp->qe_size = queue->qe_size; - resp->act_nr_of_sg = queue->act_nr_of_sg; - resp->queue_length = queue->queue_length; - resp->pagesize = queue->pagesize; - resp->toggle_state = queue->toggle_state; - resp->offset = queue->offset; -} - -/* - * init_qp_queue initializes/constructs r/squeue and registers queue pages. 
- */ -static inline int init_qp_queue(struct ehca_shca *shca, - struct ehca_pd *pd, - struct ehca_qp *my_qp, - struct ipz_queue *queue, - int q_type, - u64 expected_hret, - struct ehca_alloc_queue_parms *parms, - int wqe_size) -{ - int ret, cnt, ipz_rc, nr_q_pages; - void *vpage; - u64 rpage, h_ret; - struct ib_device *ib_dev = &shca->ib_device; - struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle; - - if (!parms->queue_size) - return 0; - - if (parms->is_small) { - nr_q_pages = 1; - ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages, - 128 << parms->page_size, - wqe_size, parms->act_nr_sges, 1); - } else { - nr_q_pages = parms->queue_size; - ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages, - EHCA_PAGESIZE, wqe_size, - parms->act_nr_sges, 0); - } - - if (!ipz_rc) { - ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%i", - ipz_rc); - return -EBUSY; - } - - /* register queue pages */ - for (cnt = 0; cnt < nr_q_pages; cnt++) { - vpage = ipz_qpageit_get_inc(queue); - if (!vpage) { - ehca_err(ib_dev, "ipz_qpageit_get_inc() " - "failed p_vpage= %p", vpage); - ret = -EINVAL; - goto init_qp_queue1; - } - rpage = __pa(vpage); - - h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, - my_qp->ipz_qp_handle, - NULL, 0, q_type, - rpage, parms->is_small ? 0 : 1, - my_qp->galpas.kernel); - if (cnt == (nr_q_pages - 1)) { /* last page! 
*/ - if (h_ret != expected_hret) { - ehca_err(ib_dev, "hipz_qp_register_rpage() " - "h_ret=%lli", h_ret); - ret = ehca2ib_return_code(h_ret); - goto init_qp_queue1; - } - vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue); - if (vpage) { - ehca_err(ib_dev, "ipz_qpageit_get_inc() " - "should not succeed vpage=%p", vpage); - ret = -EINVAL; - goto init_qp_queue1; - } - } else { - if (h_ret != H_PAGE_REGISTERED) { - ehca_err(ib_dev, "hipz_qp_register_rpage() " - "h_ret=%lli", h_ret); - ret = ehca2ib_return_code(h_ret); - goto init_qp_queue1; - } - } - } - - ipz_qeit_reset(queue); - - return 0; - -init_qp_queue1: - ipz_queue_dtor(pd, queue); - return ret; -} - -static inline int ehca_calc_wqe_size(int act_nr_sge, int is_llqp) -{ - if (is_llqp) - return 128 << act_nr_sge; - else - return offsetof(struct ehca_wqe, - u.nud.sg_list[act_nr_sge]); -} - -static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue, - int req_nr_sge, int is_llqp) -{ - u32 wqe_size, q_size; - int act_nr_sge = req_nr_sge; - - if (!is_llqp) - /* round up #SGEs so WQE size is a power of 2 */ - for (act_nr_sge = 4; act_nr_sge <= 252; - act_nr_sge = 4 + 2 * act_nr_sge) - if (act_nr_sge >= req_nr_sge) - break; - - wqe_size = ehca_calc_wqe_size(act_nr_sge, is_llqp); - q_size = wqe_size * (queue->max_wr + 1); - - if (q_size <= 512) - queue->page_size = 2; - else if (q_size <= 1024) - queue->page_size = 3; - else - queue->page_size = 0; - - queue->is_small = (queue->page_size != 0); -} - -/* needs to be called with cq->spinlock held */ -void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq) -{ - struct list_head *list, *node; - - /* TODO: support low latency QPs */ - if (qp->ext_type == EQPT_LLQP) - return; - - if (on_sq) { - list = &qp->send_cq->sqp_err_list; - node = &qp->sq_err_node; - } else { - list = &qp->recv_cq->rqp_err_list; - node = &qp->rq_err_node; - } - - if (list_empty(node)) - list_add_tail(node, list); - - return; -} - -static void del_from_err_list(struct ehca_cq *cq, 
struct list_head *node) -{ - unsigned long flags; - - spin_lock_irqsave(&cq->spinlock, flags); - - if (!list_empty(node)) - list_del_init(node); - - spin_unlock_irqrestore(&cq->spinlock, flags); -} - -static void reset_queue_map(struct ehca_queue_map *qmap) -{ - int i; - - qmap->tail = qmap->entries - 1; - qmap->left_to_poll = 0; - qmap->next_wqe_idx = 0; - for (i = 0; i < qmap->entries; i++) { - qmap->map[i].reported = 1; - qmap->map[i].cqe_req = 0; - } -} - -/* - * Create an ib_qp struct that is either a QP or an SRQ, depending on - * the value of the is_srq parameter. If init_attr and srq_init_attr share - * fields, the field out of init_attr is used. - */ -static struct ehca_qp *internal_create_qp( - struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata, int is_srq) -{ - struct ehca_qp *my_qp, *my_srq = NULL; - struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); - struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, - ib_device); - struct ib_ucontext *context = NULL; - u64 h_ret; - int is_llqp = 0, has_srq = 0, is_user = 0; - int qp_type, max_send_sge, max_recv_sge, ret; - - /* h_call's out parameters */ - struct ehca_alloc_qp_parms parms; - u32 swqe_size = 0, rwqe_size = 0, ib_qp_num; - unsigned long flags; - - if (!atomic_add_unless(&shca->num_qps, 1, shca->max_num_qps)) { - ehca_err(pd->device, "Unable to create QP, max number of %i " - "QPs reached.", shca->max_num_qps); - ehca_err(pd->device, "To increase the maximum number of QPs " - "use the number_of_qps module parameter.\n"); - return ERR_PTR(-ENOSPC); - } - - if (init_attr->create_flags) { - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - - memset(&parms, 0, sizeof(parms)); - qp_type = init_attr->qp_type; - - if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR && - init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) { - ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed", - init_attr->sq_sig_type); 
- atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - - /* save LLQP info */ - if (qp_type & 0x80) { - is_llqp = 1; - parms.ext_type = EQPT_LLQP; - parms.ll_comp_flags = qp_type & LLQP_COMP_MASK; - } - qp_type &= 0x1F; - init_attr->qp_type &= 0x1F; - - /* handle SRQ base QPs */ - if (init_attr->srq) { - my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq); - - if (qp_type == IB_QPT_UC) { - ehca_err(pd->device, "UC with SRQ not supported"); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - - has_srq = 1; - parms.ext_type = EQPT_SRQBASE; - parms.srq_qpn = my_srq->real_qp_num; - } - - if (is_llqp && has_srq) { - ehca_err(pd->device, "LLQPs can't have an SRQ"); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - - /* handle SRQs */ - if (is_srq) { - parms.ext_type = EQPT_SRQ; - parms.srq_limit = srq_init_attr->attr.srq_limit; - if (init_attr->cap.max_recv_sge > 3) { - ehca_err(pd->device, "no more than three SGEs " - "supported for SRQ pd=%p max_sge=%x", - pd, init_attr->cap.max_recv_sge); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - } - - /* check QP type */ - if (qp_type != IB_QPT_UD && - qp_type != IB_QPT_UC && - qp_type != IB_QPT_RC && - qp_type != IB_QPT_SMI && - qp_type != IB_QPT_GSI) { - ehca_err(pd->device, "wrong QP Type=%x", qp_type); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - - if (is_llqp) { - switch (qp_type) { - case IB_QPT_RC: - if ((init_attr->cap.max_send_wr > 255) || - (init_attr->cap.max_recv_wr > 255)) { - ehca_err(pd->device, - "Invalid Number of max_sq_wr=%x " - "or max_rq_wr=%x for RC LLQP", - init_attr->cap.max_send_wr, - init_attr->cap.max_recv_wr); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - break; - case IB_QPT_UD: - if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) { - ehca_err(pd->device, "UD LLQP not supported " - "by this adapter"); - atomic_dec(&shca->num_qps); - return ERR_PTR(-ENOSYS); - } - if (!(init_attr->cap.max_send_sge <= 5 - && 
init_attr->cap.max_send_sge >= 1 - && init_attr->cap.max_recv_sge <= 5 - && init_attr->cap.max_recv_sge >= 1)) { - ehca_err(pd->device, - "Invalid Number of max_send_sge=%x " - "or max_recv_sge=%x for UD LLQP", - init_attr->cap.max_send_sge, - init_attr->cap.max_recv_sge); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } else if (init_attr->cap.max_send_wr > 255) { - ehca_err(pd->device, - "Invalid Number of " - "max_send_wr=%x for UD QP_TYPE=%x", - init_attr->cap.max_send_wr, qp_type); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - break; - default: - ehca_err(pd->device, "unsupported LL QP Type=%x", - qp_type); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - } else { - int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI - || qp_type == IB_QPT_GSI) ? 250 : 252; - - if (init_attr->cap.max_send_sge > max_sge - || init_attr->cap.max_recv_sge > max_sge) { - ehca_err(pd->device, "Invalid number of SGEs requested " - "send_sge=%x recv_sge=%x max_sge=%x", - init_attr->cap.max_send_sge, - init_attr->cap.max_recv_sge, max_sge); - atomic_dec(&shca->num_qps); - return ERR_PTR(-EINVAL); - } - } - - my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL); - if (!my_qp) { - ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); - atomic_dec(&shca->num_qps); - return ERR_PTR(-ENOMEM); - } - - if (pd->uobject && udata) { - is_user = 1; - context = pd->uobject->context; - } - - atomic_set(&my_qp->nr_events, 0); - init_waitqueue_head(&my_qp->wait_completion); - spin_lock_init(&my_qp->spinlock_s); - spin_lock_init(&my_qp->spinlock_r); - my_qp->qp_type = qp_type; - my_qp->ext_type = parms.ext_type; - my_qp->state = IB_QPS_RESET; - - if (init_attr->recv_cq) - my_qp->recv_cq = - container_of(init_attr->recv_cq, struct ehca_cq, ib_cq); - if (init_attr->send_cq) - my_qp->send_cq = - container_of(init_attr->send_cq, struct ehca_cq, ib_cq); - - idr_preload(GFP_KERNEL); - write_lock_irqsave(&ehca_qp_idr_lock, flags); - - ret = 
idr_alloc(&ehca_qp_idr, my_qp, 0, 0x2000000, GFP_NOWAIT); - if (ret >= 0) - my_qp->token = ret; - - write_unlock_irqrestore(&ehca_qp_idr_lock, flags); - idr_preload_end(); - if (ret < 0) { - if (ret == -ENOSPC) { - ret = -EINVAL; - ehca_err(pd->device, "Invalid number of qp"); - } else { - ret = -ENOMEM; - ehca_err(pd->device, "Can't allocate new idr entry."); - } - goto create_qp_exit0; - } - - if (has_srq) - parms.srq_token = my_qp->token; - - parms.servicetype = ibqptype2servicetype(qp_type); - if (parms.servicetype < 0) { - ret = -EINVAL; - ehca_err(pd->device, "Invalid qp_type=%x", qp_type); - goto create_qp_exit1; - } - - /* Always signal by WQE so we can hide circ. WQEs */ - parms.sigtype = HCALL_SIGT_BY_WQE; - - /* UD_AV CIRCUMVENTION */ - max_send_sge = init_attr->cap.max_send_sge; - max_recv_sge = init_attr->cap.max_recv_sge; - if (parms.servicetype == ST_UD && !is_llqp) { - max_send_sge += 2; - max_recv_sge += 2; - } - - parms.token = my_qp->token; - parms.eq_handle = shca->eq.ipz_eq_handle; - parms.pd = my_pd->fw_pd; - if (my_qp->send_cq) - parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle; - if (my_qp->recv_cq) - parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle; - - parms.squeue.max_wr = init_attr->cap.max_send_wr; - parms.rqueue.max_wr = init_attr->cap.max_recv_wr; - parms.squeue.max_sge = max_send_sge; - parms.rqueue.max_sge = max_recv_sge; - - /* RC QPs need one more SWQE for unsolicited ack circumvention */ - if (qp_type == IB_QPT_RC) - parms.squeue.max_wr++; - - if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) { - if (HAS_SQ(my_qp)) - ehca_determine_small_queue( - &parms.squeue, max_send_sge, is_llqp); - if (HAS_RQ(my_qp)) - ehca_determine_small_queue( - &parms.rqueue, max_recv_sge, is_llqp); - parms.qp_storage = - (parms.squeue.is_small || parms.rqueue.is_small); - } - - h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user); - if (h_ret != H_SUCCESS) { - ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lli", - 
h_ret); - ret = ehca2ib_return_code(h_ret); - goto create_qp_exit1; - } - - ib_qp_num = my_qp->real_qp_num = parms.real_qp_num; - my_qp->ipz_qp_handle = parms.qp_handle; - my_qp->galpas = parms.galpas; - - swqe_size = ehca_calc_wqe_size(parms.squeue.act_nr_sges, is_llqp); - rwqe_size = ehca_calc_wqe_size(parms.rqueue.act_nr_sges, is_llqp); - - switch (qp_type) { - case IB_QPT_RC: - if (is_llqp) { - parms.squeue.act_nr_sges = 1; - parms.rqueue.act_nr_sges = 1; - } - /* hide the extra WQE */ - parms.squeue.act_nr_wqes--; - break; - case IB_QPT_UD: - case IB_QPT_GSI: - case IB_QPT_SMI: - /* UD circumvention */ - if (is_llqp) { - parms.squeue.act_nr_sges = 1; - parms.rqueue.act_nr_sges = 1; - } else { - parms.squeue.act_nr_sges -= 2; - parms.rqueue.act_nr_sges -= 2; - } - - if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) { - parms.squeue.act_nr_wqes = init_attr->cap.max_send_wr; - parms.rqueue.act_nr_wqes = init_attr->cap.max_recv_wr; - parms.squeue.act_nr_sges = init_attr->cap.max_send_sge; - parms.rqueue.act_nr_sges = init_attr->cap.max_recv_sge; - ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1; - } - - break; - - default: - break; - } - - /* initialize r/squeue and register queue pages */ - if (HAS_SQ(my_qp)) { - ret = init_qp_queue( - shca, my_pd, my_qp, &my_qp->ipz_squeue, 0, - HAS_RQ(my_qp) ? 
H_PAGE_REGISTERED : H_SUCCESS, - &parms.squeue, swqe_size); - if (ret) { - ehca_err(pd->device, "Couldn't initialize squeue " - "and pages ret=%i", ret); - goto create_qp_exit2; - } - - if (!is_user) { - my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length / - my_qp->ipz_squeue.qe_size; - my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries * - sizeof(struct ehca_qmap_entry)); - if (!my_qp->sq_map.map) { - ehca_err(pd->device, "Couldn't allocate squeue " - "map ret=%i", ret); - goto create_qp_exit3; - } - INIT_LIST_HEAD(&my_qp->sq_err_node); - /* to avoid the generation of bogus flush CQEs */ - reset_queue_map(&my_qp->sq_map); - } - } - - if (HAS_RQ(my_qp)) { - ret = init_qp_queue( - shca, my_pd, my_qp, &my_qp->ipz_rqueue, 1, - H_SUCCESS, &parms.rqueue, rwqe_size); - if (ret) { - ehca_err(pd->device, "Couldn't initialize rqueue " - "and pages ret=%i", ret); - goto create_qp_exit4; - } - if (!is_user) { - my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length / - my_qp->ipz_rqueue.qe_size; - my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries * - sizeof(struct ehca_qmap_entry)); - if (!my_qp->rq_map.map) { - ehca_err(pd->device, "Couldn't allocate squeue " - "map ret=%i", ret); - goto create_qp_exit5; - } - INIT_LIST_HEAD(&my_qp->rq_err_node); - /* to avoid the generation of bogus flush CQEs */ - reset_queue_map(&my_qp->rq_map); - } - } else if (init_attr->srq && !is_user) { - /* this is a base QP, use the queue map of the SRQ */ - my_qp->rq_map = my_srq->rq_map; - INIT_LIST_HEAD(&my_qp->rq_err_node); - - my_qp->ipz_rqueue = my_srq->ipz_rqueue; - } - - if (is_srq) { - my_qp->ib_srq.pd = &my_pd->ib_pd; - my_qp->ib_srq.device = my_pd->ib_pd.device; - - my_qp->ib_srq.srq_context = init_attr->qp_context; - my_qp->ib_srq.event_handler = init_attr->event_handler; - } else { - my_qp->ib_qp.qp_num = ib_qp_num; - my_qp->ib_qp.pd = &my_pd->ib_pd; - my_qp->ib_qp.device = my_pd->ib_pd.device; - - my_qp->ib_qp.recv_cq = init_attr->recv_cq; - my_qp->ib_qp.send_cq = 
init_attr->send_cq; - - my_qp->ib_qp.qp_type = qp_type; - my_qp->ib_qp.srq = init_attr->srq; - - my_qp->ib_qp.qp_context = init_attr->qp_context; - my_qp->ib_qp.event_handler = init_attr->event_handler; - } - - init_attr->cap.max_inline_data = 0; /* not supported yet */ - init_attr->cap.max_recv_sge = parms.rqueue.act_nr_sges; - init_attr->cap.max_recv_wr = parms.rqueue.act_nr_wqes; - init_attr->cap.max_send_sge = parms.squeue.act_nr_sges; - init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes; - my_qp->init_attr = *init_attr; - - if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) { - shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] = - &my_qp->ib_qp; - if (ehca_nr_ports < 0) { - /* alloc array to cache subsequent modify qp parms - * for autodetect mode - */ - my_qp->mod_qp_parm = - kzalloc(EHCA_MOD_QP_PARM_MAX * - sizeof(*my_qp->mod_qp_parm), - GFP_KERNEL); - if (!my_qp->mod_qp_parm) { - ehca_err(pd->device, - "Could not alloc mod_qp_parm"); - goto create_qp_exit5; - } - } - } - - /* NOTE: define_apq0() not supported yet */ - if (qp_type == IB_QPT_GSI) { - h_ret = ehca_define_sqp(shca, my_qp, init_attr); - if (h_ret != H_SUCCESS) { - kfree(my_qp->mod_qp_parm); - my_qp->mod_qp_parm = NULL; - /* the QP pointer is no longer valid */ - shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] = - NULL; - ret = ehca2ib_return_code(h_ret); - goto create_qp_exit6; - } - } - - if (my_qp->send_cq) { - ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp); - if (ret) { - ehca_err(pd->device, - "Couldn't assign qp to send_cq ret=%i", ret); - goto create_qp_exit7; - } - } - - /* copy queues, galpa data to user space */ - if (context && udata) { - struct ehca_create_qp_resp resp; - memset(&resp, 0, sizeof(resp)); - - resp.qp_num = my_qp->real_qp_num; - resp.token = my_qp->token; - resp.qp_type = my_qp->qp_type; - resp.ext_type = my_qp->ext_type; - resp.qkey = my_qp->qkey; - resp.real_qp_num = my_qp->real_qp_num; - - if (HAS_SQ(my_qp)) - queue2resp(&resp.ipz_squeue, 
&my_qp->ipz_squeue); - if (HAS_RQ(my_qp)) - queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue); - resp.fw_handle_ofs = (u32) - (my_qp->galpas.user.fw_handle & (PAGE_SIZE - 1)); - - if (ib_copy_to_udata(udata, &resp, sizeof resp)) { - ehca_err(pd->device, "Copy to udata failed"); - ret = -EINVAL; - goto create_qp_exit8; - } - } - - return my_qp; - -create_qp_exit8: - ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num); - -create_qp_exit7: - kfree(my_qp->mod_qp_parm); - -create_qp_exit6: - if (HAS_RQ(my_qp) && !is_user) - vfree(my_qp->rq_map.map); - -create_qp_exit5: - if (HAS_RQ(my_qp)) - ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); - -create_qp_exit4: - if (HAS_SQ(my_qp) && !is_user) - vfree(my_qp->sq_map.map); - -create_qp_exit3: - if (HAS_SQ(my_qp)) - ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); - -create_qp_exit2: - hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); - -create_qp_exit1: - write_lock_irqsave(&ehca_qp_idr_lock, flags); - idr_remove(&ehca_qp_idr, my_qp->token); - write_unlock_irqrestore(&ehca_qp_idr_lock, flags); - -create_qp_exit0: - kmem_cache_free(qp_cache, my_qp); - atomic_dec(&shca->num_qps); - return ERR_PTR(ret); -} - -struct ib_qp *ehca_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr, - struct ib_udata *udata) -{ - struct ehca_qp *ret; - - ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0); - return IS_ERR(ret) ? 
(struct ib_qp *)ret : &ret->ib_qp; -} - -static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, - struct ib_uobject *uobject); - -struct ib_srq *ehca_create_srq(struct ib_pd *pd, - struct ib_srq_init_attr *srq_init_attr, - struct ib_udata *udata) -{ - struct ib_qp_init_attr qp_init_attr; - struct ehca_qp *my_qp; - struct ib_srq *ret; - struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, - ib_device); - struct hcp_modify_qp_control_block *mqpcb; - u64 hret, update_mask; - - if (srq_init_attr->srq_type != IB_SRQT_BASIC) - return ERR_PTR(-ENOSYS); - - /* For common attributes, internal_create_qp() takes its info - * out of qp_init_attr, so copy all common attrs there. - */ - memset(&qp_init_attr, 0, sizeof(qp_init_attr)); - qp_init_attr.event_handler = srq_init_attr->event_handler; - qp_init_attr.qp_context = srq_init_attr->srq_context; - qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; - qp_init_attr.qp_type = IB_QPT_RC; - qp_init_attr.cap.max_recv_wr = srq_init_attr->attr.max_wr; - qp_init_attr.cap.max_recv_sge = srq_init_attr->attr.max_sge; - - my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1); - if (IS_ERR(my_qp)) - return (struct ib_srq *)my_qp; - - /* copy back return values */ - srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr; - srq_init_attr->attr.max_sge = 3; - - /* drive SRQ into RTR state */ - mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!mqpcb) { - ehca_err(pd->device, "Could not get zeroed page for mqpcb " - "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num); - ret = ERR_PTR(-ENOMEM); - goto create_srq1; - } - - mqpcb->qp_state = EHCA_QPS_INIT; - mqpcb->prim_phys_port = 1; - update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); - hret = hipz_h_modify_qp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - update_mask, - mqpcb, my_qp->galpas.kernel); - if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not modify SRQ to INIT " - "ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, 
my_qp->real_qp_num, hret); - goto create_srq2; - } - - mqpcb->qp_enable = 1; - update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1); - hret = hipz_h_modify_qp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - update_mask, - mqpcb, my_qp->galpas.kernel); - if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not enable SRQ " - "ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, my_qp->real_qp_num, hret); - goto create_srq2; - } - - mqpcb->qp_state = EHCA_QPS_RTR; - update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); - hret = hipz_h_modify_qp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - update_mask, - mqpcb, my_qp->galpas.kernel); - if (hret != H_SUCCESS) { - ehca_err(pd->device, "Could not modify SRQ to RTR " - "ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, my_qp->real_qp_num, hret); - goto create_srq2; - } - - ehca_free_fw_ctrlblock(mqpcb); - - return &my_qp->ib_srq; - -create_srq2: - ret = ERR_PTR(ehca2ib_return_code(hret)); - ehca_free_fw_ctrlblock(mqpcb); - -create_srq1: - internal_destroy_qp(pd->device, my_qp, my_qp->ib_srq.uobject); - - return ret; -} - -/* - * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts - * set purge bit of bad wqe and subsequent wqes to avoid reentering sqe - * returns total number of bad wqes in bad_wqe_cnt - */ -static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, - int *bad_wqe_cnt) -{ - u64 h_ret; - struct ipz_queue *squeue; - void *bad_send_wqe_p, *bad_send_wqe_v; - u64 q_ofs; - struct ehca_wqe *wqe; - int qp_num = my_qp->ib_qp.qp_num; - - /* get send wqe pointer */ - h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, &my_qp->pf, - &bad_send_wqe_p, NULL, 2); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed" - " ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, qp_num, h_ret); - return ehca2ib_return_code(h_ret); - } - bad_send_wqe_p = (void *)((u64)bad_send_wqe_p & (~(1L << 63))); - 
ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p", - qp_num, bad_send_wqe_p); - /* convert wqe pointer to vadr */ - bad_send_wqe_v = __va((u64)bad_send_wqe_p); - if (ehca_debug_level >= 2) - ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num); - squeue = &my_qp->ipz_squeue; - if (ipz_queue_abs_to_offset(squeue, (u64)bad_send_wqe_p, &q_ofs)) { - ehca_err(&shca->ib_device, "failed to get wqe offset qp_num=%x" - " bad_send_wqe_p=%p", qp_num, bad_send_wqe_p); - return -EFAULT; - } - - /* loop sets wqe's purge bit */ - wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs); - *bad_wqe_cnt = 0; - while (wqe->optype != 0xff && wqe->wqef != 0xff) { - if (ehca_debug_level >= 2) - ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num); - wqe->nr_of_data_seg = 0; /* suppress data access */ - wqe->wqef = WQEF_PURGE; /* WQE to be purged */ - q_ofs = ipz_queue_advance_offset(squeue, q_ofs); - wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs); - *bad_wqe_cnt = (*bad_wqe_cnt)+1; - } - /* - * bad wqe will be reprocessed and ignored when pol_cq() is called, - * i.e. 
nr of wqes with flush error status is one less - */ - ehca_dbg(&shca->ib_device, "qp_num=%x flusherr_wqe_cnt=%x", - qp_num, (*bad_wqe_cnt)-1); - wqe->wqef = 0; - - return 0; -} - -static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue, - struct ehca_queue_map *qmap) -{ - void *wqe_v; - u64 q_ofs; - u32 wqe_idx; - unsigned int tail_idx; - - /* convert real to abs address */ - wqe_p = wqe_p & (~(1UL << 63)); - - wqe_v = __va(wqe_p); - - if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) { - ehca_gen_err("Invalid offset for calculating left cqes " - "wqe_p=%#llx wqe_v=%p\n", wqe_p, wqe_v); - return -EFAULT; - } - - tail_idx = next_index(qmap->tail, qmap->entries); - wqe_idx = q_ofs / ipz_queue->qe_size; - - /* check all processed wqes, whether a cqe is requested or not */ - while (tail_idx != wqe_idx) { - if (qmap->map[tail_idx].cqe_req) - qmap->left_to_poll++; - tail_idx = next_index(tail_idx, qmap->entries); - } - /* save index in queue, where we have to start flushing */ - qmap->next_wqe_idx = wqe_idx; - return 0; -} - -static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca) -{ - u64 h_ret; - void *send_wqe_p, *recv_wqe_p; - int ret; - unsigned long flags; - int qp_num = my_qp->ib_qp.qp_num; - - /* this hcall is not supported on base QPs */ - if (my_qp->ext_type != EQPT_SRQBASE) { - /* get send and receive wqe pointer */ - h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, &my_qp->pf, - &send_wqe_p, &recv_wqe_p, 4); - if (h_ret != H_SUCCESS) { - ehca_err(&shca->ib_device, "disable_and_get_wqe() " - "failed ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, qp_num, h_ret); - return ehca2ib_return_code(h_ret); - } - - /* - * acquire lock to ensure that nobody is polling the cq which - * could mean that the qmap->tail pointer is in an - * inconsistent state. 
- */ - spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); - ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue, - &my_qp->sq_map); - spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); - if (ret) - return ret; - - - spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); - ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue, - &my_qp->rq_map); - spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); - if (ret) - return ret; - } else { - spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); - my_qp->sq_map.left_to_poll = 0; - my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail, - my_qp->sq_map.entries); - spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); - - spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); - my_qp->rq_map.left_to_poll = 0; - my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail, - my_qp->rq_map.entries); - spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); - } - - /* this assures flush cqes being generated only for pending wqes */ - if ((my_qp->sq_map.left_to_poll == 0) && - (my_qp->rq_map.left_to_poll == 0)) { - spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); - ehca_add_to_err_list(my_qp, 1); - spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); - - if (HAS_RQ(my_qp)) { - spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); - ehca_add_to_err_list(my_qp, 0); - spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, - flags); - } - } - - return 0; -} - -/* - * internal_modify_qp with circumvention to handle aqp0 properly - * smi_reset2init indicates if this is an internal reset-to-init-call for - * smi. This flag must always be zero if called from ehca_modify_qp()! - * This internal func was intorduced to avoid recursion of ehca_modify_qp()! 
- */ -static int internal_modify_qp(struct ib_qp *ibqp, - struct ib_qp_attr *attr, - int attr_mask, int smi_reset2init) -{ - enum ib_qp_state qp_cur_state, qp_new_state; - int cnt, qp_attr_idx, ret = 0; - enum ib_qp_statetrans statetrans; - struct hcp_modify_qp_control_block *mqpcb; - struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); - struct ehca_shca *shca = - container_of(ibqp->pd->device, struct ehca_shca, ib_device); - u64 update_mask; - u64 h_ret; - int bad_wqe_cnt = 0; - int is_user = 0; - int squeue_locked = 0; - unsigned long flags = 0; - - /* do query_qp to obtain current attr values */ - mqpcb = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); - if (!mqpcb) { - ehca_err(ibqp->device, "Could not get zeroed page for mqpcb " - "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num); - return -ENOMEM; - } - - h_ret = hipz_h_query_qp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - mqpcb, my_qp->galpas.kernel); - if (h_ret != H_SUCCESS) { - ehca_err(ibqp->device, "hipz_h_query_qp() failed " - "ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, ibqp->qp_num, h_ret); - ret = ehca2ib_return_code(h_ret); - goto modify_qp_exit1; - } - if (ibqp->uobject) - is_user = 1; - - qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state); - - if (qp_cur_state == -EINVAL) { /* invalid qp state */ - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid current ehca_qp_state=%x " - "ehca_qp=%p qp_num=%x", - mqpcb->qp_state, my_qp, ibqp->qp_num); - goto modify_qp_exit1; - } - /* - * circumvention to set aqp0 initial state to init - * as expected by IB spec - */ - if (smi_reset2init == 0 && - ibqp->qp_type == IB_QPT_SMI && - qp_cur_state == IB_QPS_RESET && - (attr_mask & IB_QP_STATE) && - attr->qp_state == IB_QPS_INIT) { /* RESET -> INIT */ - struct ib_qp_attr smiqp_attr = { - .qp_state = IB_QPS_INIT, - .port_num = my_qp->init_attr.port_num, - .pkey_index = 0, - .qkey = 0 - }; - int smiqp_attr_mask = IB_QP_STATE | IB_QP_PORT | - IB_QP_PKEY_INDEX | IB_QP_QKEY; - int smirc = 
internal_modify_qp( - ibqp, &smiqp_attr, smiqp_attr_mask, 1); - if (smirc) { - ehca_err(ibqp->device, "SMI RESET -> INIT failed. " - "ehca_modify_qp() rc=%i", smirc); - ret = H_PARAMETER; - goto modify_qp_exit1; - } - qp_cur_state = IB_QPS_INIT; - ehca_dbg(ibqp->device, "SMI RESET -> INIT succeeded"); - } - /* is transmitted current state equal to "real" current state */ - if ((attr_mask & IB_QP_CUR_STATE) && - qp_cur_state != attr->cur_qp_state) { - ret = -EINVAL; - ehca_err(ibqp->device, - "Invalid IB_QP_CUR_STATE attr->curr_qp_state=%x <>" - " actual cur_qp_state=%x. ehca_qp=%p qp_num=%x", - attr->cur_qp_state, qp_cur_state, my_qp, ibqp->qp_num); - goto modify_qp_exit1; - } - - ehca_dbg(ibqp->device, "ehca_qp=%p qp_num=%x current qp_state=%x " - "new qp_state=%x attribute_mask=%x", - my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask); - - qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state; - if (!smi_reset2init && - !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type, - attr_mask, IB_LINK_LAYER_UNSPECIFIED)) { - ret = -EINVAL; - ehca_err(ibqp->device, - "Invalid qp transition new_state=%x cur_state=%x " - "ehca_qp=%p qp_num=%x attr_mask=%x", qp_new_state, - qp_cur_state, my_qp, ibqp->qp_num, attr_mask); - goto modify_qp_exit1; - } - - mqpcb->qp_state = ib2ehca_qp_state(qp_new_state); - if (mqpcb->qp_state) - update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); - else { - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid new qp state=%x " - "ehca_qp=%p qp_num=%x", - qp_new_state, my_qp, ibqp->qp_num); - goto modify_qp_exit1; - } - - /* retrieve state transition struct to get req and opt attrs */ - statetrans = get_modqp_statetrans(qp_cur_state, qp_new_state); - if (statetrans < 0) { - ret = -EINVAL; - ehca_err(ibqp->device, " qp_cur_state=%x " - "new_qp_state=%x State_xsition=%x ehca_qp=%p " - "qp_num=%x", qp_cur_state, qp_new_state, - statetrans, my_qp, ibqp->qp_num); - goto modify_qp_exit1; - } - - qp_attr_idx = 
ib2ehcaqptype(ibqp->qp_type); - - if (qp_attr_idx < 0) { - ret = qp_attr_idx; - ehca_err(ibqp->device, - "Invalid QP type=%x ehca_qp=%p qp_num=%x", - ibqp->qp_type, my_qp, ibqp->qp_num); - goto modify_qp_exit1; - } - - ehca_dbg(ibqp->device, - "ehca_qp=%p qp_num=%x qp_state_xsit=%x", - my_qp, ibqp->qp_num, statetrans); - - /* eHCA2 rev2 and higher require the SEND_GRH_FLAG to be set - * in non-LL UD QPs. - */ - if ((my_qp->qp_type == IB_QPT_UD) && - (my_qp->ext_type != EQPT_LLQP) && - (statetrans == IB_QPST_INIT2RTR) && - (shca->hw_level >= 0x22)) { - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); - mqpcb->send_grh_flag = 1; - } - - /* sqe -> rts: set purge bit of bad wqe before actual trans */ - if ((my_qp->qp_type == IB_QPT_UD || - my_qp->qp_type == IB_QPT_GSI || - my_qp->qp_type == IB_QPT_SMI) && - statetrans == IB_QPST_SQE2RTS) { - /* mark next free wqe if kernel */ - if (!ibqp->uobject) { - struct ehca_wqe *wqe; - /* lock send queue */ - spin_lock_irqsave(&my_qp->spinlock_s, flags); - squeue_locked = 1; - /* mark next free wqe */ - wqe = (struct ehca_wqe *) - ipz_qeit_get(&my_qp->ipz_squeue); - wqe->optype = wqe->wqef = 0xff; - ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p", - ibqp->qp_num, wqe); - } - ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt); - if (ret) { - ehca_err(ibqp->device, "prepare_sqe_rts() failed " - "ehca_qp=%p qp_num=%x ret=%i", - my_qp, ibqp->qp_num, ret); - goto modify_qp_exit2; - } - } - - /* - * enable RDMA_Atomic_Control if reset->init und reliable con - * this is necessary since gen2 does not provide that flag, - * but pHyp requires it - */ - if (statetrans == IB_QPST_RESET2INIT && - (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_UC)) { - mqpcb->rdma_atomic_ctrl = 3; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RDMA_ATOMIC_CTRL, 1); - } - /* circ. 
pHyp requires #RDMA/Atomic Resp Res for UC INIT -> RTR */ - if (statetrans == IB_QPST_INIT2RTR && - (ibqp->qp_type == IB_QPT_UC) && - !(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)) { - mqpcb->rdma_nr_atomic_resp_res = 1; /* default to 1 */ - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1); - } - - if (attr_mask & IB_QP_PKEY_INDEX) { - if (attr->pkey_index >= 16) { - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid pkey_index=%x. " - "ehca_qp=%p qp_num=%x max_pkey_index=f", - attr->pkey_index, my_qp, ibqp->qp_num); - goto modify_qp_exit2; - } - mqpcb->prim_p_key_idx = attr->pkey_index; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1); - } - if (attr_mask & IB_QP_PORT) { - struct ehca_sport *sport; - struct ehca_qp *aqp1; - if (attr->port_num < 1 || attr->port_num > shca->num_ports) { - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid port=%x. " - "ehca_qp=%p qp_num=%x num_ports=%x", - attr->port_num, my_qp, ibqp->qp_num, - shca->num_ports); - goto modify_qp_exit2; - } - sport = &shca->sport[attr->port_num - 1]; - if (!sport->ibqp_sqp[IB_QPT_GSI]) { - /* should not occur */ - ret = -EFAULT; - ehca_err(ibqp->device, "AQP1 was not created for " - "port=%x", attr->port_num); - goto modify_qp_exit2; - } - aqp1 = container_of(sport->ibqp_sqp[IB_QPT_GSI], - struct ehca_qp, ib_qp); - if (ibqp->qp_type != IB_QPT_GSI && - ibqp->qp_type != IB_QPT_SMI && - aqp1->mod_qp_parm) { - /* - * firmware will reject this modify_qp() because - * port is not activated/initialized fully - */ - ret = -EFAULT; - ehca_warn(ibqp->device, "Couldn't modify qp port=%x: " - "either port is being activated (try again) " - "or cabling issue", attr->port_num); - goto modify_qp_exit2; - } - mqpcb->prim_phys_port = attr->port_num; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1); - } - if (attr_mask & IB_QP_QKEY) { - mqpcb->qkey = attr->qkey; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1); - } - if (attr_mask & IB_QP_AV) { - mqpcb->dlid = 
attr->ah_attr.dlid; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1); - mqpcb->source_path_bits = attr->ah_attr.src_path_bits; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS, 1); - mqpcb->service_level = attr->ah_attr.sl; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1); - - if (ehca_calc_ipd(shca, mqpcb->prim_phys_port, - attr->ah_attr.static_rate, - &mqpcb->max_static_rate)) { - ret = -EINVAL; - goto modify_qp_exit2; - } - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1); - - /* - * Always supply the GRH flag, even if it's zero, to give the - * hypervisor a clear "yes" or "no" instead of a "perhaps" - */ - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); - - /* - * only if GRH is TRUE we might consider SOURCE_GID_IDX - * and DEST_GID otherwise phype will return H_ATTR_PARM!!! - */ - if (attr->ah_attr.ah_flags == IB_AH_GRH) { - mqpcb->send_grh_flag = 1; - - mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1); - - for (cnt = 0; cnt < 16; cnt++) - mqpcb->dest_gid.byte[cnt] = - attr->ah_attr.grh.dgid.raw[cnt]; - - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_GID, 1); - mqpcb->flow_label = attr->ah_attr.grh.flow_label; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL, 1); - mqpcb->hop_limit = attr->ah_attr.grh.hop_limit; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT, 1); - mqpcb->traffic_class = attr->ah_attr.grh.traffic_class; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS, 1); - } - } - - if (attr_mask & IB_QP_PATH_MTU) { - /* store ld(MTU) */ - my_qp->mtu_shift = attr->path_mtu + 7; - mqpcb->path_mtu = attr->path_mtu; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1); - } - if (attr_mask & IB_QP_TIMEOUT) { - mqpcb->timeout = attr->timeout; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT, 1); - } - if (attr_mask & IB_QP_RETRY_CNT) { - mqpcb->retry_count = attr->retry_cnt; - update_mask |= 
EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT, 1); - } - if (attr_mask & IB_QP_RNR_RETRY) { - mqpcb->rnr_retry_count = attr->rnr_retry; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT, 1); - } - if (attr_mask & IB_QP_RQ_PSN) { - mqpcb->receive_psn = attr->rq_psn; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RECEIVE_PSN, 1); - } - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { - mqpcb->rdma_nr_atomic_resp_res = attr->max_dest_rd_atomic < 3 ? - attr->max_dest_rd_atomic : 2; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1); - } - if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { - mqpcb->rdma_atomic_outst_dest_qp = attr->max_rd_atomic < 3 ? - attr->max_rd_atomic : 2; - update_mask |= - EHCA_BMASK_SET - (MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1); - } - if (attr_mask & IB_QP_ALT_PATH) { - if (attr->alt_port_num < 1 - || attr->alt_port_num > shca->num_ports) { - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid alt_port=%x. " - "ehca_qp=%p qp_num=%x num_ports=%x", - attr->alt_port_num, my_qp, ibqp->qp_num, - shca->num_ports); - goto modify_qp_exit2; - } - mqpcb->alt_phys_port = attr->alt_port_num; - - if (attr->alt_pkey_index >= 16) { - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid alt_pkey_index=%x. 
" - "ehca_qp=%p qp_num=%x max_pkey_index=f", - attr->pkey_index, my_qp, ibqp->qp_num); - goto modify_qp_exit2; - } - mqpcb->alt_p_key_idx = attr->alt_pkey_index; - - mqpcb->timeout_al = attr->alt_timeout; - mqpcb->dlid_al = attr->alt_ah_attr.dlid; - mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits; - mqpcb->service_level_al = attr->alt_ah_attr.sl; - - if (ehca_calc_ipd(shca, mqpcb->alt_phys_port, - attr->alt_ah_attr.static_rate, - &mqpcb->max_static_rate_al)) { - ret = -EINVAL; - goto modify_qp_exit2; - } - - /* OpenIB doesn't support alternate retry counts - copy them */ - mqpcb->retry_count_al = mqpcb->retry_count; - mqpcb->rnr_retry_count_al = mqpcb->rnr_retry_count; - - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_ALT_PHYS_PORT, 1) - | EHCA_BMASK_SET(MQPCB_MASK_ALT_P_KEY_IDX, 1) - | EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT_AL, 1); - - /* - * Always supply the GRH flag, even if it's zero, to give the - * hypervisor a clear "yes" or "no" instead of a "perhaps" - */ - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1); - - /* - * only if GRH is TRUE we might consider SOURCE_GID_IDX - * and DEST_GID otherwise phype will return H_ATTR_PARM!!! 
- */ - if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) { - mqpcb->send_grh_flag_al = 1; - - for (cnt = 0; cnt < 16; cnt++) - mqpcb->dest_gid_al.byte[cnt] = - attr->alt_ah_attr.grh.dgid.raw[cnt]; - mqpcb->source_gid_idx_al = - attr->alt_ah_attr.grh.sgid_index; - mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label; - mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit; - mqpcb->traffic_class_al = - attr->alt_ah_attr.grh.traffic_class; - - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1) - | EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1) | - EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1); - } - } - - if (attr_mask & IB_QP_MIN_RNR_TIMER) { - mqpcb->min_rnr_nak_timer_field = attr->min_rnr_timer; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD, 1); - } - - if (attr_mask & IB_QP_SQ_PSN) { - mqpcb->send_psn = attr->sq_psn; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_PSN, 1); - } - - if (attr_mask & IB_QP_DEST_QPN) { - mqpcb->dest_qp_nr = attr->dest_qp_num; - update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_QP_NR, 1); - } - - if (attr_mask & IB_QP_PATH_MIG_STATE) { - if (attr->path_mig_state != IB_MIG_REARM - && attr->path_mig_state != IB_MIG_MIGRATED) { - ret = -EINVAL; - ehca_err(ibqp->device, "Invalid mig_state=%x", - attr->path_mig_state); - goto modify_qp_exit2; - } - mqpcb->path_migration_state = attr->path_mig_state + 1; - if (attr->path_mig_state == IB_MIG_REARM) - my_qp->mig_armed = 1; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1); - } - - if (attr_mask & IB_QP_CAP) { - mqpcb->max_nr_outst_send_wr = attr->cap.max_send_wr+1; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_SEND_WR, 1); - mqpcb->max_nr_outst_recv_wr = attr->cap.max_recv_wr+1; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_RECV_WR, 1); - /* no support for max_send/recv_sge yet */ - } - - if (ehca_debug_level >= 2) - 
ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num); - - h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - update_mask, - mqpcb, my_qp->galpas.kernel); - - if (h_ret != H_SUCCESS) { - ret = ehca2ib_return_code(h_ret); - ehca_err(ibqp->device, "hipz_h_modify_qp() failed h_ret=%lli " - "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num); - goto modify_qp_exit2; - } - - if ((my_qp->qp_type == IB_QPT_UD || - my_qp->qp_type == IB_QPT_GSI || - my_qp->qp_type == IB_QPT_SMI) && - statetrans == IB_QPST_SQE2RTS) { - /* doorbell to reprocessing wqes */ - iosync(); /* serialize GAL register access */ - hipz_update_sqa(my_qp, bad_wqe_cnt-1); - ehca_gen_dbg("doorbell for %x wqes", bad_wqe_cnt); - } - - if (statetrans == IB_QPST_RESET2INIT || - statetrans == IB_QPST_INIT2INIT) { - mqpcb->qp_enable = 1; - mqpcb->qp_state = EHCA_QPS_INIT; - update_mask = 0; - update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1); - - h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - update_mask, - mqpcb, - my_qp->galpas.kernel); - - if (h_ret != H_SUCCESS) { - ret = ehca2ib_return_code(h_ret); - ehca_err(ibqp->device, "ENABLE in context of " - "RESET_2_INIT failed! 
Maybe you didn't get " - "a LID h_ret=%lli ehca_qp=%p qp_num=%x", - h_ret, my_qp, ibqp->qp_num); - goto modify_qp_exit2; - } - } - if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR) - && !is_user) { - ret = check_for_left_cqes(my_qp, shca); - if (ret) - goto modify_qp_exit2; - } - - if (statetrans == IB_QPST_ANY2RESET) { - ipz_qeit_reset(&my_qp->ipz_rqueue); - ipz_qeit_reset(&my_qp->ipz_squeue); - - if (qp_cur_state == IB_QPS_ERR && !is_user) { - del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); - - if (HAS_RQ(my_qp)) - del_from_err_list(my_qp->recv_cq, - &my_qp->rq_err_node); - } - if (!is_user) - reset_queue_map(&my_qp->sq_map); - - if (HAS_RQ(my_qp) && !is_user) - reset_queue_map(&my_qp->rq_map); - } - - if (attr_mask & IB_QP_QKEY) - my_qp->qkey = attr->qkey; - -modify_qp_exit2: - if (squeue_locked) { /* this means: sqe -> rts */ - spin_unlock_irqrestore(&my_qp->spinlock_s, flags); - my_qp->sqerr_purgeflag = 1; - } - -modify_qp_exit1: - ehca_free_fw_ctrlblock(mqpcb); - - return ret; -} - -int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, - struct ib_udata *udata) -{ - int ret = 0; - - struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, - ib_device); - struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); - - /* The if-block below caches qp_attr to be modified for GSI and SMI - * qps during the initialization by ib_mad. When the respective port - * is activated, ie we got an event PORT_ACTIVE, we'll replay the - * cached modify calls sequence, see ehca_recover_sqs() below. - * Why that is required: - * 1) If one port is connected, older code requires that port one - * to be connected and module option nr_ports=1 to be given by - * user, which is very inconvenient for end user. - * 2) Firmware accepts modify_qp() only if respective port has become - * active. Older code had a wait loop of 30sec create_qp()/ - * define_aqp1(), which is not appropriate in practice. 
This - * code now removes that wait loop, see define_aqp1(), and always - * reports all ports to ib_mad resp. users. Only activated ports - * will then usable for the users. - */ - if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) { - int port = my_qp->init_attr.port_num; - struct ehca_sport *sport = &shca->sport[port - 1]; - unsigned long flags; - spin_lock_irqsave(&sport->mod_sqp_lock, flags); - /* cache qp_attr only during init */ - if (my_qp->mod_qp_parm) { - struct ehca_mod_qp_parm *p; - if (my_qp->mod_qp_parm_idx >= EHCA_MOD_QP_PARM_MAX) { - ehca_err(&shca->ib_device, - "mod_qp_parm overflow state=%x port=%x" - " type=%x", attr->qp_state, - my_qp->init_attr.port_num, - ibqp->qp_type); - spin_unlock_irqrestore(&sport->mod_sqp_lock, - flags); - return -EINVAL; - } - p = &my_qp->mod_qp_parm[my_qp->mod_qp_parm_idx]; - p->mask = attr_mask; - p->attr = *attr; - my_qp->mod_qp_parm_idx++; - ehca_dbg(&shca->ib_device, - "Saved qp_attr for state=%x port=%x type=%x", - attr->qp_state, my_qp->init_attr.port_num, - ibqp->qp_type); - spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); - goto out; - } - spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); - } - - ret = internal_modify_qp(ibqp, attr, attr_mask, 0); - -out: - if ((ret == 0) && (attr_mask & IB_QP_STATE)) - my_qp->state = attr->qp_state; - - return ret; -} - -void ehca_recover_sqp(struct ib_qp *sqp) -{ - struct ehca_qp *my_sqp = container_of(sqp, struct ehca_qp, ib_qp); - int port = my_sqp->init_attr.port_num; - struct ib_qp_attr attr; - struct ehca_mod_qp_parm *qp_parm; - int i, qp_parm_idx, ret; - unsigned long flags, wr_cnt; - - if (!my_sqp->mod_qp_parm) - return; - ehca_dbg(sqp->device, "SQP port=%x qp_num=%x", port, sqp->qp_num); - - qp_parm = my_sqp->mod_qp_parm; - qp_parm_idx = my_sqp->mod_qp_parm_idx; - for (i = 0; i < qp_parm_idx; i++) { - attr = qp_parm[i].attr; - ret = internal_modify_qp(sqp, &attr, qp_parm[i].mask, 0); - if (ret) { - ehca_err(sqp->device, "Could not modify SQP port=%x 
" - "qp_num=%x ret=%x", port, sqp->qp_num, ret); - goto free_qp_parm; - } - ehca_dbg(sqp->device, "SQP port=%x qp_num=%x in state=%x", - port, sqp->qp_num, attr.qp_state); - } - - /* re-trigger posted recv wrs */ - wr_cnt = my_sqp->ipz_rqueue.current_q_offset / - my_sqp->ipz_rqueue.qe_size; - if (wr_cnt) { - spin_lock_irqsave(&my_sqp->spinlock_r, flags); - hipz_update_rqa(my_sqp, wr_cnt); - spin_unlock_irqrestore(&my_sqp->spinlock_r, flags); - ehca_dbg(sqp->device, "doorbell port=%x qp_num=%x wr_cnt=%lx", - port, sqp->qp_num, wr_cnt); - } - -free_qp_parm: - kfree(qp_parm); - /* this prevents subsequent calls to modify_qp() to cache qp_attr */ - my_sqp->mod_qp_parm = NULL; -} - -int ehca_query_qp(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, - int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) -{ - struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); - struct ehca_shca *shca = container_of(qp->device, struct ehca_shca, - ib_device); - struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; - struct hcp_modify_qp_control_block *qpcb; - int cnt, ret = 0; - u64 h_ret; - - if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) { - ehca_err(qp->device, "Invalid attribute mask " - "ehca_qp=%p qp_num=%x qp_attr_mask=%x ", - my_qp, qp->qp_num, qp_attr_mask); - return -EINVAL; - } - - qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!qpcb) { - ehca_err(qp->device, "Out of memory for qpcb " - "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num); - return -ENOMEM; - } - - h_ret = hipz_h_query_qp(adapter_handle, - my_qp->ipz_qp_handle, - &my_qp->pf, - qpcb, my_qp->galpas.kernel); - - if (h_ret != H_SUCCESS) { - ret = ehca2ib_return_code(h_ret); - ehca_err(qp->device, "hipz_h_query_qp() failed " - "ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, qp->qp_num, h_ret); - goto query_qp_exit1; - } - - qp_attr->cur_qp_state = ehca2ib_qp_state(qpcb->qp_state); - qp_attr->qp_state = qp_attr->cur_qp_state; - - if (qp_attr->cur_qp_state == -EINVAL) { - ret = -EINVAL; - 
ehca_err(qp->device, "Got invalid ehca_qp_state=%x " - "ehca_qp=%p qp_num=%x", - qpcb->qp_state, my_qp, qp->qp_num); - goto query_qp_exit1; - } - - if (qp_attr->qp_state == IB_QPS_SQD) - qp_attr->sq_draining = 1; - - qp_attr->qkey = qpcb->qkey; - qp_attr->path_mtu = qpcb->path_mtu; - qp_attr->path_mig_state = qpcb->path_migration_state - 1; - qp_attr->rq_psn = qpcb->receive_psn; - qp_attr->sq_psn = qpcb->send_psn; - qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field; - qp_attr->cap.max_send_wr = qpcb->max_nr_outst_send_wr-1; - qp_attr->cap.max_recv_wr = qpcb->max_nr_outst_recv_wr-1; - /* UD_AV CIRCUMVENTION */ - if (my_qp->qp_type == IB_QPT_UD) { - qp_attr->cap.max_send_sge = - qpcb->actual_nr_sges_in_sq_wqe - 2; - qp_attr->cap.max_recv_sge = - qpcb->actual_nr_sges_in_rq_wqe - 2; - } else { - qp_attr->cap.max_send_sge = - qpcb->actual_nr_sges_in_sq_wqe; - qp_attr->cap.max_recv_sge = - qpcb->actual_nr_sges_in_rq_wqe; - } - - qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size; - qp_attr->dest_qp_num = qpcb->dest_qp_nr; - - qp_attr->pkey_index = qpcb->prim_p_key_idx; - qp_attr->port_num = qpcb->prim_phys_port; - qp_attr->timeout = qpcb->timeout; - qp_attr->retry_cnt = qpcb->retry_count; - qp_attr->rnr_retry = qpcb->rnr_retry_count; - - qp_attr->alt_pkey_index = qpcb->alt_p_key_idx; - qp_attr->alt_port_num = qpcb->alt_phys_port; - qp_attr->alt_timeout = qpcb->timeout_al; - - qp_attr->max_dest_rd_atomic = qpcb->rdma_nr_atomic_resp_res; - qp_attr->max_rd_atomic = qpcb->rdma_atomic_outst_dest_qp; - - /* primary av */ - qp_attr->ah_attr.sl = qpcb->service_level; - - if (qpcb->send_grh_flag) { - qp_attr->ah_attr.ah_flags = IB_AH_GRH; - } - - qp_attr->ah_attr.static_rate = qpcb->max_static_rate; - qp_attr->ah_attr.dlid = qpcb->dlid; - qp_attr->ah_attr.src_path_bits = qpcb->source_path_bits; - qp_attr->ah_attr.port_num = qp_attr->port_num; - - /* primary GRH */ - qp_attr->ah_attr.grh.traffic_class = qpcb->traffic_class; - qp_attr->ah_attr.grh.hop_limit = 
qpcb->hop_limit; - qp_attr->ah_attr.grh.sgid_index = qpcb->source_gid_idx; - qp_attr->ah_attr.grh.flow_label = qpcb->flow_label; - - for (cnt = 0; cnt < 16; cnt++) - qp_attr->ah_attr.grh.dgid.raw[cnt] = - qpcb->dest_gid.byte[cnt]; - - /* alternate AV */ - qp_attr->alt_ah_attr.sl = qpcb->service_level_al; - if (qpcb->send_grh_flag_al) { - qp_attr->alt_ah_attr.ah_flags = IB_AH_GRH; - } - - qp_attr->alt_ah_attr.static_rate = qpcb->max_static_rate_al; - qp_attr->alt_ah_attr.dlid = qpcb->dlid_al; - qp_attr->alt_ah_attr.src_path_bits = qpcb->source_path_bits_al; - - /* alternate GRH */ - qp_attr->alt_ah_attr.grh.traffic_class = qpcb->traffic_class_al; - qp_attr->alt_ah_attr.grh.hop_limit = qpcb->hop_limit_al; - qp_attr->alt_ah_attr.grh.sgid_index = qpcb->source_gid_idx_al; - qp_attr->alt_ah_attr.grh.flow_label = qpcb->flow_label_al; - - for (cnt = 0; cnt < 16; cnt++) - qp_attr->alt_ah_attr.grh.dgid.raw[cnt] = - qpcb->dest_gid_al.byte[cnt]; - - /* return init attributes given in ehca_create_qp */ - if (qp_init_attr) - *qp_init_attr = my_qp->init_attr; - - if (ehca_debug_level >= 2) - ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num); - -query_qp_exit1: - ehca_free_fw_ctrlblock(qpcb); - - return ret; -} - -int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, - enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) -{ - struct ehca_qp *my_qp = - container_of(ibsrq, struct ehca_qp, ib_srq); - struct ehca_shca *shca = - container_of(ibsrq->pd->device, struct ehca_shca, ib_device); - struct hcp_modify_qp_control_block *mqpcb; - u64 update_mask; - u64 h_ret; - int ret = 0; - - mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!mqpcb) { - ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb " - "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num); - return -ENOMEM; - } - - update_mask = 0; - if (attr_mask & IB_SRQ_LIMIT) { - attr_mask &= ~IB_SRQ_LIMIT; - update_mask |= - EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1) - | 
EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1); - mqpcb->curr_srq_limit = attr->srq_limit; - mqpcb->qp_aff_asyn_ev_log_reg = - EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1); - } - - /* by now, all bits in attr_mask should have been cleared */ - if (attr_mask) { - ehca_err(ibsrq->device, "invalid attribute mask bits set " - "attr_mask=%x", attr_mask); - ret = -EINVAL; - goto modify_srq_exit0; - } - - if (ehca_debug_level >= 2) - ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); - - h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle, - NULL, update_mask, mqpcb, - my_qp->galpas.kernel); - - if (h_ret != H_SUCCESS) { - ret = ehca2ib_return_code(h_ret); - ehca_err(ibsrq->device, "hipz_h_modify_qp() failed h_ret=%lli " - "ehca_qp=%p qp_num=%x", - h_ret, my_qp, my_qp->real_qp_num); - } - -modify_srq_exit0: - ehca_free_fw_ctrlblock(mqpcb); - - return ret; -} - -int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) -{ - struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq); - struct ehca_shca *shca = container_of(srq->device, struct ehca_shca, - ib_device); - struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; - struct hcp_modify_qp_control_block *qpcb; - int ret = 0; - u64 h_ret; - - qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); - if (!qpcb) { - ehca_err(srq->device, "Out of memory for qpcb " - "ehca_qp=%p qp_num=%x", my_qp, my_qp->real_qp_num); - return -ENOMEM; - } - - h_ret = hipz_h_query_qp(adapter_handle, my_qp->ipz_qp_handle, - NULL, qpcb, my_qp->galpas.kernel); - - if (h_ret != H_SUCCESS) { - ret = ehca2ib_return_code(h_ret); - ehca_err(srq->device, "hipz_h_query_qp() failed " - "ehca_qp=%p qp_num=%x h_ret=%lli", - my_qp, my_qp->real_qp_num, h_ret); - goto query_srq_exit1; - } - - srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1; - srq_attr->max_sge = 3; - srq_attr->srq_limit = qpcb->curr_srq_limit; - - if (ehca_debug_level >= 2) - ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); - 
-query_srq_exit1: - ehca_free_fw_ctrlblock(qpcb); - - return ret; -} - -static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, - struct ib_uobject *uobject) -{ - struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device); - struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, - ib_pd); - struct ehca_sport *sport = &shca->sport[my_qp->init_attr.port_num - 1]; - u32 qp_num = my_qp->real_qp_num; - int ret; - u64 h_ret; - u8 port_num; - int is_user = 0; - enum ib_qp_type qp_type; - unsigned long flags; - - if (uobject) { - is_user = 1; - if (my_qp->mm_count_galpa || - my_qp->mm_count_rqueue || my_qp->mm_count_squeue) { - ehca_err(dev, "Resources still referenced in " - "user space qp_num=%x", qp_num); - return -EINVAL; - } - } - - if (my_qp->send_cq) { - ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num); - if (ret) { - ehca_err(dev, "Couldn't unassign qp from " - "send_cq ret=%i qp_num=%x cq_num=%x", ret, - qp_num, my_qp->send_cq->cq_number); - return ret; - } - } - - write_lock_irqsave(&ehca_qp_idr_lock, flags); - idr_remove(&ehca_qp_idr, my_qp->token); - write_unlock_irqrestore(&ehca_qp_idr_lock, flags); - - /* - * SRQs will never get into an error list and do not have a recv_cq, - * so we need to skip them here. 
- */ - if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user) - del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node); - - if (HAS_SQ(my_qp) && !is_user) - del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); - - /* now wait until all pending events have completed */ - wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events)); - - h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); - if (h_ret != H_SUCCESS) { - ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%lli " - "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num); - return ehca2ib_return_code(h_ret); - } - - port_num = my_qp->init_attr.port_num; - qp_type = my_qp->init_attr.qp_type; - - if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) { - spin_lock_irqsave(&sport->mod_sqp_lock, flags); - kfree(my_qp->mod_qp_parm); - my_qp->mod_qp_parm = NULL; - shca->sport[port_num - 1].ibqp_sqp[qp_type] = NULL; - spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); - } - - /* no support for IB_QPT_SMI yet */ - if (qp_type == IB_QPT_GSI) { - struct ib_event event; - ehca_info(dev, "device %s: port %x is inactive.", - shca->ib_device.name, port_num); - event.device = &shca->ib_device; - event.event = IB_EVENT_PORT_ERR; - event.element.port_num = port_num; - shca->sport[port_num - 1].port_state = IB_PORT_DOWN; - ib_dispatch_event(&event); - } - - if (HAS_RQ(my_qp)) { - ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); - if (!is_user) - vfree(my_qp->rq_map.map); - } - if (HAS_SQ(my_qp)) { - ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); - if (!is_user) - vfree(my_qp->sq_map.map); - } - kmem_cache_free(qp_cache, my_qp); - atomic_dec(&shca->num_qps); - return 0; -} - -int ehca_destroy_qp(struct ib_qp *qp) -{ - return internal_destroy_qp(qp->device, - container_of(qp, struct ehca_qp, ib_qp), - qp->uobject); -} - -int ehca_destroy_srq(struct ib_srq *srq) -{ - return internal_destroy_qp(srq->device, - container_of(srq, struct ehca_qp, ib_srq), - srq->uobject); -} - -int ehca_init_qp_cache(void) -{ - qp_cache = 
kmem_cache_create("ehca_cache_qp", - sizeof(struct ehca_qp), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!qp_cache) - return -ENOMEM; - return 0; -} - -void ehca_cleanup_qp_cache(void) -{ - if (qp_cache) - kmem_cache_destroy(qp_cache); -} diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c deleted file mode 100644 index 47f94984353d..000000000000 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ /dev/null @@ -1,953 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * post_send/recv, poll_cq, req_notify - * - * Authors: Hoang-Nam Nguyen - * Waleri Fomin - * Joachim Fenkes - * Reinhard Ernst - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - - -#include "ehca_classes.h" -#include "ehca_tools.h" -#include "ehca_qes.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" -#include "hipz_fns.h" - -/* in RC traffic, insert an empty RDMA READ every this many packets */ -#define ACK_CIRC_THRESHOLD 2000000 - -static u64 replace_wr_id(u64 wr_id, u16 idx) -{ - u64 ret; - - ret = wr_id & ~QMAP_IDX_MASK; - ret |= idx & QMAP_IDX_MASK; - - return ret; -} - -static u16 get_app_wr_id(u64 wr_id) -{ - return wr_id & QMAP_IDX_MASK; -} - -static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, - struct ehca_wqe *wqe_p, - struct ib_recv_wr *recv_wr, - u32 rq_map_idx) -{ - u8 cnt_ds; - if (unlikely((recv_wr->num_sge < 0) || - (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) { - ehca_gen_err("Invalid number of WQE SGE. 
" - "num_sqe=%x max_nr_of_sg=%x", - recv_wr->num_sge, ipz_rqueue->act_nr_of_sg); - return -EINVAL; /* invalid SG list length */ - } - - /* clear wqe header until sglist */ - memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); - - wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx); - wqe_p->nr_of_data_seg = recv_wr->num_sge; - - for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) { - wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr = - recv_wr->sg_list[cnt_ds].addr; - wqe_p->u.all_rcv.sg_list[cnt_ds].lkey = - recv_wr->sg_list[cnt_ds].lkey; - wqe_p->u.all_rcv.sg_list[cnt_ds].length = - recv_wr->sg_list[cnt_ds].length; - } - - if (ehca_debug_level >= 3) { - ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", - ipz_rqueue); - ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); - } - - return 0; -} - -#if defined(DEBUG_GSI_SEND_WR) - -/* need ib_mad struct */ -#include - -static void trace_send_wr_ud(const struct ib_send_wr *send_wr) -{ - int idx; - int j; - while (send_wr) { - struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr; - struct ib_sge *sge = send_wr->sg_list; - ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x " - "send_flags=%x opcode=%x", idx, send_wr->wr_id, - send_wr->num_sge, send_wr->send_flags, - send_wr->opcode); - if (mad_hdr) { - ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x " - "mgmt_class=%x class_version=%x method=%x " - "status=%x class_specific=%x tid=%lx " - "attr_id=%x resv=%x attr_mod=%x", - idx, mad_hdr->base_version, - mad_hdr->mgmt_class, - mad_hdr->class_version, mad_hdr->method, - mad_hdr->status, mad_hdr->class_specific, - mad_hdr->tid, mad_hdr->attr_id, - mad_hdr->resv, - mad_hdr->attr_mod); - } - for (j = 0; j < send_wr->num_sge; j++) { - u8 *data = __va(sge->addr); - ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " - "lkey=%x", - idx, j, data, sge->length, sge->lkey); - /* assume length is n*16 */ - ehca_dmp(data, sge->length, "send_wr#%x sge#%x", - idx, j); - sge++; - } /* eof for j */ - idx++; - 
send_wr = send_wr->next; - } /* eof while send_wr */ -} - -#endif /* DEBUG_GSI_SEND_WR */ - -static inline int ehca_write_swqe(struct ehca_qp *qp, - struct ehca_wqe *wqe_p, - const struct ib_send_wr *send_wr, - u32 sq_map_idx, - int hidden) -{ - u32 idx; - u64 dma_length; - struct ehca_av *my_av; - u32 remote_qkey = send_wr->wr.ud.remote_qkey; - struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx]; - - if (unlikely((send_wr->num_sge < 0) || - (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) { - ehca_gen_err("Invalid number of WQE SGE. " - "num_sqe=%x max_nr_of_sg=%x", - send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg); - return -EINVAL; /* invalid SG list length */ - } - - /* clear wqe header until sglist */ - memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); - - wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx); - - qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id); - qmap_entry->reported = 0; - qmap_entry->cqe_req = 0; - - switch (send_wr->opcode) { - case IB_WR_SEND: - case IB_WR_SEND_WITH_IMM: - wqe_p->optype = WQE_OPTYPE_SEND; - break; - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - wqe_p->optype = WQE_OPTYPE_RDMAWRITE; - break; - case IB_WR_RDMA_READ: - wqe_p->optype = WQE_OPTYPE_RDMAREAD; - break; - default: - ehca_gen_err("Invalid opcode=%x", send_wr->opcode); - return -EINVAL; /* invalid opcode */ - } - - wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE; - - wqe_p->wr_flag = 0; - - if ((send_wr->send_flags & IB_SEND_SIGNALED || - qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR) - && !hidden) { - wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM; - qmap_entry->cqe_req = 1; - } - - if (send_wr->opcode == IB_WR_SEND_WITH_IMM || - send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { - /* this might not work as long as HW does not support it */ - wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data); - wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT; - } - - wqe_p->nr_of_data_seg = send_wr->num_sge; - - switch 
(qp->qp_type) { - case IB_QPT_SMI: - case IB_QPT_GSI: - /* no break is intential here */ - case IB_QPT_UD: - /* IB 1.2 spec C10-15 compliance */ - if (send_wr->wr.ud.remote_qkey & 0x80000000) - remote_qkey = qp->qkey; - - wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8; - wqe_p->local_ee_context_qkey = remote_qkey; - if (unlikely(!send_wr->wr.ud.ah)) { - ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp); - return -EINVAL; - } - if (unlikely(send_wr->wr.ud.remote_qpn == 0)) { - ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num); - return -EINVAL; - } - my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah); - wqe_p->u.ud_av.ud_av = my_av->av; - - /* - * omitted check of IB_SEND_INLINE - * since HW does not support it - */ - for (idx = 0; idx < send_wr->num_sge; idx++) { - wqe_p->u.ud_av.sg_list[idx].vaddr = - send_wr->sg_list[idx].addr; - wqe_p->u.ud_av.sg_list[idx].lkey = - send_wr->sg_list[idx].lkey; - wqe_p->u.ud_av.sg_list[idx].length = - send_wr->sg_list[idx].length; - } /* eof for idx */ - if (qp->qp_type == IB_QPT_SMI || - qp->qp_type == IB_QPT_GSI) - wqe_p->u.ud_av.ud_av.pmtu = 1; - if (qp->qp_type == IB_QPT_GSI) { - wqe_p->pkeyi = send_wr->wr.ud.pkey_index; -#ifdef DEBUG_GSI_SEND_WR - trace_send_wr_ud(send_wr); -#endif /* DEBUG_GSI_SEND_WR */ - } - break; - - case IB_QPT_UC: - if (send_wr->send_flags & IB_SEND_FENCE) - wqe_p->wr_flag |= WQE_WRFLAG_FENCE; - /* no break is intentional here */ - case IB_QPT_RC: - /* TODO: atomic not implemented */ - wqe_p->u.nud.remote_virtual_address = - send_wr->wr.rdma.remote_addr; - wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey; - - /* - * omitted checking of IB_SEND_INLINE - * since HW does not support it - */ - dma_length = 0; - for (idx = 0; idx < send_wr->num_sge; idx++) { - wqe_p->u.nud.sg_list[idx].vaddr = - send_wr->sg_list[idx].addr; - wqe_p->u.nud.sg_list[idx].lkey = - send_wr->sg_list[idx].lkey; - wqe_p->u.nud.sg_list[idx].length = - send_wr->sg_list[idx].length; - dma_length += 
send_wr->sg_list[idx].length; - } /* eof idx */ - wqe_p->u.nud.atomic_1st_op_dma_len = dma_length; - - /* unsolicited ack circumvention */ - if (send_wr->opcode == IB_WR_RDMA_READ) { - /* on RDMA read, switch on and reset counters */ - qp->message_count = qp->packet_count = 0; - qp->unsol_ack_circ = 1; - } else - /* else estimate #packets */ - qp->packet_count += (dma_length >> qp->mtu_shift) + 1; - - break; - - default: - ehca_gen_err("Invalid qptype=%x", qp->qp_type); - return -EINVAL; - } - - if (ehca_debug_level >= 3) { - ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp); - ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe"); - } - return 0; -} - -/* map_ib_wc_status converts raw cqe_status to ib_wc_status */ -static inline void map_ib_wc_status(u32 cqe_status, - enum ib_wc_status *wc_status) -{ - if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) { - switch (cqe_status & 0x3F) { - case 0x01: - case 0x21: - *wc_status = IB_WC_LOC_LEN_ERR; - break; - case 0x02: - case 0x22: - *wc_status = IB_WC_LOC_QP_OP_ERR; - break; - case 0x03: - case 0x23: - *wc_status = IB_WC_LOC_EEC_OP_ERR; - break; - case 0x04: - case 0x24: - *wc_status = IB_WC_LOC_PROT_ERR; - break; - case 0x05: - case 0x25: - *wc_status = IB_WC_WR_FLUSH_ERR; - break; - case 0x06: - *wc_status = IB_WC_MW_BIND_ERR; - break; - case 0x07: /* remote error - look into bits 20:24 */ - switch ((cqe_status - & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) { - case 0x0: - /* - * PSN Sequence Error! - * couldn't find a matching status! 
- */ - *wc_status = IB_WC_GENERAL_ERR; - break; - case 0x1: - *wc_status = IB_WC_REM_INV_REQ_ERR; - break; - case 0x2: - *wc_status = IB_WC_REM_ACCESS_ERR; - break; - case 0x3: - *wc_status = IB_WC_REM_OP_ERR; - break; - case 0x4: - *wc_status = IB_WC_REM_INV_RD_REQ_ERR; - break; - } - break; - case 0x08: - *wc_status = IB_WC_RETRY_EXC_ERR; - break; - case 0x09: - *wc_status = IB_WC_RNR_RETRY_EXC_ERR; - break; - case 0x0A: - case 0x2D: - *wc_status = IB_WC_REM_ABORT_ERR; - break; - case 0x0B: - case 0x2E: - *wc_status = IB_WC_INV_EECN_ERR; - break; - case 0x0C: - case 0x2F: - *wc_status = IB_WC_INV_EEC_STATE_ERR; - break; - case 0x0D: - *wc_status = IB_WC_BAD_RESP_ERR; - break; - case 0x10: - /* WQE purged */ - *wc_status = IB_WC_WR_FLUSH_ERR; - break; - default: - *wc_status = IB_WC_FATAL_ERR; - - } - } else - *wc_status = IB_WC_SUCCESS; -} - -static inline int post_one_send(struct ehca_qp *my_qp, - struct ib_send_wr *cur_send_wr, - int hidden) -{ - struct ehca_wqe *wqe_p; - int ret; - u32 sq_map_idx; - u64 start_offset = my_qp->ipz_squeue.current_q_offset; - - /* get pointer next to free WQE */ - wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue); - if (unlikely(!wqe_p)) { - /* too many posted work requests: queue overflow */ - ehca_err(my_qp->ib_qp.device, "Too many posted WQEs " - "qp_num=%x", my_qp->ib_qp.qp_num); - return -ENOMEM; - } - - /* - * Get the index of the WQE in the send queue. The same index is used - * for writing into the sq_map. 
- */ - sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size; - - /* write a SEND WQE into the QUEUE */ - ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden); - /* - * if something failed, - * reset the free entry pointer to the start value - */ - if (unlikely(ret)) { - my_qp->ipz_squeue.current_q_offset = start_offset; - ehca_err(my_qp->ib_qp.device, "Could not write WQE " - "qp_num=%x", my_qp->ib_qp.qp_num); - return -EINVAL; - } - - return 0; -} - -int ehca_post_send(struct ib_qp *qp, - struct ib_send_wr *send_wr, - struct ib_send_wr **bad_send_wr) -{ - struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); - int wqe_cnt = 0; - int ret = 0; - unsigned long flags; - - /* Reject WR if QP is in RESET, INIT or RTR state */ - if (unlikely(my_qp->state < IB_QPS_RTS)) { - ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", - my_qp->state, qp->qp_num); - ret = -EINVAL; - goto out; - } - - /* LOCK the QUEUE */ - spin_lock_irqsave(&my_qp->spinlock_s, flags); - - /* Send an empty extra RDMA read if: - * 1) there has been an RDMA read on this connection before - * 2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets - * 3) we can be sure that any previous extra RDMA read has been - * processed so we don't overflow the SQ - */ - if (unlikely(my_qp->unsol_ack_circ && - my_qp->packet_count > ACK_CIRC_THRESHOLD && - my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) { - /* insert an empty RDMA READ to fix up the remote QP state */ - struct ib_send_wr circ_wr; - memset(&circ_wr, 0, sizeof(circ_wr)); - circ_wr.opcode = IB_WR_RDMA_READ; - post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */ - wqe_cnt++; - ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num); - my_qp->message_count = my_qp->packet_count = 0; - } - - /* loop processes list of send reqs */ - while (send_wr) { - ret = post_one_send(my_qp, send_wr, 0); - if (unlikely(ret)) { - goto post_send_exit0; - } - wqe_cnt++; - send_wr = send_wr->next; - } - 
-post_send_exit0: - iosync(); /* serialize GAL register access */ - hipz_update_sqa(my_qp, wqe_cnt); - if (unlikely(ret || ehca_debug_level >= 2)) - ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", - my_qp, qp->qp_num, wqe_cnt, ret); - my_qp->message_count += wqe_cnt; - spin_unlock_irqrestore(&my_qp->spinlock_s, flags); - -out: - if (ret) - *bad_send_wr = send_wr; - return ret; -} - -static int internal_post_recv(struct ehca_qp *my_qp, - struct ib_device *dev, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr) -{ - struct ehca_wqe *wqe_p; - int wqe_cnt = 0; - int ret = 0; - u32 rq_map_idx; - unsigned long flags; - struct ehca_qmap_entry *qmap_entry; - - if (unlikely(!HAS_RQ(my_qp))) { - ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d", - my_qp, my_qp->real_qp_num, my_qp->ext_type); - ret = -ENODEV; - goto out; - } - - /* LOCK the QUEUE */ - spin_lock_irqsave(&my_qp->spinlock_r, flags); - - /* loop processes list of recv reqs */ - while (recv_wr) { - u64 start_offset = my_qp->ipz_rqueue.current_q_offset; - /* get pointer next to free WQE */ - wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue); - if (unlikely(!wqe_p)) { - /* too many posted work requests: queue overflow */ - ret = -ENOMEM; - ehca_err(dev, "Too many posted WQEs " - "qp_num=%x", my_qp->real_qp_num); - goto post_recv_exit0; - } - /* - * Get the index of the WQE in the recv queue. The same index - * is used for writing into the rq_map. 
- */ - rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size; - - /* write a RECV WQE into the QUEUE */ - ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr, - rq_map_idx); - /* - * if something failed, - * reset the free entry pointer to the start value - */ - if (unlikely(ret)) { - my_qp->ipz_rqueue.current_q_offset = start_offset; - ret = -EINVAL; - ehca_err(dev, "Could not write WQE " - "qp_num=%x", my_qp->real_qp_num); - goto post_recv_exit0; - } - - qmap_entry = &my_qp->rq_map.map[rq_map_idx]; - qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id); - qmap_entry->reported = 0; - qmap_entry->cqe_req = 1; - - wqe_cnt++; - recv_wr = recv_wr->next; - } /* eof for recv_wr */ - -post_recv_exit0: - iosync(); /* serialize GAL register access */ - hipz_update_rqa(my_qp, wqe_cnt); - if (unlikely(ret || ehca_debug_level >= 2)) - ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", - my_qp, my_qp->real_qp_num, wqe_cnt, ret); - spin_unlock_irqrestore(&my_qp->spinlock_r, flags); - -out: - if (ret) - *bad_recv_wr = recv_wr; - - return ret; -} - -int ehca_post_recv(struct ib_qp *qp, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr) -{ - struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); - - /* Reject WR if QP is in RESET state */ - if (unlikely(my_qp->state == IB_QPS_RESET)) { - ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", - my_qp->state, qp->qp_num); - *bad_recv_wr = recv_wr; - return -EINVAL; - } - - return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr); -} - -int ehca_post_srq_recv(struct ib_srq *srq, - struct ib_recv_wr *recv_wr, - struct ib_recv_wr **bad_recv_wr) -{ - return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq), - srq->device, recv_wr, bad_recv_wr); -} - -/* - * ib_wc_opcode table converts ehca wc opcode to ib - * Since we use zero to indicate invalid opcode, the actual ib opcode must - * be decremented!!! 
- */ -static const u8 ib_wc_opcode[255] = { - [0x01] = IB_WC_RECV+1, - [0x02] = IB_WC_RECV_RDMA_WITH_IMM+1, - [0x04] = IB_WC_BIND_MW+1, - [0x08] = IB_WC_FETCH_ADD+1, - [0x10] = IB_WC_COMP_SWAP+1, - [0x20] = IB_WC_RDMA_WRITE+1, - [0x40] = IB_WC_RDMA_READ+1, - [0x80] = IB_WC_SEND+1 -}; - -/* internal function to poll one entry of cq */ -static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) -{ - int ret = 0, qmap_tail_idx; - struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); - struct ehca_cqe *cqe; - struct ehca_qp *my_qp; - struct ehca_qmap_entry *qmap_entry; - struct ehca_queue_map *qmap; - int cqe_count = 0, is_error; - -repoll: - cqe = (struct ehca_cqe *) - ipz_qeit_get_inc_valid(&my_cq->ipz_queue); - if (!cqe) { - ret = -EAGAIN; - if (ehca_debug_level >= 3) - ehca_dbg(cq->device, "Completion queue is empty " - "my_cq=%p cq_num=%x", my_cq, my_cq->cq_number); - goto poll_cq_one_exit0; - } - - /* prevents loads being reordered across this point */ - rmb(); - - cqe_count++; - if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) { - struct ehca_qp *qp; - int purgeflag; - unsigned long flags; - - qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number); - if (!qp) { - ehca_err(cq->device, "cq_num=%x qp_num=%x " - "could not find qp -> ignore cqe", - my_cq->cq_number, cqe->local_qp_number); - ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x", - my_cq->cq_number, cqe->local_qp_number); - /* ignore this purged cqe */ - goto repoll; - } - spin_lock_irqsave(&qp->spinlock_s, flags); - purgeflag = qp->sqerr_purgeflag; - spin_unlock_irqrestore(&qp->spinlock_s, flags); - - if (purgeflag) { - ehca_dbg(cq->device, - "Got CQE with purged bit qp_num=%x src_qp=%x", - cqe->local_qp_number, cqe->remote_qp_number); - if (ehca_debug_level >= 2) - ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x", - cqe->local_qp_number, - cqe->remote_qp_number); - /* - * ignore this to avoid double cqes of bad wqe - * that caused sqe and turn off purge flag - */ - qp->sqerr_purgeflag = 0; - goto repoll; 
- } - } - - is_error = cqe->status & WC_STATUS_ERROR_BIT; - - /* trace error CQEs if debug_level >= 1, trace all CQEs if >= 3 */ - if (unlikely(ehca_debug_level >= 3 || (ehca_debug_level && is_error))) { - ehca_dbg(cq->device, - "Received %sCOMPLETION ehca_cq=%p cq_num=%x -----", - is_error ? "ERROR " : "", my_cq, my_cq->cq_number); - ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", - my_cq, my_cq->cq_number); - ehca_dbg(cq->device, - "ehca_cq=%p cq_num=%x -------------------------", - my_cq, my_cq->cq_number); - } - - read_lock(&ehca_qp_idr_lock); - my_qp = idr_find(&ehca_qp_idr, cqe->qp_token); - read_unlock(&ehca_qp_idr_lock); - if (!my_qp) - goto repoll; - wc->qp = &my_qp->ib_qp; - - qmap_tail_idx = get_app_wr_id(cqe->work_request_id); - if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) - /* We got a send completion. */ - qmap = &my_qp->sq_map; - else - /* We got a receive completion. */ - qmap = &my_qp->rq_map; - - /* advance the tail pointer */ - qmap->tail = qmap_tail_idx; - - if (is_error) { - /* - * set left_to_poll to 0 because in error state, we will not - * get any additional CQEs - */ - my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail, - my_qp->sq_map.entries); - my_qp->sq_map.left_to_poll = 0; - ehca_add_to_err_list(my_qp, 1); - - my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail, - my_qp->rq_map.entries); - my_qp->rq_map.left_to_poll = 0; - if (HAS_RQ(my_qp)) - ehca_add_to_err_list(my_qp, 0); - } - - qmap_entry = &qmap->map[qmap_tail_idx]; - if (qmap_entry->reported) { - ehca_warn(cq->device, "Double cqe on qp_num=%#x", - my_qp->real_qp_num); - /* found a double cqe, discard it and read next one */ - goto repoll; - } - - wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id); - qmap_entry->reported = 1; - - /* if left_to_poll is decremented to 0, add the QP to the error list */ - if (qmap->left_to_poll > 0) { - qmap->left_to_poll--; - if ((my_qp->sq_map.left_to_poll == 0) && - (my_qp->rq_map.left_to_poll == 0)) { - 
ehca_add_to_err_list(my_qp, 1); - if (HAS_RQ(my_qp)) - ehca_add_to_err_list(my_qp, 0); - } - } - - /* eval ib_wc_opcode */ - wc->opcode = ib_wc_opcode[cqe->optype]-1; - if (unlikely(wc->opcode == -1)) { - ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x " - "ehca_cq=%p cq_num=%x", - cqe->optype, cqe->status, my_cq, my_cq->cq_number); - /* dump cqe for other infos */ - ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", - my_cq, my_cq->cq_number); - /* update also queue adder to throw away this entry!!! */ - goto repoll; - } - - /* eval ib_wc_status */ - if (unlikely(is_error)) { - /* complete with errors */ - map_ib_wc_status(cqe->status, &wc->status); - wc->vendor_err = wc->status; - } else - wc->status = IB_WC_SUCCESS; - - wc->byte_len = cqe->nr_bytes_transferred; - wc->pkey_index = cqe->pkey_index; - wc->slid = cqe->rlid; - wc->dlid_path_bits = cqe->dlid; - wc->src_qp = cqe->remote_qp_number; - /* - * HW has "Immed data present" and "GRH present" in bits 6 and 5. - * SW defines those in bits 1 and 0, so we can just shift and mask. 
- */ - wc->wc_flags = (cqe->w_completion_flags >> 5) & 3; - wc->ex.imm_data = cpu_to_be32(cqe->immediate_data); - wc->sl = cqe->service_level; - -poll_cq_one_exit0: - if (cqe_count > 0) - hipz_update_feca(my_cq, cqe_count); - - return ret; -} - -static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq, - struct ib_wc *wc, int num_entries, - struct ipz_queue *ipz_queue, int on_sq) -{ - int nr = 0; - struct ehca_wqe *wqe; - u64 offset; - struct ehca_queue_map *qmap; - struct ehca_qmap_entry *qmap_entry; - - if (on_sq) - qmap = &my_qp->sq_map; - else - qmap = &my_qp->rq_map; - - qmap_entry = &qmap->map[qmap->next_wqe_idx]; - - while ((nr < num_entries) && (qmap_entry->reported == 0)) { - /* generate flush CQE */ - - memset(wc, 0, sizeof(*wc)); - - offset = qmap->next_wqe_idx * ipz_queue->qe_size; - wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset); - if (!wqe) { - ehca_err(cq->device, "Invalid wqe offset=%#llx on " - "qp_num=%#x", offset, my_qp->real_qp_num); - return nr; - } - - wc->wr_id = replace_wr_id(wqe->work_request_id, - qmap_entry->app_wr_id); - - if (on_sq) { - switch (wqe->optype) { - case WQE_OPTYPE_SEND: - wc->opcode = IB_WC_SEND; - break; - case WQE_OPTYPE_RDMAWRITE: - wc->opcode = IB_WC_RDMA_WRITE; - break; - case WQE_OPTYPE_RDMAREAD: - wc->opcode = IB_WC_RDMA_READ; - break; - default: - ehca_err(cq->device, "Invalid optype=%x", - wqe->optype); - return nr; - } - } else - wc->opcode = IB_WC_RECV; - - if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) { - wc->ex.imm_data = wqe->immediate_data; - wc->wc_flags |= IB_WC_WITH_IMM; - } - - wc->status = IB_WC_WR_FLUSH_ERR; - - wc->qp = &my_qp->ib_qp; - - /* mark as reported and advance next_wqe pointer */ - qmap_entry->reported = 1; - qmap->next_wqe_idx = next_index(qmap->next_wqe_idx, - qmap->entries); - qmap_entry = &qmap->map[qmap->next_wqe_idx]; - - wc++; nr++; - } - - return nr; - -} - -int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) -{ - struct ehca_cq *my_cq = 
container_of(cq, struct ehca_cq, ib_cq); - int nr; - struct ehca_qp *err_qp; - struct ib_wc *current_wc = wc; - int ret = 0; - unsigned long flags; - int entries_left = num_entries; - - if (num_entries < 1) { - ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p " - "cq_num=%x", num_entries, my_cq, my_cq->cq_number); - ret = -EINVAL; - goto poll_cq_exit0; - } - - spin_lock_irqsave(&my_cq->spinlock, flags); - - /* generate flush cqes for send queues */ - list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) { - nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, - &err_qp->ipz_squeue, 1); - entries_left -= nr; - current_wc += nr; - - if (entries_left == 0) - break; - } - - /* generate flush cqes for receive queues */ - list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) { - nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, - &err_qp->ipz_rqueue, 0); - entries_left -= nr; - current_wc += nr; - - if (entries_left == 0) - break; - } - - for (nr = 0; nr < entries_left; nr++) { - ret = ehca_poll_cq_one(cq, current_wc); - if (ret) - break; - current_wc++; - } /* eof for nr */ - entries_left -= nr; - - spin_unlock_irqrestore(&my_cq->spinlock, flags); - if (ret == -EAGAIN || !ret) - ret = num_entries - entries_left; - -poll_cq_exit0: - return ret; -} - -int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags) -{ - struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); - int ret = 0; - - switch (notify_flags & IB_CQ_SOLICITED_MASK) { - case IB_CQ_SOLICITED: - hipz_set_cqx_n0(my_cq, 1); - break; - case IB_CQ_NEXT_COMP: - hipz_set_cqx_n1(my_cq, 1); - break; - default: - return -EINVAL; - } - - if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { - unsigned long spl_flags; - spin_lock_irqsave(&my_cq->spinlock, spl_flags); - ret = ipz_qeit_is_valid(&my_cq->ipz_queue); - spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); - } - - return ret; -} diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c 
b/drivers/infiniband/hw/ehca/ehca_sqp.c deleted file mode 100644 index 376b031c2c7f..000000000000 --- a/drivers/infiniband/hw/ehca/ehca_sqp.c +++ /dev/null @@ -1,245 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * SQP functions - * - * Authors: Khadija Souissi - * Heiko J Schick - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include - -#include "ehca_classes.h" -#include "ehca_tools.h" -#include "ehca_iverbs.h" -#include "hcp_if.h" - -#define IB_MAD_STATUS_REDIRECT cpu_to_be16(0x0002) -#define IB_MAD_STATUS_UNSUP_VERSION cpu_to_be16(0x0004) -#define IB_MAD_STATUS_UNSUP_METHOD cpu_to_be16(0x0008) - -#define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001) - -/** - * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue - * pair is created successfully, the corresponding port gets active. - * - * Define Special Queue pair 0 (SMI QP) is still not supported. - * - * @qp_init_attr: Queue pair init attributes with port and queue pair type - */ - -u64 ehca_define_sqp(struct ehca_shca *shca, - struct ehca_qp *ehca_qp, - struct ib_qp_init_attr *qp_init_attr) -{ - u32 pma_qp_nr, bma_qp_nr; - u64 ret; - u8 port = qp_init_attr->port_num; - int counter; - - shca->sport[port - 1].port_state = IB_PORT_DOWN; - - switch (qp_init_attr->qp_type) { - case IB_QPT_SMI: - /* function not supported yet */ - break; - case IB_QPT_GSI: - ret = hipz_h_define_aqp1(shca->ipz_hca_handle, - ehca_qp->ipz_qp_handle, - ehca_qp->galpas.kernel, - (u32) qp_init_attr->port_num, - &pma_qp_nr, &bma_qp_nr); - - if (ret != H_SUCCESS) { - ehca_err(&shca->ib_device, - "Can't define AQP1 for port %x. h_ret=%lli", - port, ret); - return ret; - } - shca->sport[port - 1].pma_qp_nr = pma_qp_nr; - ehca_dbg(&shca->ib_device, "port=%x pma_qp_nr=%x", - port, pma_qp_nr); - break; - default: - ehca_err(&shca->ib_device, "invalid qp_type=%x", - qp_init_attr->qp_type); - return H_PARAMETER; - } - - if (ehca_nr_ports < 0) /* autodetect mode */ - return H_SUCCESS; - - for (counter = 0; - shca->sport[port - 1].port_state != IB_PORT_ACTIVE && - counter < ehca_port_act_time; - counter++) { - ehca_dbg(&shca->ib_device, "... 
wait until port %x is active", - port); - msleep_interruptible(1000); - } - - if (counter == ehca_port_act_time) { - ehca_err(&shca->ib_device, "Port %x is not active.", port); - return H_HARDWARE; - } - - return H_SUCCESS; -} - -struct ib_perf { - struct ib_mad_hdr mad_hdr; - u8 reserved[40]; - u8 data[192]; -} __attribute__ ((packed)); - -/* TC/SL/FL packed into 32 bits, as in ClassPortInfo */ -struct tcslfl { - u32 tc:8; - u32 sl:4; - u32 fl:20; -} __attribute__ ((packed)); - -/* IP Version/TC/FL packed into 32 bits, as in GRH */ -struct vertcfl { - u32 ver:4; - u32 tc:8; - u32 fl:20; -} __attribute__ ((packed)); - -static int ehca_process_perf(struct ib_device *ibdev, u8 port_num, - const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad *in_mad, struct ib_mad *out_mad) -{ - const struct ib_perf *in_perf = (const struct ib_perf *)in_mad; - struct ib_perf *out_perf = (struct ib_perf *)out_mad; - struct ib_class_port_info *poi = - (struct ib_class_port_info *)out_perf->data; - struct tcslfl *tcslfl = - (struct tcslfl *)&poi->redirect_tcslfl; - struct ehca_shca *shca = - container_of(ibdev, struct ehca_shca, ib_device); - struct ehca_sport *sport = &shca->sport[port_num - 1]; - - ehca_dbg(ibdev, "method=%x", in_perf->mad_hdr.method); - - *out_mad = *in_mad; - - if (in_perf->mad_hdr.class_version != 1) { - ehca_warn(ibdev, "Unsupported class_version=%x", - in_perf->mad_hdr.class_version); - out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_VERSION; - goto perf_reply; - } - - switch (in_perf->mad_hdr.method) { - case IB_MGMT_METHOD_GET: - case IB_MGMT_METHOD_SET: - /* set class port info for redirection */ - out_perf->mad_hdr.attr_id = IB_PMA_CLASS_PORT_INFO; - out_perf->mad_hdr.status = IB_MAD_STATUS_REDIRECT; - memset(poi, 0, sizeof(*poi)); - poi->base_version = 1; - poi->class_version = 1; - poi->resp_time_value = 18; - - /* copy local routing information from WC where applicable */ - tcslfl->sl = in_wc->sl; - poi->redirect_lid = - 
sport->saved_attr.lid | in_wc->dlid_path_bits; - poi->redirect_qp = sport->pma_qp_nr; - poi->redirect_qkey = IB_QP1_QKEY; - - ehca_query_pkey(ibdev, port_num, in_wc->pkey_index, - &poi->redirect_pkey); - - /* if request was globally routed, copy route info */ - if (in_grh) { - const struct vertcfl *vertcfl = - (const struct vertcfl *)&in_grh->version_tclass_flow; - memcpy(poi->redirect_gid, in_grh->dgid.raw, - sizeof(poi->redirect_gid)); - tcslfl->tc = vertcfl->tc; - tcslfl->fl = vertcfl->fl; - } else - /* else only fill in default GID */ - ehca_query_gid(ibdev, port_num, 0, - (union ib_gid *)&poi->redirect_gid); - - ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x", - sport->saved_attr.lid, sport->pma_qp_nr); - break; - - case IB_MGMT_METHOD_GET_RESP: - return IB_MAD_RESULT_FAILURE; - - default: - out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_METHOD; - break; - } - -perf_reply: - out_perf->mad_hdr.method = IB_MGMT_METHOD_GET_RESP; - - return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; -} - -int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, - const struct ib_wc *in_wc, const struct ib_grh *in_grh, - const struct ib_mad_hdr *in, size_t in_mad_size, - struct ib_mad_hdr *out, size_t *out_mad_size, - u16 *out_mad_pkey_index) -{ - int ret; - const struct ib_mad *in_mad = (const struct ib_mad *)in; - struct ib_mad *out_mad = (struct ib_mad *)out; - - if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || - *out_mad_size != sizeof(*out_mad))) - return IB_MAD_RESULT_FAILURE; - - if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc) - return IB_MAD_RESULT_FAILURE; - - /* accept only pma request */ - if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT) - return IB_MAD_RESULT_SUCCESS; - - ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp); - ret = ehca_process_perf(ibdev, port_num, in_wc, in_grh, - in_mad, out_mad); - - return ret; -} diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h 
deleted file mode 100644 index d280b12aae64..000000000000 --- a/drivers/infiniband/hw/ehca/ehca_tools.h +++ /dev/null @@ -1,155 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * auxiliary functions - * - * Authors: Christoph Raisch - * Hoang-Nam Nguyen - * Khadija Souissi - * Waleri Fomin - * Heiko J Schick - * - * Copyright (c) 2005 IBM Corporation - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - - -#ifndef EHCA_TOOLS_H -#define EHCA_TOOLS_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -extern int ehca_debug_level; - -#define ehca_dbg(ib_dev, format, arg...) \ - do { \ - if (unlikely(ehca_debug_level)) \ - dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \ - "PU%04x EHCA_DBG:%s " format "\n", \ - raw_smp_processor_id(), __func__, \ - ## arg); \ - } while (0) - -#define ehca_info(ib_dev, format, arg...) \ - dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg) - -#define ehca_warn(ib_dev, format, arg...) \ - dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg) - -#define ehca_err(ib_dev, format, arg...) \ - dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg) - -/* use this one only if no ib_dev available */ -#define ehca_gen_dbg(format, arg...) \ - do { \ - if (unlikely(ehca_debug_level)) \ - printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg); \ - } while (0) - -#define ehca_gen_warn(format, arg...) \ - printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg) - -#define ehca_gen_err(format, arg...) \ - printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \ - raw_smp_processor_id(), __func__, ## arg) - -/** - * ehca_dmp - printk a memory block, whose length is n*8 bytes. - * Each line has the following layout: - * adr=X ofs=Y <8 bytes hex> <8 bytes hex> - */ -#define ehca_dmp(adr, len, format, args...) 
\ - do { \ - unsigned int x; \ - unsigned int l = (unsigned int)(len); \ - unsigned char *deb = (unsigned char *)(adr); \ - for (x = 0; x < l; x += 16) { \ - printk(KERN_INFO "EHCA_DMP:%s " format \ - " adr=%p ofs=%04x %016llx %016llx\n", \ - __func__, ##args, deb, x, \ - *((u64 *)&deb[0]), *((u64 *)&deb[8])); \ - deb += 16; \ - } \ - } while (0) - -/* define a bitmask, little endian version */ -#define EHCA_BMASK(pos, length) (((pos) << 16) + (length)) - -/* define a bitmask, the ibm way... */ -#define EHCA_BMASK_IBM(from, to) (((63 - to) << 16) + ((to) - (from) + 1)) - -/* internal function, don't use */ -#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff) - -/* internal function, don't use */ -#define EHCA_BMASK_MASK(mask) (~0ULL >> ((64 - (mask)) & 0xffff)) - -/** - * EHCA_BMASK_SET - return value shifted and masked by mask - * variable|=EHCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable - * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask - * in variable - */ -#define EHCA_BMASK_SET(mask, value) \ - ((EHCA_BMASK_MASK(mask) & ((u64)(value))) << EHCA_BMASK_SHIFTPOS(mask)) - -/** - * EHCA_BMASK_GET - extract a parameter from value by mask - */ -#define EHCA_BMASK_GET(mask, value) \ - (EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask))) - -/* Converts ehca to ib return code */ -int ehca2ib_return_code(u64 ehca_rc); - -#endif /* EHCA_TOOLS_H */ diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c deleted file mode 100644 index 1a1d5d99fcf9..000000000000 --- a/drivers/infiniband/hw/ehca/ehca_uverbs.c +++ /dev/null @@ -1,309 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * userspace support verbs - * - * Authors: Christoph Raisch - * Hoang-Nam Nguyen - * Heiko J Schick - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. 
- * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include - -#include "ehca_classes.h" -#include "ehca_iverbs.h" -#include "ehca_mrmw.h" -#include "ehca_tools.h" -#include "hcp_if.h" - -struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device, - struct ib_udata *udata) -{ - struct ehca_ucontext *my_context; - - my_context = kzalloc(sizeof *my_context, GFP_KERNEL); - if (!my_context) { - ehca_err(device, "Out of memory device=%p", device); - return ERR_PTR(-ENOMEM); - } - - return &my_context->ib_ucontext; -} - -int ehca_dealloc_ucontext(struct ib_ucontext *context) -{ - kfree(container_of(context, struct ehca_ucontext, ib_ucontext)); - return 0; -} - -static void ehca_mm_open(struct vm_area_struct *vma) -{ - u32 *count = (u32 *)vma->vm_private_data; - if (!count) { - ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", - vma->vm_start, vma->vm_end); - return; - } - (*count)++; - if (!(*count)) - ehca_gen_err("Use count overflow vm_start=%lx vm_end=%lx", - vma->vm_start, vma->vm_end); - ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", - vma->vm_start, vma->vm_end, *count); -} - -static void ehca_mm_close(struct vm_area_struct *vma) -{ - u32 *count = (u32 *)vma->vm_private_data; - if (!count) { - ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", - vma->vm_start, vma->vm_end); - return; - } - (*count)--; - ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", - vma->vm_start, vma->vm_end, *count); -} - -static const struct vm_operations_struct vm_ops = { - .open = ehca_mm_open, - .close = ehca_mm_close, -}; - -static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas, - u32 *mm_count) -{ - int ret; - u64 vsize, physical; - - vsize = vma->vm_end - vma->vm_start; - if (vsize < EHCA_PAGESIZE) { - ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start); - return -EINVAL; - } - - physical = galpas->user.fw_handle; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical); - /* VM_IO | VM_DONTEXPAND | 
VM_DONTDUMP are set by remap_pfn_range() */ - ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT, - vma->vm_page_prot); - if (unlikely(ret)) { - ehca_gen_err("remap_pfn_range() failed ret=%i", ret); - return -ENOMEM; - } - - vma->vm_private_data = mm_count; - (*mm_count)++; - vma->vm_ops = &vm_ops; - - return 0; -} - -static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue, - u32 *mm_count) -{ - int ret; - u64 start, ofs; - struct page *page; - - vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; - start = vma->vm_start; - for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) { - u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs); - page = virt_to_page(virt_addr); - ret = vm_insert_page(vma, start, page); - if (unlikely(ret)) { - ehca_gen_err("vm_insert_page() failed rc=%i", ret); - return ret; - } - start += PAGE_SIZE; - } - vma->vm_private_data = mm_count; - (*mm_count)++; - vma->vm_ops = &vm_ops; - - return 0; -} - -static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq, - u32 rsrc_type) -{ - int ret; - - switch (rsrc_type) { - case 0: /* galpa fw handle */ - ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number); - ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa); - if (unlikely(ret)) { - ehca_err(cq->ib_cq.device, - "ehca_mmap_fw() failed rc=%i cq_num=%x", - ret, cq->cq_number); - return ret; - } - break; - - case 1: /* cq queue_addr */ - ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number); - ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue); - if (unlikely(ret)) { - ehca_err(cq->ib_cq.device, - "ehca_mmap_queue() failed rc=%i cq_num=%x", - ret, cq->cq_number); - return ret; - } - break; - - default: - ehca_err(cq->ib_cq.device, "bad resource type=%x cq_num=%x", - rsrc_type, cq->cq_number); - return -EINVAL; - } - - return 0; -} - -static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp, - u32 rsrc_type) -{ - int ret; - - switch (rsrc_type) { - case 0: /* galpa 
fw handle */ - ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num); - ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa); - if (unlikely(ret)) { - ehca_err(qp->ib_qp.device, - "remap_pfn_range() failed ret=%i qp_num=%x", - ret, qp->ib_qp.qp_num); - return -ENOMEM; - } - break; - - case 1: /* qp rqueue_addr */ - ehca_dbg(qp->ib_qp.device, "qp_num=%x rq", qp->ib_qp.qp_num); - ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, - &qp->mm_count_rqueue); - if (unlikely(ret)) { - ehca_err(qp->ib_qp.device, - "ehca_mmap_queue(rq) failed rc=%i qp_num=%x", - ret, qp->ib_qp.qp_num); - return ret; - } - break; - - case 2: /* qp squeue_addr */ - ehca_dbg(qp->ib_qp.device, "qp_num=%x sq", qp->ib_qp.qp_num); - ret = ehca_mmap_queue(vma, &qp->ipz_squeue, - &qp->mm_count_squeue); - if (unlikely(ret)) { - ehca_err(qp->ib_qp.device, - "ehca_mmap_queue(sq) failed rc=%i qp_num=%x", - ret, qp->ib_qp.qp_num); - return ret; - } - break; - - default: - ehca_err(qp->ib_qp.device, "bad resource type=%x qp=num=%x", - rsrc_type, qp->ib_qp.qp_num); - return -EINVAL; - } - - return 0; -} - -int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) -{ - u64 fileoffset = vma->vm_pgoff; - u32 idr_handle = fileoffset & 0x1FFFFFF; - u32 q_type = (fileoffset >> 27) & 0x1; /* CQ, QP,... 
*/ - u32 rsrc_type = (fileoffset >> 25) & 0x3; /* sq,rq,cmnd_window */ - u32 ret; - struct ehca_cq *cq; - struct ehca_qp *qp; - struct ib_uobject *uobject; - - switch (q_type) { - case 0: /* CQ */ - read_lock(&ehca_cq_idr_lock); - cq = idr_find(&ehca_cq_idr, idr_handle); - read_unlock(&ehca_cq_idr_lock); - - /* make sure this mmap really belongs to the authorized user */ - if (!cq) - return -EINVAL; - - if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context) - return -EINVAL; - - ret = ehca_mmap_cq(vma, cq, rsrc_type); - if (unlikely(ret)) { - ehca_err(cq->ib_cq.device, - "ehca_mmap_cq() failed rc=%i cq_num=%x", - ret, cq->cq_number); - return ret; - } - break; - - case 1: /* QP */ - read_lock(&ehca_qp_idr_lock); - qp = idr_find(&ehca_qp_idr, idr_handle); - read_unlock(&ehca_qp_idr_lock); - - /* make sure this mmap really belongs to the authorized user */ - if (!qp) - return -EINVAL; - - uobject = IS_SRQ(qp) ? qp->ib_srq.uobject : qp->ib_qp.uobject; - if (!uobject || uobject->context != context) - return -EINVAL; - - ret = ehca_mmap_qp(vma, qp, rsrc_type); - if (unlikely(ret)) { - ehca_err(qp->ib_qp.device, - "ehca_mmap_qp() failed rc=%i qp_num=%x", - ret, qp->ib_qp.qp_num); - return ret; - } - break; - - default: - ehca_gen_err("bad queue type %x", q_type); - return -EINVAL; - } - - return 0; -} diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c deleted file mode 100644 index 89517ffb4389..000000000000 --- a/drivers/infiniband/hw/ehca/hcp_if.c +++ /dev/null @@ -1,949 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Firmware Infiniband Interface code for POWER - * - * Authors: Christoph Raisch - * Hoang-Nam Nguyen - * Joachim Fenkes - * Gerd Bayer - * Waleri Fomin - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. 
- * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include -#include "ehca_tools.h" -#include "hcp_if.h" -#include "hcp_phyp.h" -#include "hipz_fns.h" -#include "ipz_pt_fn.h" - -#define H_ALL_RES_QP_ENHANCED_OPS EHCA_BMASK_IBM(9, 11) -#define H_ALL_RES_QP_PTE_PIN EHCA_BMASK_IBM(12, 12) -#define H_ALL_RES_QP_SERVICE_TYPE EHCA_BMASK_IBM(13, 15) -#define H_ALL_RES_QP_STORAGE EHCA_BMASK_IBM(16, 17) -#define H_ALL_RES_QP_LL_RQ_CQE_POSTING EHCA_BMASK_IBM(18, 18) -#define H_ALL_RES_QP_LL_SQ_CQE_POSTING EHCA_BMASK_IBM(19, 21) -#define H_ALL_RES_QP_SIGNALING_TYPE EHCA_BMASK_IBM(22, 23) -#define H_ALL_RES_QP_UD_AV_LKEY_CTRL EHCA_BMASK_IBM(31, 31) -#define H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE EHCA_BMASK_IBM(32, 35) -#define H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE EHCA_BMASK_IBM(36, 39) -#define H_ALL_RES_QP_RESOURCE_TYPE EHCA_BMASK_IBM(56, 63) - -#define H_ALL_RES_QP_MAX_OUTST_SEND_WR EHCA_BMASK_IBM(0, 15) -#define H_ALL_RES_QP_MAX_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31) -#define H_ALL_RES_QP_MAX_SEND_SGE EHCA_BMASK_IBM(32, 39) -#define H_ALL_RES_QP_MAX_RECV_SGE EHCA_BMASK_IBM(40, 47) - -#define H_ALL_RES_QP_UD_AV_LKEY EHCA_BMASK_IBM(32, 63) -#define H_ALL_RES_QP_SRQ_QP_TOKEN EHCA_BMASK_IBM(0, 31) -#define H_ALL_RES_QP_SRQ_QP_HANDLE EHCA_BMASK_IBM(0, 64) -#define H_ALL_RES_QP_SRQ_LIMIT EHCA_BMASK_IBM(48, 63) -#define H_ALL_RES_QP_SRQ_QPN EHCA_BMASK_IBM(40, 63) - -#define H_ALL_RES_QP_ACT_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31) -#define H_ALL_RES_QP_ACT_OUTST_RECV_WR EHCA_BMASK_IBM(48, 63) -#define H_ALL_RES_QP_ACT_SEND_SGE EHCA_BMASK_IBM(8, 15) -#define H_ALL_RES_QP_ACT_RECV_SGE EHCA_BMASK_IBM(24, 31) - -#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES EHCA_BMASK_IBM(0, 31) -#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES EHCA_BMASK_IBM(32, 63) - -#define H_MP_INIT_TYPE EHCA_BMASK_IBM(44, 47) -#define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48) -#define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49) - -#define HCALL4_REGS_FORMAT "r4=%lx r5=%lx r6=%lx r7=%lx" -#define HCALL7_REGS_FORMAT HCALL4_REGS_FORMAT " r8=%lx r9=%lx r10=%lx" -#define HCALL9_REGS_FORMAT 
HCALL7_REGS_FORMAT " r11=%lx r12=%lx" - -static DEFINE_SPINLOCK(hcall_lock); - -static long ehca_plpar_hcall_norets(unsigned long opcode, - unsigned long arg1, - unsigned long arg2, - unsigned long arg3, - unsigned long arg4, - unsigned long arg5, - unsigned long arg6, - unsigned long arg7) -{ - long ret; - int i, sleep_msecs; - unsigned long flags = 0; - - if (unlikely(ehca_debug_level >= 2)) - ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT, - opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7); - - for (i = 0; i < 5; i++) { - /* serialize hCalls to work around firmware issue */ - if (ehca_lock_hcalls) - spin_lock_irqsave(&hcall_lock, flags); - - ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4, - arg5, arg6, arg7); - - if (ehca_lock_hcalls) - spin_unlock_irqrestore(&hcall_lock, flags); - - if (H_IS_LONG_BUSY(ret)) { - sleep_msecs = get_longbusy_msecs(ret); - msleep_interruptible(sleep_msecs); - continue; - } - - if (ret < H_SUCCESS) - ehca_gen_err("opcode=%lx ret=%li " HCALL7_REGS_FORMAT, - opcode, ret, arg1, arg2, arg3, - arg4, arg5, arg6, arg7); - else - if (unlikely(ehca_debug_level >= 2)) - ehca_gen_dbg("opcode=%lx ret=%li", opcode, ret); - - return ret; - } - - return H_BUSY; -} - -static long ehca_plpar_hcall9(unsigned long opcode, - unsigned long *outs, /* array of 9 outputs */ - unsigned long arg1, - unsigned long arg2, - unsigned long arg3, - unsigned long arg4, - unsigned long arg5, - unsigned long arg6, - unsigned long arg7, - unsigned long arg8, - unsigned long arg9) -{ - long ret; - int i, sleep_msecs; - unsigned long flags = 0; - - if (unlikely(ehca_debug_level >= 2)) - ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode, - arg1, arg2, arg3, arg4, arg5, - arg6, arg7, arg8, arg9); - - for (i = 0; i < 5; i++) { - /* serialize hCalls to work around firmware issue */ - if (ehca_lock_hcalls) - spin_lock_irqsave(&hcall_lock, flags); - - ret = plpar_hcall9(opcode, outs, - arg1, arg2, arg3, arg4, arg5, - arg6, arg7, arg8, arg9); - - if 
(ehca_lock_hcalls) - spin_unlock_irqrestore(&hcall_lock, flags); - - if (H_IS_LONG_BUSY(ret)) { - sleep_msecs = get_longbusy_msecs(ret); - msleep_interruptible(sleep_msecs); - continue; - } - - if (ret < H_SUCCESS) { - ehca_gen_err("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, - opcode, arg1, arg2, arg3, arg4, arg5, - arg6, arg7, arg8, arg9); - ehca_gen_err("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT, - ret, outs[0], outs[1], outs[2], outs[3], - outs[4], outs[5], outs[6], outs[7], - outs[8]); - } else if (unlikely(ehca_debug_level >= 2)) - ehca_gen_dbg("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT, - ret, outs[0], outs[1], outs[2], outs[3], - outs[4], outs[5], outs[6], outs[7], - outs[8]); - return ret; - } - - return H_BUSY; -} - -u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, - struct ehca_pfeq *pfeq, - const u32 neq_control, - const u32 number_of_entries, - struct ipz_eq_handle *eq_handle, - u32 *act_nr_of_entries, - u32 *act_pages, - u32 *eq_ist) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - u64 allocate_controls; - - /* resource type */ - allocate_controls = 3ULL; - - /* ISN is associated */ - if (neq_control != 1) - allocate_controls = (1ULL << (63 - 7)) | allocate_controls; - else /* notification event queue */ - allocate_controls = (1ULL << 63) | allocate_controls; - - ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, - adapter_handle.handle, /* r4 */ - allocate_controls, /* r5 */ - number_of_entries, /* r6 */ - 0, 0, 0, 0, 0, 0); - eq_handle->handle = outs[0]; - *act_nr_of_entries = (u32)outs[3]; - *act_pages = (u32)outs[4]; - *eq_ist = (u32)outs[5]; - - if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resource - ret=%lli ", ret); - - return ret; -} - -u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle, - struct ipz_eq_handle eq_handle, - const u64 event_mask) -{ - return ehca_plpar_hcall_norets(H_RESET_EVENTS, - adapter_handle.handle, /* r4 */ - eq_handle.handle, /* r5 */ - event_mask, /* r6 */ 
- 0, 0, 0, 0); -} - -u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, - struct ehca_cq *cq, - struct ehca_alloc_cq_parms *param) -{ - int rc; - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, - adapter_handle.handle, /* r4 */ - 2, /* r5 */ - param->eq_handle.handle, /* r6 */ - cq->token, /* r7 */ - param->nr_cqe, /* r8 */ - 0, 0, 0, 0); - cq->ipz_cq_handle.handle = outs[0]; - param->act_nr_of_entries = (u32)outs[3]; - param->act_pages = (u32)outs[4]; - - if (ret == H_SUCCESS) { - rc = hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]); - if (rc) { - ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx", - rc, outs[5]); - - ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - cq->ipz_cq_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); - ret = H_NO_MEM; - } - } - - if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resources. ret=%lli", ret); - - return ret; -} - -u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_alloc_qp_parms *parms, int is_user) -{ - int rc; - u64 ret; - u64 allocate_controls, max_r10_reg, r11, r12; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - allocate_controls = - EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type) - | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0) - | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype) - | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype) - | EHCA_BMASK_SET(H_ALL_RES_QP_STORAGE, parms->qp_storage) - | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE, - parms->squeue.page_size) - | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE, - parms->rqueue.page_size) - | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING, - !!(parms->ll_comp_flags & LLQP_RECV_COMP)) - | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING, - !!(parms->ll_comp_flags & LLQP_SEND_COMP)) - | EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL, - parms->ud_av_l_key_ctl) - | 
EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1); - - max_r10_reg = - EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR, - parms->squeue.max_wr + 1) - | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR, - parms->rqueue.max_wr + 1) - | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE, - parms->squeue.max_sge) - | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE, - parms->rqueue.max_sge); - - r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token); - - if (parms->ext_type == EQPT_SRQ) - r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_LIMIT, parms->srq_limit); - else - r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QPN, parms->srq_qpn); - - ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, - adapter_handle.handle, /* r4 */ - allocate_controls, /* r5 */ - parms->send_cq_handle.handle, - parms->recv_cq_handle.handle, - parms->eq_handle.handle, - ((u64)parms->token << 32) | parms->pd.value, - max_r10_reg, r11, r12); - - parms->qp_handle.handle = outs[0]; - parms->real_qp_num = (u32)outs[1]; - parms->squeue.act_nr_wqes = - (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]); - parms->rqueue.act_nr_wqes = - (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]); - parms->squeue.act_nr_sges = - (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]); - parms->rqueue.act_nr_sges = - (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]); - parms->squeue.queue_size = - (u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]); - parms->rqueue.queue_size = - (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]); - - if (ret == H_SUCCESS) { - rc = hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]); - if (rc) { - ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx", - rc, outs[6]); - - ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - parms->qp_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); - ret = H_NO_MEM; - } - } - - if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resources. 
ret=%lli", ret); - - return ret; -} - -u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, - const u8 port_id, - struct hipz_query_port *query_port_response_block) -{ - u64 ret; - u64 r_cb = __pa(query_port_response_block); - - if (r_cb & (EHCA_PAGESIZE-1)) { - ehca_gen_err("response block not page aligned"); - return H_PARAMETER; - } - - ret = ehca_plpar_hcall_norets(H_QUERY_PORT, - adapter_handle.handle, /* r4 */ - port_id, /* r5 */ - r_cb, /* r6 */ - 0, 0, 0, 0); - - if (ehca_debug_level >= 2) - ehca_dmp(query_port_response_block, 64, "response_block"); - - return ret; -} - -u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle, - const u8 port_id, const u32 port_cap, - const u8 init_type, const int modify_mask) -{ - u64 port_attributes = port_cap; - - if (modify_mask & IB_PORT_SHUTDOWN) - port_attributes |= EHCA_BMASK_SET(H_MP_SHUTDOWN, 1); - if (modify_mask & IB_PORT_INIT_TYPE) - port_attributes |= EHCA_BMASK_SET(H_MP_INIT_TYPE, init_type); - if (modify_mask & IB_PORT_RESET_QKEY_CNTR) - port_attributes |= EHCA_BMASK_SET(H_MP_RESET_QKEY_CTR, 1); - - return ehca_plpar_hcall_norets(H_MODIFY_PORT, - adapter_handle.handle, /* r4 */ - port_id, /* r5 */ - port_attributes, /* r6 */ - 0, 0, 0, 0); -} - -u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, - struct hipz_query_hca *query_hca_rblock) -{ - u64 r_cb = __pa(query_hca_rblock); - - if (r_cb & (EHCA_PAGESIZE-1)) { - ehca_gen_err("response_block=%p not page aligned", - query_hca_rblock); - return H_PARAMETER; - } - - return ehca_plpar_hcall_norets(H_QUERY_HCA, - adapter_handle.handle, /* r4 */ - r_cb, /* r5 */ - 0, 0, 0, 0, 0); -} - -u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle, - const u8 pagesize, - const u8 queue_type, - const u64 resource_handle, - const u64 logical_address_of_page, - u64 count) -{ - return ehca_plpar_hcall_norets(H_REGISTER_RPAGES, - adapter_handle.handle, /* r4 */ - (u64)queue_type | ((u64)pagesize) << 8, - /* r5 
*/ - resource_handle, /* r6 */ - logical_address_of_page, /* r7 */ - count, /* r8 */ - 0, 0); -} - -u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle, - const struct ipz_eq_handle eq_handle, - struct ehca_pfeq *pfeq, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count) -{ - if (count != 1) { - ehca_gen_err("Ppage counter=%llx", count); - return H_PARAMETER; - } - return hipz_h_register_rpage(adapter_handle, - pagesize, - queue_type, - eq_handle.handle, - logical_address_of_page, count); -} - -u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle, - u32 ist) -{ - u64 ret; - ret = ehca_plpar_hcall_norets(H_QUERY_INT_STATE, - adapter_handle.handle, /* r4 */ - ist, /* r5 */ - 0, 0, 0, 0, 0); - - if (ret != H_SUCCESS && ret != H_BUSY) - ehca_gen_err("Could not query interrupt state."); - - return ret; -} - -u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle, - const struct ipz_cq_handle cq_handle, - struct ehca_pfcq *pfcq, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count, - const struct h_galpa gal) -{ - if (count != 1) { - ehca_gen_err("Page counter=%llx", count); - return H_PARAMETER; - } - - return hipz_h_register_rpage(adapter_handle, pagesize, queue_type, - cq_handle.handle, logical_address_of_page, - count); -} - -u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count, - const struct h_galpa galpa) -{ - if (count > 1) { - ehca_gen_err("Page counter=%llx", count); - return H_PARAMETER; - } - - return hipz_h_register_rpage(adapter_handle, pagesize, queue_type, - qp_handle.handle, logical_address_of_page, - count); -} - -u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, - const struct 
ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - void **log_addr_next_sq_wqe2processed, - void **log_addr_next_rq_wqe2processed, - int dis_and_get_function_code) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs, - adapter_handle.handle, /* r4 */ - dis_and_get_function_code, /* r5 */ - qp_handle.handle, /* r6 */ - 0, 0, 0, 0, 0, 0); - if (log_addr_next_sq_wqe2processed) - *log_addr_next_sq_wqe2processed = (void *)outs[0]; - if (log_addr_next_rq_wqe2processed) - *log_addr_next_rq_wqe2processed = (void *)outs[1]; - - return ret; -} - -u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - const u64 update_mask, - struct hcp_modify_qp_control_block *mqpcb, - struct h_galpa gal) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - ret = ehca_plpar_hcall9(H_MODIFY_QP, outs, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - update_mask, /* r6 */ - __pa(mqpcb), /* r7 */ - 0, 0, 0, 0, 0); - - if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Insufficient resources ret=%lli", ret); - - return ret; -} - -u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - struct hcp_modify_qp_control_block *qqpcb, - struct h_galpa gal) -{ - return ehca_plpar_hcall_norets(H_QUERY_QP, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - __pa(qqpcb), /* r6 */ - 0, 0, 0, 0); -} - -u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_qp *qp) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = hcp_galpas_dtor(&qp->galpas); - if (ret) { - ehca_gen_err("Could not destruct qp->galpas"); - return H_RESOURCE; - } - ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs, - adapter_handle.handle, /* r4 */ - /* function code */ - 1, /* r5 */ - qp->ipz_qp_handle.handle, /* r6 */ - 0, 0, 0, 0, 0, 0); - if 
(ret == H_HARDWARE) - ehca_gen_err("HCA not operational. ret=%lli", ret); - - ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - qp->ipz_qp_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); - - if (ret == H_RESOURCE) - ehca_gen_err("Resource still in use. ret=%lli", ret); - - return ret; -} - -u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u32 port) -{ - return ehca_plpar_hcall_norets(H_DEFINE_AQP0, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - port, /* r6 */ - 0, 0, 0, 0); -} - -u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u32 port, u32 * pma_qp_nr, - u32 * bma_qp_nr) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - port, /* r6 */ - 0, 0, 0, 0, 0, 0); - *pma_qp_nr = (u32)outs[0]; - *bma_qp_nr = (u32)outs[1]; - - if (ret == H_ALIAS_EXIST) - ehca_gen_err("AQP1 already exists. ret=%lli", ret); - - return ret; -} - -u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u16 mcg_dlid, - u64 subnet_prefix, u64 interface_id) -{ - u64 ret; - - ret = ehca_plpar_hcall_norets(H_ATTACH_MCQP, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - mcg_dlid, /* r6 */ - interface_id, /* r7 */ - subnet_prefix, /* r8 */ - 0, 0); - - if (ret == H_NOT_ENOUGH_RESOURCES) - ehca_gen_err("Not enough resources. 
ret=%lli", ret); - - return ret; -} - -u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u16 mcg_dlid, - u64 subnet_prefix, u64 interface_id) -{ - return ehca_plpar_hcall_norets(H_DETACH_MCQP, - adapter_handle.handle, /* r4 */ - qp_handle.handle, /* r5 */ - mcg_dlid, /* r6 */ - interface_id, /* r7 */ - subnet_prefix, /* r8 */ - 0, 0); -} - -u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle, - struct ehca_cq *cq, - u8 force_flag) -{ - u64 ret; - - ret = hcp_galpas_dtor(&cq->galpas); - if (ret) { - ehca_gen_err("Could not destruct cp->galpas"); - return H_RESOURCE; - } - - ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - cq->ipz_cq_handle.handle, /* r5 */ - force_flag != 0 ? 1L : 0L, /* r6 */ - 0, 0, 0, 0); - - if (ret == H_RESOURCE) - ehca_gen_err("H_FREE_RESOURCE failed ret=%lli ", ret); - - return ret; -} - -u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle, - struct ehca_eq *eq) -{ - u64 ret; - - ret = hcp_galpas_dtor(&eq->galpas); - if (ret) { - ehca_gen_err("Could not destruct eq->galpas"); - return H_RESOURCE; - } - - ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - eq->ipz_eq_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); - - if (ret == H_RESOURCE) - ehca_gen_err("Resource in use. 
ret=%lli ", ret); - - return ret; -} - -u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u64 vaddr, - const u64 length, - const u32 access_ctrl, - const struct ipz_pd pd, - struct ehca_mr_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, - adapter_handle.handle, /* r4 */ - 5, /* r5 */ - vaddr, /* r6 */ - length, /* r7 */ - (((u64)access_ctrl) << 32ULL), /* r8 */ - pd.value, /* r9 */ - 0, 0, 0); - outparms->handle.handle = outs[0]; - outparms->lkey = (u32)outs[2]; - outparms->rkey = (u32)outs[3]; - - return ret; -} - -u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count) -{ - u64 ret; - - if (unlikely(ehca_debug_level >= 3)) { - if (count > 1) { - u64 *kpage; - int i; - kpage = __va(logical_address_of_page); - for (i = 0; i < count; i++) - ehca_gen_dbg("kpage[%d]=%p", - i, (void *)kpage[i]); - } else - ehca_gen_dbg("kpage=%p", - (void *)logical_address_of_page); - } - - if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) { - ehca_gen_err("logical_address_of_page not on a 4k boundary " - "adapter_handle=%llx mr=%p mr_handle=%llx " - "pagesize=%x queue_type=%x " - "logical_address_of_page=%llx count=%llx", - adapter_handle.handle, mr, - mr->ipz_mr_handle.handle, pagesize, queue_type, - logical_address_of_page, count); - ret = H_PARAMETER; - } else - ret = hipz_h_register_rpage(adapter_handle, pagesize, - queue_type, - mr->ipz_mr_handle.handle, - logical_address_of_page, count); - return ret; -} - -u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - struct ehca_mr_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_QUERY_MR, outs, - adapter_handle.handle, 
/* r4 */ - mr->ipz_mr_handle.handle, /* r5 */ - 0, 0, 0, 0, 0, 0, 0); - outparms->len = outs[0]; - outparms->vaddr = outs[1]; - outparms->acl = outs[4] >> 32; - outparms->lkey = (u32)(outs[5] >> 32); - outparms->rkey = (u32)(outs[5] & (0xffffffff)); - - return ret; -} - -u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr) -{ - return ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - mr->ipz_mr_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); -} - -u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u64 vaddr_in, - const u64 length, - const u32 access_ctrl, - const struct ipz_pd pd, - const u64 mr_addr_cb, - struct ehca_mr_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs, - adapter_handle.handle, /* r4 */ - mr->ipz_mr_handle.handle, /* r5 */ - vaddr_in, /* r6 */ - length, /* r7 */ - /* r8 */ - ((((u64)access_ctrl) << 32ULL) | pd.value), - mr_addr_cb, /* r9 */ - 0, 0, 0); - outparms->vaddr = outs[1]; - outparms->lkey = (u32)outs[2]; - outparms->rkey = (u32)outs[3]; - - return ret; -} - -u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const struct ehca_mr *orig_mr, - const u64 vaddr_in, - const u32 access_ctrl, - const struct ipz_pd pd, - struct ehca_mr_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs, - adapter_handle.handle, /* r4 */ - orig_mr->ipz_mr_handle.handle, /* r5 */ - vaddr_in, /* r6 */ - (((u64)access_ctrl) << 32ULL), /* r7 */ - pd.value, /* r8 */ - 0, 0, 0, 0); - outparms->handle.handle = outs[0]; - outparms->lkey = (u32)outs[2]; - outparms->rkey = (u32)outs[3]; - - return ret; -} - -u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw, - const struct ipz_pd 
pd, - struct ehca_mw_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, - adapter_handle.handle, /* r4 */ - 6, /* r5 */ - pd.value, /* r6 */ - 0, 0, 0, 0, 0, 0); - outparms->handle.handle = outs[0]; - outparms->rkey = (u32)outs[3]; - - return ret; -} - -u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw, - struct ehca_mw_hipzout_parms *outparms) -{ - u64 ret; - unsigned long outs[PLPAR_HCALL9_BUFSIZE]; - - ret = ehca_plpar_hcall9(H_QUERY_MW, outs, - adapter_handle.handle, /* r4 */ - mw->ipz_mw_handle.handle, /* r5 */ - 0, 0, 0, 0, 0, 0, 0); - outparms->rkey = (u32)outs[3]; - - return ret; -} - -u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw) -{ - return ehca_plpar_hcall_norets(H_FREE_RESOURCE, - adapter_handle.handle, /* r4 */ - mw->ipz_mw_handle.handle, /* r5 */ - 0, 0, 0, 0, 0); -} - -u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, - const u64 ressource_handle, - void *rblock, - unsigned long *byte_count) -{ - u64 r_cb = __pa(rblock); - - if (r_cb & (EHCA_PAGESIZE-1)) { - ehca_gen_err("rblock not page aligned."); - return H_PARAMETER; - } - - return ehca_plpar_hcall_norets(H_ERROR_DATA, - adapter_handle.handle, - ressource_handle, - r_cb, - 0, 0, 0, 0); -} - -u64 hipz_h_eoi(int irq) -{ - unsigned long xirr; - - iosync(); - xirr = (0xffULL << 24) | irq; - - return plpar_hcall_norets(H_EOI, xirr); -} diff --git a/drivers/infiniband/hw/ehca/hcp_if.h b/drivers/infiniband/hw/ehca/hcp_if.h deleted file mode 100644 index a46e514c367b..000000000000 --- a/drivers/infiniband/hw/ehca/hcp_if.h +++ /dev/null @@ -1,265 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Firmware Infiniband Interface code for POWER - * - * Authors: Christoph Raisch - * Hoang-Nam Nguyen - * Gerd Bayer - * Waleri Fomin - * - * Copyright (c) 2005 IBM Corporation 
- * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __HCP_IF_H__ -#define __HCP_IF_H__ - -#include "ehca_classes.h" -#include "ehca_tools.h" -#include "hipz_hw.h" - -/* - * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initialize - * resources, create the empty EQPT (ring). 
- */ -u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, - struct ehca_pfeq *pfeq, - const u32 neq_control, - const u32 number_of_entries, - struct ipz_eq_handle *eq_handle, - u32 * act_nr_of_entries, - u32 * act_pages, - u32 * eq_ist); - -u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle, - struct ipz_eq_handle eq_handle, - const u64 event_mask); -/* - * hipz_h_allocate_resource_cq allocates CQ resources in HW and FW, initialize - * resources, create the empty CQPT (ring). - */ -u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, - struct ehca_cq *cq, - struct ehca_alloc_cq_parms *param); - - -/* - * hipz_h_alloc_resource_qp allocates QP resources in HW and FW, - * initialize resources, create empty QPPTs (2 rings). - */ -u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_alloc_qp_parms *parms, int is_user); - -u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, - const u8 port_id, - struct hipz_query_port *query_port_response_block); - -u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle, - const u8 port_id, const u32 port_cap, - const u8 init_type, const int modify_mask); - -u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, - struct hipz_query_hca *query_hca_rblock); - -/* - * hipz_h_register_rpage internal function in hcp_if.h for all - * hcp_H_REGISTER_RPAGE calls. 
- */ -u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle, - const u8 pagesize, - const u8 queue_type, - const u64 resource_handle, - const u64 logical_address_of_page, - u64 count); - -u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle, - const struct ipz_eq_handle eq_handle, - struct ehca_pfeq *pfeq, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count); - -u64 hipz_h_query_int_state(const struct ipz_adapter_handle - hcp_adapter_handle, - u32 ist); - -u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle, - const struct ipz_cq_handle cq_handle, - struct ehca_pfcq *pfcq, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count, - const struct h_galpa gal); - -u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count, - const struct h_galpa galpa); - -u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - void **log_addr_next_sq_wqe_tb_processed, - void **log_addr_next_rq_wqe_tb_processed, - int dis_and_get_function_code); -enum hcall_sigt { - HCALL_SIGT_NO_CQE = 0, - HCALL_SIGT_BY_WQE = 1, - HCALL_SIGT_EVERY = 2 -}; - -u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - const u64 update_mask, - struct hcp_modify_qp_control_block *mqpcb, - struct h_galpa gal); - -u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct ehca_pfqp *pfqp, - struct hcp_modify_qp_control_block *qqpcb, - struct h_galpa gal); - -u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, - struct ehca_qp *qp); - 
-u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u32 port); - -u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u32 port, u32 * pma_qp_nr, - u32 * bma_qp_nr); - -u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u16 mcg_dlid, - u64 subnet_prefix, u64 interface_id); - -u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle, - const struct ipz_qp_handle qp_handle, - struct h_galpa gal, - u16 mcg_dlid, - u64 subnet_prefix, u64 interface_id); - -u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle, - struct ehca_cq *cq, - u8 force_flag); - -u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle, - struct ehca_eq *eq); - -/* - * hipz_h_alloc_resource_mr allocates MR resources in HW and FW, initialize - * resources. 
- */ -u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u64 vaddr, - const u64 length, - const u32 access_ctrl, - const struct ipz_pd pd, - struct ehca_mr_hipzout_parms *outparms); - -/* hipz_h_register_rpage_mr registers MR resource pages in HW and FW */ -u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u8 pagesize, - const u8 queue_type, - const u64 logical_address_of_page, - const u64 count); - -/* hipz_h_query_mr queries MR in HW and FW */ -u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - struct ehca_mr_hipzout_parms *outparms); - -/* hipz_h_free_resource_mr frees MR resources in HW and FW */ -u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr); - -/* hipz_h_reregister_pmr reregisters MR in HW and FW */ -u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const u64 vaddr_in, - const u64 length, - const u32 access_ctrl, - const struct ipz_pd pd, - const u64 mr_addr_cb, - struct ehca_mr_hipzout_parms *outparms); - -/* hipz_h_register_smr register shared MR in HW and FW */ -u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mr *mr, - const struct ehca_mr *orig_mr, - const u64 vaddr_in, - const u32 access_ctrl, - const struct ipz_pd pd, - struct ehca_mr_hipzout_parms *outparms); - -/* - * hipz_h_alloc_resource_mw allocates MW resources in HW and FW, initialize - * resources. 
- */ -u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw, - const struct ipz_pd pd, - struct ehca_mw_hipzout_parms *outparms); - -/* hipz_h_query_mw queries MW in HW and FW */ -u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw, - struct ehca_mw_hipzout_parms *outparms); - -/* hipz_h_free_resource_mw frees MW resources in HW and FW */ -u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle, - const struct ehca_mw *mw); - -u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, - const u64 ressource_handle, - void *rblock, - unsigned long *byte_count); -u64 hipz_h_eoi(int irq); - -#endif /* __HCP_IF_H__ */ diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.c b/drivers/infiniband/hw/ehca/hcp_phyp.c deleted file mode 100644 index 077376ff3d28..000000000000 --- a/drivers/infiniband/hw/ehca/hcp_phyp.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * load store abstraction for ehca register access with tracing - * - * Authors: Christoph Raisch - * Hoang-Nam Nguyen - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "ehca_classes.h" -#include "hipz_hw.h" - -u64 hcall_map_page(u64 physaddr) -{ - return (u64)ioremap(physaddr, EHCA_PAGESIZE); -} - -int hcall_unmap_page(u64 mapaddr) -{ - iounmap((volatile void __iomem *) mapaddr); - return 0; -} - -int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, - u64 paddr_kernel, u64 paddr_user) -{ - if (!is_user) { - galpas->kernel.fw_handle = hcall_map_page(paddr_kernel); - if (!galpas->kernel.fw_handle) - return -ENOMEM; - } else - galpas->kernel.fw_handle = 0; - - galpas->user.fw_handle = paddr_user; - - return 0; -} - -int hcp_galpas_dtor(struct h_galpas *galpas) -{ - if (galpas->kernel.fw_handle) { - int ret = hcall_unmap_page(galpas->kernel.fw_handle); - if (ret) - return ret; - } - - galpas->user.fw_handle = galpas->kernel.fw_handle = 0; - - return 0; -} diff --git a/drivers/infiniband/hw/ehca/hcp_phyp.h b/drivers/infiniband/hw/ehca/hcp_phyp.h deleted file mode 100644 index d1b029910249..000000000000 --- a/drivers/infiniband/hw/ehca/hcp_phyp.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * Firmware calls - * - * Authors: Christoph Raisch - * Hoang-Nam Nguyen - * 
Waleri Fomin - * Gerd Bayer - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __HCP_PHYP_H__ -#define __HCP_PHYP_H__ - - -/* - * eHCA page (mapped into memory) - * resource to access eHCA register pages in CPU address space -*/ -struct h_galpa { - u64 fw_handle; - /* for pSeries this is a 64bit memory address where - I/O memory is mapped into CPU address space (kv) */ -}; - -/* - * resource to access eHCA address space registers, all types - */ -struct h_galpas { - u32 pid; /*PID of userspace galpa checking */ - struct h_galpa user; /* user space accessible resource, - set to 0 if unused */ - struct h_galpa kernel; /* kernel space accessible resource, - set to 0 if unused */ -}; - -static inline u64 hipz_galpa_load(struct h_galpa galpa, u32 offset) -{ - u64 addr = galpa.fw_handle + offset; - return *(volatile u64 __force *)addr; -} - -static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value) -{ - u64 addr = galpa.fw_handle + offset; - *(volatile u64 __force *)addr = value; -} - -int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, - u64 paddr_kernel, u64 paddr_user); - -int hcp_galpas_dtor(struct h_galpas *galpas); - -u64 hcall_map_page(u64 physaddr); - -int hcall_unmap_page(u64 mapaddr); - -#endif diff --git a/drivers/infiniband/hw/ehca/hipz_fns.h b/drivers/infiniband/hw/ehca/hipz_fns.h deleted file mode 100644 index 9dac93d02140..000000000000 --- a/drivers/infiniband/hw/ehca/hipz_fns.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * HW abstraction register functions - * - * Authors: Christoph Raisch - * Reinhard Ernst - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. 
- * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef __HIPZ_FNS_H__ -#define __HIPZ_FNS_H__ - -#include "ehca_classes.h" -#include "hipz_hw.h" - -#include "hipz_fns_core.h" - -#define hipz_galpa_store_eq(gal, offset, value) \ - hipz_galpa_store(gal, EQTEMM_OFFSET(offset), value) - -#define hipz_galpa_load_eq(gal, offset) \ - hipz_galpa_load(gal, EQTEMM_OFFSET(offset)) - -#define hipz_galpa_store_qped(gal, offset, value) \ - hipz_galpa_store(gal, QPEDMM_OFFSET(offset), value) - -#define hipz_galpa_load_qped(gal, offset) \ - hipz_galpa_load(gal, QPEDMM_OFFSET(offset)) - -#define hipz_galpa_store_mrmw(gal, offset, value) \ - hipz_galpa_store(gal, MRMWMM_OFFSET(offset), value) - -#define hipz_galpa_load_mrmw(gal, offset) \ - hipz_galpa_load(gal, MRMWMM_OFFSET(offset)) - -#endif diff --git a/drivers/infiniband/hw/ehca/hipz_fns_core.h b/drivers/infiniband/hw/ehca/hipz_fns_core.h deleted file mode 100644 index 868735fd3187..000000000000 --- a/drivers/infiniband/hw/ehca/hipz_fns_core.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * HW abstraction register functions - * - * Authors: Christoph Raisch - * Heiko J Schick - * Hoang-Nam Nguyen - * Reinhard Ernst - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __HIPZ_FNS_CORE_H__ -#define __HIPZ_FNS_CORE_H__ - -#include "hcp_phyp.h" -#include "hipz_hw.h" - -#define hipz_galpa_store_cq(gal, offset, value) \ - hipz_galpa_store(gal, CQTEMM_OFFSET(offset), value) - -#define hipz_galpa_load_cq(gal, offset) \ - hipz_galpa_load(gal, CQTEMM_OFFSET(offset)) - -#define hipz_galpa_store_qp(gal, offset, value) \ - hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value) -#define hipz_galpa_load_qp(gal, offset) \ - hipz_galpa_load(gal, QPTEMM_OFFSET(offset)) - -static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes) -{ - /* ringing doorbell :-) */ - hipz_galpa_store_qp(qp->galpas.kernel, qpx_sqa, - EHCA_BMASK_SET(QPX_SQADDER, nr_wqes)); -} - -static inline void hipz_update_rqa(struct ehca_qp *qp, u16 nr_wqes) -{ - /* ringing doorbell :-) */ - hipz_galpa_store_qp(qp->galpas.kernel, qpx_rqa, - EHCA_BMASK_SET(QPX_RQADDER, nr_wqes)); -} - -static inline void hipz_update_feca(struct ehca_cq *cq, u32 nr_cqes) -{ - hipz_galpa_store_cq(cq->galpas.kernel, cqx_feca, - EHCA_BMASK_SET(CQX_FECADDER, nr_cqes)); -} - -static inline void hipz_set_cqx_n0(struct ehca_cq *cq, u32 value) -{ - u64 cqx_n0_reg; - - 
hipz_galpa_store_cq(cq->galpas.kernel, cqx_n0, - EHCA_BMASK_SET(CQX_N0_GENERATE_SOLICITED_COMP_EVENT, - value)); - cqx_n0_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n0); -} - -static inline void hipz_set_cqx_n1(struct ehca_cq *cq, u32 value) -{ - u64 cqx_n1_reg; - - hipz_galpa_store_cq(cq->galpas.kernel, cqx_n1, - EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, value)); - cqx_n1_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n1); -} - -#endif /* __HIPZ_FNC_CORE_H__ */ diff --git a/drivers/infiniband/hw/ehca/hipz_hw.h b/drivers/infiniband/hw/ehca/hipz_hw.h deleted file mode 100644 index bf996c7acc42..000000000000 --- a/drivers/infiniband/hw/ehca/hipz_hw.h +++ /dev/null @@ -1,414 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * eHCA register definitions - * - * Authors: Waleri Fomin - * Christoph Raisch - * Reinhard Ernst - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __HIPZ_HW_H__ -#define __HIPZ_HW_H__ - -#include "ehca_tools.h" - -#define EHCA_MAX_MTU 4 - -/* QP Table Entry Memory Map */ -struct hipz_qptemm { - u64 qpx_hcr; - u64 qpx_c; - u64 qpx_herr; - u64 qpx_aer; -/* 0x20*/ - u64 qpx_sqa; - u64 qpx_sqc; - u64 qpx_rqa; - u64 qpx_rqc; -/* 0x40*/ - u64 qpx_st; - u64 qpx_pmstate; - u64 qpx_pmfa; - u64 qpx_pkey; -/* 0x60*/ - u64 qpx_pkeya; - u64 qpx_pkeyb; - u64 qpx_pkeyc; - u64 qpx_pkeyd; -/* 0x80*/ - u64 qpx_qkey; - u64 qpx_dqp; - u64 qpx_dlidp; - u64 qpx_portp; -/* 0xa0*/ - u64 qpx_slidp; - u64 qpx_slidpp; - u64 qpx_dlida; - u64 qpx_porta; -/* 0xc0*/ - u64 qpx_slida; - u64 qpx_slidpa; - u64 qpx_slvl; - u64 qpx_ipd; -/* 0xe0*/ - u64 qpx_mtu; - u64 qpx_lato; - u64 qpx_rlimit; - u64 qpx_rnrlimit; -/* 0x100*/ - u64 qpx_t; - u64 qpx_sqhp; - u64 qpx_sqptp; - u64 qpx_nspsn; -/* 0x120*/ - u64 qpx_nspsnhwm; - u64 reserved1; - u64 qpx_sdsi; - u64 qpx_sdsbc; -/* 0x140*/ - u64 qpx_sqwsize; - u64 qpx_sqwts; - u64 qpx_lsn; - u64 qpx_nssn; -/* 0x160 */ - u64 qpx_mor; - u64 qpx_cor; - u64 qpx_sqsize; - u64 qpx_erc; -/* 0x180*/ - u64 qpx_rnrrc; - u64 qpx_ernrwt; - u64 qpx_rnrresp; - u64 qpx_lmsna; -/* 0x1a0 */ - u64 qpx_sqhpc; - u64 qpx_sqcptp; - u64 qpx_sigt; - u64 qpx_wqecnt; -/* 0x1c0*/ - u64 qpx_rqhp; - u64 qpx_rqptp; - u64 qpx_rqsize; - u64 qpx_nrr; -/* 0x1e0*/ - u64 qpx_rdmac; - u64 qpx_nrpsn; - u64 qpx_lapsn; - u64 qpx_lcr; -/* 0x200*/ - u64 qpx_rwc; - u64 qpx_rwva; - u64 qpx_rdsi; - 
u64 qpx_rdsbc; -/* 0x220*/ - u64 qpx_rqwsize; - u64 qpx_crmsn; - u64 qpx_rdd; - u64 qpx_larpsn; -/* 0x240*/ - u64 qpx_pd; - u64 qpx_scqn; - u64 qpx_rcqn; - u64 qpx_aeqn; -/* 0x260*/ - u64 qpx_aaelog; - u64 qpx_ram; - u64 qpx_rdmaqe0; - u64 qpx_rdmaqe1; -/* 0x280*/ - u64 qpx_rdmaqe2; - u64 qpx_rdmaqe3; - u64 qpx_nrpsnhwm; -/* 0x298*/ - u64 reserved[(0x400 - 0x298) / 8]; -/* 0x400 extended data */ - u64 reserved_ext[(0x500 - 0x400) / 8]; -/* 0x500 */ - u64 reserved2[(0x1000 - 0x500) / 8]; -/* 0x1000 */ -}; - -#define QPX_SQADDER EHCA_BMASK_IBM(48, 63) -#define QPX_RQADDER EHCA_BMASK_IBM(48, 63) -#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3, 3) - -#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm, x) - -/* MRMWPT Entry Memory Map */ -struct hipz_mrmwmm { - /* 0x00 */ - u64 mrx_hcr; - - u64 mrx_c; - u64 mrx_herr; - u64 mrx_aer; - /* 0x20 */ - u64 mrx_pp; - u64 reserved1; - u64 reserved2; - u64 reserved3; - /* 0x40 */ - u64 reserved4[(0x200 - 0x40) / 8]; - /* 0x200 */ - u64 mrx_ctl[64]; - -}; - -#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm, x) - -struct hipz_qpedmm { - /* 0x00 */ - u64 reserved0[(0x400) / 8]; - /* 0x400 */ - u64 qpedx_phh; - u64 qpedx_ppsgp; - /* 0x410 */ - u64 qpedx_ppsgu; - u64 qpedx_ppdgp; - /* 0x420 */ - u64 qpedx_ppdgu; - u64 qpedx_aph; - /* 0x430 */ - u64 qpedx_apsgp; - u64 qpedx_apsgu; - /* 0x440 */ - u64 qpedx_apdgp; - u64 qpedx_apdgu; - /* 0x450 */ - u64 qpedx_apav; - u64 qpedx_apsav; - /* 0x460 */ - u64 qpedx_hcr; - u64 reserved1[4]; - /* 0x488 */ - u64 qpedx_rrl0; - /* 0x490 */ - u64 qpedx_rrrkey0; - u64 qpedx_rrva0; - /* 0x4a0 */ - u64 reserved2; - u64 qpedx_rrl1; - /* 0x4b0 */ - u64 qpedx_rrrkey1; - u64 qpedx_rrva1; - /* 0x4c0 */ - u64 reserved3; - u64 qpedx_rrl2; - /* 0x4d0 */ - u64 qpedx_rrrkey2; - u64 qpedx_rrva2; - /* 0x4e0 */ - u64 reserved4; - u64 qpedx_rrl3; - /* 0x4f0 */ - u64 qpedx_rrrkey3; - u64 qpedx_rrva3; -}; - -#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm, x) - -/* CQ Table Entry Memory Map */ -struct 
hipz_cqtemm { - u64 cqx_hcr; - u64 cqx_c; - u64 cqx_herr; - u64 cqx_aer; -/* 0x20 */ - u64 cqx_ptp; - u64 cqx_tp; - u64 cqx_fec; - u64 cqx_feca; -/* 0x40 */ - u64 cqx_ep; - u64 cqx_eq; -/* 0x50 */ - u64 reserved1; - u64 cqx_n0; -/* 0x60 */ - u64 cqx_n1; - u64 reserved2[(0x1000 - 0x60) / 8]; -/* 0x1000 */ -}; - -#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32, 63) -#define CQX_FECADDER EHCA_BMASK_IBM(32, 63) -#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0, 0) -#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0, 0) - -#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm, x) - -/* EQ Table Entry Memory Map */ -struct hipz_eqtemm { - u64 eqx_hcr; - u64 eqx_c; - - u64 eqx_herr; - u64 eqx_aer; -/* 0x20 */ - u64 eqx_ptp; - u64 eqx_tp; - u64 eqx_ssba; - u64 eqx_psba; - -/* 0x40 */ - u64 eqx_cec; - u64 eqx_meql; - u64 eqx_xisbi; - u64 eqx_xisc; -/* 0x60 */ - u64 eqx_it; - -}; - -#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm, x) - -/* access control defines for MR/MW */ -#define HIPZ_ACCESSCTRL_L_WRITE 0x00800000 -#define HIPZ_ACCESSCTRL_R_WRITE 0x00400000 -#define HIPZ_ACCESSCTRL_R_READ 0x00200000 -#define HIPZ_ACCESSCTRL_R_ATOMIC 0x00100000 -#define HIPZ_ACCESSCTRL_MW_BIND 0x00080000 - -/* query hca response block */ -struct hipz_query_hca { - u32 cur_reliable_dg; - u32 cur_qp; - u32 cur_cq; - u32 cur_eq; - u32 cur_mr; - u32 cur_mw; - u32 cur_ee_context; - u32 cur_mcast_grp; - u32 cur_qp_attached_mcast_grp; - u32 reserved1; - u32 cur_ipv6_qp; - u32 cur_eth_qp; - u32 cur_hp_mr; - u32 reserved2[3]; - u32 max_rd_domain; - u32 max_qp; - u32 max_cq; - u32 max_eq; - u32 max_mr; - u32 max_hp_mr; - u32 max_mw; - u32 max_mrwpte; - u32 max_special_mrwpte; - u32 max_rd_ee_context; - u32 max_mcast_grp; - u32 max_total_mcast_qp_attach; - u32 max_mcast_qp_attach; - u32 max_raw_ipv6_qp; - u32 max_raw_ethy_qp; - u32 internal_clock_frequency; - u32 max_pd; - u32 max_ah; - u32 max_cqe; - u32 max_wqes_wq; - u32 max_partitions; - u32 max_rr_ee_context; - u32 max_rr_qp; - u32 
max_rr_hca; - u32 max_act_wqs_ee_context; - u32 max_act_wqs_qp; - u32 max_sge; - u32 max_sge_rd; - u32 memory_page_size_supported; - u64 max_mr_size; - u32 local_ca_ack_delay; - u32 num_ports; - u32 vendor_id; - u32 vendor_part_id; - u32 hw_ver; - u64 node_guid; - u64 hca_cap_indicators; - u32 data_counter_register_size; - u32 max_shared_rq; - u32 max_isns_eq; - u32 max_neq; -} __attribute__ ((packed)); - -#define HCA_CAP_AH_PORT_NR_CHECK EHCA_BMASK_IBM( 0, 0) -#define HCA_CAP_ATOMIC EHCA_BMASK_IBM( 1, 1) -#define HCA_CAP_AUTO_PATH_MIG EHCA_BMASK_IBM( 2, 2) -#define HCA_CAP_BAD_P_KEY_CTR EHCA_BMASK_IBM( 3, 3) -#define HCA_CAP_SQD_RTS_PORT_CHANGE EHCA_BMASK_IBM( 4, 4) -#define HCA_CAP_CUR_QP_STATE_MOD EHCA_BMASK_IBM( 5, 5) -#define HCA_CAP_INIT_TYPE EHCA_BMASK_IBM( 6, 6) -#define HCA_CAP_PORT_ACTIVE_EVENT EHCA_BMASK_IBM( 7, 7) -#define HCA_CAP_Q_KEY_VIOL_CTR EHCA_BMASK_IBM( 8, 8) -#define HCA_CAP_WQE_RESIZE EHCA_BMASK_IBM( 9, 9) -#define HCA_CAP_RAW_PACKET_MCAST EHCA_BMASK_IBM(10, 10) -#define HCA_CAP_SHUTDOWN_PORT EHCA_BMASK_IBM(11, 11) -#define HCA_CAP_RC_LL_QP EHCA_BMASK_IBM(12, 12) -#define HCA_CAP_SRQ EHCA_BMASK_IBM(13, 13) -#define HCA_CAP_UD_LL_QP EHCA_BMASK_IBM(16, 16) -#define HCA_CAP_RESIZE_MR EHCA_BMASK_IBM(17, 17) -#define HCA_CAP_MINI_QP EHCA_BMASK_IBM(18, 18) -#define HCA_CAP_H_ALLOC_RES_SYNC EHCA_BMASK_IBM(19, 19) - -/* query port response block */ -struct hipz_query_port { - u32 state; - u32 bad_pkey_cntr; - u32 lmc; - u32 lid; - u32 subnet_timeout; - u32 qkey_viol_cntr; - u32 sm_sl; - u32 sm_lid; - u32 capability_mask; - u32 init_type_reply; - u32 pkey_tbl_len; - u32 gid_tbl_len; - u64 gid_prefix; - u32 port_nr; - u16 pkey_entries[16]; - u8 reserved1[32]; - u32 trent_size; - u32 trbuf_size; - u64 max_msg_sz; - u32 max_mtu; - u32 vl_cap; - u32 phys_pstate; - u32 phys_state; - u32 phys_speed; - u32 phys_width; - u8 reserved2[1884]; - u64 guid_entries[255]; -} __attribute__ ((packed)); - -#endif diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c 
b/drivers/infiniband/hw/ehca/ipz_pt_fn.c deleted file mode 100644 index 7ffc748cb973..000000000000 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c +++ /dev/null @@ -1,289 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * internal queue handling - * - * Authors: Waleri Fomin - * Reinhard Ernst - * Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include <linux/slab.h> - -#include "ehca_tools.h" -#include "ipz_pt_fn.h" -#include "ehca_classes.h" - -#define PAGES_PER_KPAGE (PAGE_SIZE >> EHCA_PAGESHIFT) - -struct kmem_cache *small_qp_cache; - -void *ipz_qpageit_get_inc(struct ipz_queue *queue) -{ - void *ret = ipz_qeit_get(queue); - queue->current_q_offset += queue->pagesize; - if (queue->current_q_offset > queue->queue_length) { - queue->current_q_offset -= queue->pagesize; - ret = NULL; - } - if (((u64)ret) % queue->pagesize) { - ehca_gen_err("ERROR!! not at PAGE-Boundary"); - return NULL; - } - return ret; -} - -void *ipz_qeit_eq_get_inc(struct ipz_queue *queue) -{ - void *ret = ipz_qeit_get(queue); - u64 last_entry_in_q = queue->queue_length - queue->qe_size; - - queue->current_q_offset += queue->qe_size; - if (queue->current_q_offset > last_entry_in_q) { - queue->current_q_offset = 0; - queue->toggle_state = (~queue->toggle_state) & 1; - } - - return ret; -} - -int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset) -{ - int i; - for (i = 0; i < queue->queue_length / queue->pagesize; i++) { - u64 page = __pa(queue->queue_pages[i]); - if (addr >= page && addr < page + queue->pagesize) { - *q_offset = addr - page + i * queue->pagesize; - return 0; - } - } - return -EINVAL; -} - -#if PAGE_SHIFT < EHCA_PAGESHIFT -#error Kernel pages must be at least as large than eHCA pages (4K) ! 
-#endif - -/* - * allocate pages for queue: - * outer loop allocates whole kernel pages (page aligned) and - * inner loop divides a kernel page into smaller hca queue pages - */ -static int alloc_queue_pages(struct ipz_queue *queue, const u32 nr_of_pages) -{ - int k, f = 0; - u8 *kpage; - - while (f < nr_of_pages) { - kpage = (u8 *)get_zeroed_page(GFP_KERNEL); - if (!kpage) - goto out; - - for (k = 0; k < PAGES_PER_KPAGE && f < nr_of_pages; k++) { - queue->queue_pages[f] = (struct ipz_page *)kpage; - kpage += EHCA_PAGESIZE; - f++; - } - } - return 1; - -out: - for (f = 0; f < nr_of_pages && queue->queue_pages[f]; - f += PAGES_PER_KPAGE) - free_page((unsigned long)(queue->queue_pages)[f]); - return 0; -} - -static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) -{ - int order = ilog2(queue->pagesize) - 9; - struct ipz_small_queue_page *page; - unsigned long bit; - - mutex_lock(&pd->lock); - - if (!list_empty(&pd->free[order])) - page = list_entry(pd->free[order].next, - struct ipz_small_queue_page, list); - else { - page = kmem_cache_zalloc(small_qp_cache, GFP_KERNEL); - if (!page) - goto out; - - page->page = get_zeroed_page(GFP_KERNEL); - if (!page->page) { - kmem_cache_free(small_qp_cache, page); - goto out; - } - - list_add(&page->list, &pd->free[order]); - } - - bit = find_first_zero_bit(page->bitmap, IPZ_SPAGE_PER_KPAGE >> order); - __set_bit(bit, page->bitmap); - page->fill++; - - if (page->fill == IPZ_SPAGE_PER_KPAGE >> order) - list_move(&page->list, &pd->full[order]); - - mutex_unlock(&pd->lock); - - queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9))); - queue->small_page = page; - queue->offset = bit << (order + 9); - return 1; - -out: - ehca_err(pd->ib_pd.device, "failed to allocate small queue page"); - mutex_unlock(&pd->lock); - return 0; -} - -static void free_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) -{ - int order = ilog2(queue->pagesize) - 9; - struct ipz_small_queue_page *page = 
queue->small_page; - unsigned long bit; - int free_page = 0; - - bit = ((unsigned long)queue->queue_pages[0] & ~PAGE_MASK) - >> (order + 9); - - mutex_lock(&pd->lock); - - __clear_bit(bit, page->bitmap); - page->fill--; - - if (page->fill == 0) { - list_del(&page->list); - free_page = 1; - } - - if (page->fill == (IPZ_SPAGE_PER_KPAGE >> order) - 1) - /* the page was full until we freed the chunk */ - list_move_tail(&page->list, &pd->free[order]); - - mutex_unlock(&pd->lock); - - if (free_page) { - free_page(page->page); - kmem_cache_free(small_qp_cache, page); - } -} - -int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, - const u32 nr_of_pages, const u32 pagesize, - const u32 qe_size, const u32 nr_of_sg, - int is_small) -{ - if (pagesize > PAGE_SIZE) { - ehca_gen_err("FATAL ERROR: pagesize=%x " - "is greater than kernel page size", pagesize); - return 0; - } - - /* init queue fields */ - queue->queue_length = nr_of_pages * pagesize; - queue->pagesize = pagesize; - queue->qe_size = qe_size; - queue->act_nr_of_sg = nr_of_sg; - queue->current_q_offset = 0; - queue->toggle_state = 1; - queue->small_page = NULL; - - /* allocate queue page pointers */ - queue->queue_pages = kzalloc(nr_of_pages * sizeof(void *), - GFP_KERNEL | __GFP_NOWARN); - if (!queue->queue_pages) { - queue->queue_pages = vzalloc(nr_of_pages * sizeof(void *)); - if (!queue->queue_pages) { - ehca_gen_err("Couldn't allocate queue page list"); - return 0; - } - } - - /* allocate actual queue pages */ - if (is_small) { - if (!alloc_small_queue_page(queue, pd)) - goto ipz_queue_ctor_exit0; - } else - if (!alloc_queue_pages(queue, nr_of_pages)) - goto ipz_queue_ctor_exit0; - - return 1; - -ipz_queue_ctor_exit0: - ehca_gen_err("Couldn't alloc pages queue=%p " - "nr_of_pages=%x", queue, nr_of_pages); - kvfree(queue->queue_pages); - - return 0; -} - -int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue) -{ - int i, nr_pages; - - if (!queue || !queue->queue_pages) { - ehca_gen_dbg("queue 
or queue_pages is NULL"); - return 0; - } - - if (queue->small_page) - free_small_queue_page(queue, pd); - else { - nr_pages = queue->queue_length / queue->pagesize; - for (i = 0; i < nr_pages; i += PAGES_PER_KPAGE) - free_page((unsigned long)queue->queue_pages[i]); - } - - kvfree(queue->queue_pages); - - return 1; -} - -int ehca_init_small_qp_cache(void) -{ - small_qp_cache = kmem_cache_create("ehca_cache_small_qp", - sizeof(struct ipz_small_queue_page), - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!small_qp_cache) - return -ENOMEM; - - return 0; -} - -void ehca_cleanup_small_qp_cache(void) -{ - kmem_cache_destroy(small_qp_cache); -} diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.h b/drivers/infiniband/hw/ehca/ipz_pt_fn.h deleted file mode 100644 index a801274ea337..000000000000 --- a/drivers/infiniband/hw/ehca/ipz_pt_fn.h +++ /dev/null @@ -1,289 +0,0 @@ -/* - * IBM eServer eHCA Infiniband device driver for Linux on POWER - * - * internal queue handling - * - * Authors: Waleri Fomin - * Reinhard Ernst - * Christoph Raisch - * - * Copyright (c) 2005 IBM Corporation - * - * All rights reserved. - * - * This source code is distributed under a dual license of GPL v2.0 and OpenIB - * BSD. - * - * OpenIB BSD License - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * Redistributions of source code must retain the above copyright notice, this - * list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials - * provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER - * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __IPZ_PT_FN_H__ -#define __IPZ_PT_FN_H__ - -#define EHCA_PAGESHIFT 12 -#define EHCA_PAGESIZE 4096UL -#define EHCA_PAGEMASK (~(EHCA_PAGESIZE-1)) -#define EHCA_PT_ENTRIES 512UL - -#include "ehca_tools.h" -#include "ehca_qes.h" - -struct ehca_pd; -struct ipz_small_queue_page; - -extern struct kmem_cache *small_qp_cache; - -/* struct generic ehca page */ -struct ipz_page { - u8 entries[EHCA_PAGESIZE]; -}; - -#define IPZ_SPAGE_PER_KPAGE (PAGE_SIZE / 512) - -struct ipz_small_queue_page { - unsigned long page; - unsigned long bitmap[IPZ_SPAGE_PER_KPAGE / BITS_PER_LONG]; - int fill; - void *mapped_addr; - u32 mmap_count; - struct list_head list; -}; - -/* struct generic queue in linux kernel virtual memory (kv) */ -struct ipz_queue { - u64 current_q_offset; /* current queue entry */ - - struct ipz_page **queue_pages; /* array of pages belonging to queue */ - u32 qe_size; /* queue entry size */ - u32 act_nr_of_sg; - u32 queue_length; /* queue length allocated in bytes */ - u32 pagesize; - u32 toggle_state; /* toggle flag - per page */ - u32 offset; /* save offset within page for small_qp */ - struct ipz_small_queue_page *small_page; -}; - -/* - * return current Queue 
Entry for a certain q_offset - * returns address (kv) of Queue Entry - */ -static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset) -{ - struct ipz_page *current_page; - if (q_offset >= queue->queue_length) - return NULL; - current_page = (queue->queue_pages)[q_offset >> EHCA_PAGESHIFT]; - return &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)]; -} - -/* - * return current Queue Entry - * returns address (kv) of Queue Entry - */ -static inline void *ipz_qeit_get(struct ipz_queue *queue) -{ - return ipz_qeit_calc(queue, queue->current_q_offset); -} - -/* - * return current Queue Page , increment Queue Page iterator from - * page to page in struct ipz_queue, last increment will return 0! and - * NOT wrap - * returns address (kv) of Queue Page - * warning don't use in parallel with ipz_QE_get_inc() - */ -void *ipz_qpageit_get_inc(struct ipz_queue *queue); - -/* - * return current Queue Entry, increment Queue Entry iterator by one - * step in struct ipz_queue, will wrap in ringbuffer - * returns address (kv) of Queue Entry BEFORE increment - * warning don't use in parallel with ipz_qpageit_get_inc() - */ -static inline void *ipz_qeit_get_inc(struct ipz_queue *queue) -{ - void *ret = ipz_qeit_get(queue); - queue->current_q_offset += queue->qe_size; - if (queue->current_q_offset >= queue->queue_length) { - queue->current_q_offset = 0; - /* toggle the valid flag */ - queue->toggle_state = (~queue->toggle_state) & 1; - } - - return ret; -} - -/* - * return a bool indicating whether current Queue Entry is valid - */ -static inline int ipz_qeit_is_valid(struct ipz_queue *queue) -{ - struct ehca_cqe *cqe = ipz_qeit_get(queue); - return ((cqe->cqe_flags >> 7) == (queue->toggle_state & 1)); -} - -/* - * return current Queue Entry, increment Queue Entry iterator by one - * step in struct ipz_queue, will wrap in ringbuffer - * returns address (kv) of Queue Entry BEFORE increment - * returns 0 and does not increment, if wrong valid state - * warning don't use in 
parallel with ipz_qpageit_get_inc() - */ -static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue) -{ - return ipz_qeit_is_valid(queue) ? ipz_qeit_get_inc(queue) : NULL; -} - -/* - * returns and resets Queue Entry iterator - * returns address (kv) of first Queue Entry - */ -static inline void *ipz_qeit_reset(struct ipz_queue *queue) -{ - queue->current_q_offset = 0; - return ipz_qeit_get(queue); -} - -/* - * return the q_offset corresponding to an absolute address - */ -int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset); - -/* - * return the next queue offset. don't modify the queue. - */ -static inline u64 ipz_queue_advance_offset(struct ipz_queue *queue, u64 offset) -{ - offset += queue->qe_size; - if (offset >= queue->queue_length) offset = 0; - return offset; -} - -/* struct generic page table */ -struct ipz_pt { - u64 entries[EHCA_PT_ENTRIES]; -}; - -/* struct page table for a queue, only to be used in pf */ -struct ipz_qpt { - /* queue page tables (kv), use u64 because we know the element length */ - u64 *qpts; - u32 n_qpts; - u32 n_ptes; /* number of page table entries */ - u64 *current_pte_addr; -}; - -/* - * constructor for a ipz_queue_t, placement new for ipz_queue_t, - * new for all dependent datastructors - * all QP Tables are the same - * flow: - * allocate+pin queue - * see ipz_qpt_ctor() - * returns true if ok, false if out of memory - */ -int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, - const u32 nr_of_pages, const u32 pagesize, - const u32 qe_size, const u32 nr_of_sg, - int is_small); - -/* - * destructor for a ipz_queue_t - * -# free queue - * see ipz_queue_ctor() - * returns true if ok, false if queue was NULL-ptr of free failed - */ -int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue); - -/* - * constructor for a ipz_qpt_t, - * placement new for struct ipz_queue, new for all dependent datastructors - * all QP Tables are the same, - * flow: - * -# allocate+pin queue - * -# 
initialise ptcb - * -# allocate+pin PTs - * -# link PTs to a ring, according to HCA Arch, set bit62 id needed - * -# the ring must have room for exactly nr_of_PTEs - * see ipz_qpt_ctor() - */ -void ipz_qpt_ctor(struct ipz_qpt *qpt, - const u32 nr_of_qes, - const u32 pagesize, - const u32 qe_size, - const u8 lowbyte, const u8 toggle, - u32 * act_nr_of_QEs, u32 * act_nr_of_pages); - -/* - * return current Queue Entry, increment Queue Entry iterator by one - * step in struct ipz_queue, will wrap in ringbuffer - * returns address (kv) of Queue Entry BEFORE increment - * warning don't use in parallel with ipz_qpageit_get_inc() - * warning unpredictable results may occur if steps>act_nr_of_queue_entries - * fix EQ page problems - */ -void *ipz_qeit_eq_get_inc(struct ipz_queue *queue); - -/* - * return current Event Queue Entry, increment Queue Entry iterator - * by one step in struct ipz_queue if valid, will wrap in ringbuffer - * returns address (kv) of Queue Entry BEFORE increment - * returns 0 and does not increment, if wrong valid state - * warning don't use in parallel with ipz_queue_QPageit_get_inc() - * warning unpredictable results may occur if steps>act_nr_of_queue_entries - */ -static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue) -{ - void *ret = ipz_qeit_get(queue); - u32 qe = *(u8 *)ret; - if ((qe >> 7) != (queue->toggle_state & 1)) - return NULL; - ipz_qeit_eq_get_inc(queue); /* this is a good one */ - return ret; -} - -static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue) -{ - void *ret = ipz_qeit_get(queue); - u32 qe = *(u8 *)ret; - if ((qe >> 7) != (queue->toggle_state & 1)) - return NULL; - return ret; -} - -/* returns address (GX) of first queue entry */ -static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt) -{ - return be64_to_cpu(qpt->qpts[0]); -} - -/* returns address (kv) of first page of queue page table */ -static inline void *ipz_qpt_get_qpt(struct ipz_qpt *qpt) -{ - return qpt->qpts; -} - -#endif /* 
__IPZ_PT_FN_H__ */ diff --git a/drivers/irqchip/exynos-combiner.c b/drivers/irqchip/exynos-combiner.c index e9c6f2a5b52d..cd7d3bc78e34 100644 --- a/drivers/irqchip/exynos-combiner.c +++ b/drivers/irqchip/exynos-combiner.c @@ -65,12 +65,10 @@ static void combiner_unmask_irq(struct irq_data *data) __raw_writel(mask, combiner_base(data) + COMBINER_ENABLE_SET); } -static void combiner_handle_cascade_irq(unsigned int __irq, - struct irq_desc *desc) +static void combiner_handle_cascade_irq(struct irq_desc *desc) { struct combiner_chip_data *chip_data = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); - unsigned int irq = irq_desc_get_irq(desc); unsigned int cascade_irq, combiner_irq; unsigned long status; @@ -88,7 +86,7 @@ static void combiner_handle_cascade_irq(unsigned int __irq, cascade_irq = irq_find_mapping(combiner_irq_domain, combiner_irq); if (unlikely(!cascade_irq)) - handle_bad_irq(irq, desc); + handle_bad_irq(desc); else generic_handle_irq(cascade_irq); @@ -165,7 +163,7 @@ static int combiner_irq_domain_map(struct irq_domain *d, unsigned int irq, irq_set_chip_and_handler(irq, &combiner_chip, handle_level_irq); irq_set_chip_data(irq, &combiner_data[hw >> 3]); - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); + irq_set_probe(irq); return 0; } diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index 39b72da0c143..655cb967a1f2 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -200,7 +200,6 @@ static int armada_370_xp_msi_map(struct irq_domain *domain, unsigned int virq, { irq_set_chip_and_handler(virq, &armada_370_xp_msi_irq_chip, handle_simple_irq); - set_irq_flags(virq, IRQF_VALID); return 0; } @@ -317,7 +316,7 @@ static int armada_370_xp_mpic_irq_map(struct irq_domain *h, irq_set_chip_and_handler(virq, &armada_370_xp_irq_chip, handle_level_irq); } - set_irq_flags(virq, IRQF_VALID | IRQF_PROBE); + irq_set_probe(virq); return 0; } @@ -447,8 +446,7 @@ 
static void armada_370_xp_handle_msi_irq(struct pt_regs *regs, bool is_chained) static void armada_370_xp_handle_msi_irq(struct pt_regs *r, bool b) {} #endif -static void armada_370_xp_mpic_handle_cascade_irq(unsigned int irq, - struct irq_desc *desc) +static void armada_370_xp_mpic_handle_cascade_irq(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); unsigned long irqmap, irqn, irqsrc, cpuid; diff --git a/drivers/irqchip/irq-bcm2835.c b/drivers/irqchip/irq-bcm2835.c index ed4ca9deca70..bf9cc5f2e839 100644 --- a/drivers/irqchip/irq-bcm2835.c +++ b/drivers/irqchip/irq-bcm2835.c @@ -96,7 +96,7 @@ struct armctrl_ic { static struct armctrl_ic intc __read_mostly; static void __exception_irq_entry bcm2835_handle_irq( struct pt_regs *regs); -static void bcm2836_chained_handle_irq(unsigned int irq, struct irq_desc *desc); +static void bcm2836_chained_handle_irq(struct irq_desc *desc); static void armctrl_mask_irq(struct irq_data *d) { @@ -166,7 +166,7 @@ static int __init armctrl_of_init(struct device_node *node, BUG_ON(irq <= 0); irq_set_chip_and_handler(irq, &armctrl_chip, handle_level_irq); - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); + irq_set_probe(irq); } } @@ -245,7 +245,7 @@ static void __exception_irq_entry bcm2835_handle_irq( handle_IRQ(irq_linear_revmap(intc.domain, hwirq), regs); } -static void bcm2836_chained_handle_irq(unsigned int irq, struct irq_desc *desc) +static void bcm2836_chained_handle_irq(struct irq_desc *desc) { u32 hwirq; diff --git a/drivers/irqchip/irq-bcm7038-l1.c b/drivers/irqchip/irq-bcm7038-l1.c index 409bdc6366c2..0fea985ef1dc 100644 --- a/drivers/irqchip/irq-bcm7038-l1.c +++ b/drivers/irqchip/irq-bcm7038-l1.c @@ -115,7 +115,7 @@ static inline void l1_writel(u32 val, void __iomem *reg) writel(val, reg); } -static void bcm7038_l1_irq_handle(unsigned int irq, struct irq_desc *desc) +static void bcm7038_l1_irq_handle(struct irq_desc *desc) { struct bcm7038_l1_chip *intc = irq_desc_get_handler_data(desc); struct 
bcm7038_l1_cpu *cpu; diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c index d3f976913a6f..61b18ab33ad9 100644 --- a/drivers/irqchip/irq-bcm7120-l2.c +++ b/drivers/irqchip/irq-bcm7120-l2.c @@ -56,7 +56,7 @@ struct bcm7120_l2_intc_data { const __be32 *map_mask_prop; }; -static void bcm7120_l2_intc_irq_handle(unsigned int irq, struct irq_desc *desc) +static void bcm7120_l2_intc_irq_handle(struct irq_desc *desc) { struct bcm7120_l1_intc_data *data = irq_desc_get_handler_data(desc); struct bcm7120_l2_intc_data *b = data->b; diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c index aedda06191eb..65cd341f331a 100644 --- a/drivers/irqchip/irq-brcmstb-l2.c +++ b/drivers/irqchip/irq-brcmstb-l2.c @@ -49,13 +49,12 @@ struct brcmstb_l2_intc_data { u32 saved_mask; /* for suspend/resume */ }; -static void brcmstb_l2_intc_irq_handle(unsigned int __irq, - struct irq_desc *desc) +static void brcmstb_l2_intc_irq_handle(struct irq_desc *desc) { struct brcmstb_l2_intc_data *b = irq_desc_get_handler_data(desc); struct irq_chip_generic *gc = irq_get_domain_generic_chip(b->domain, 0); struct irq_chip *chip = irq_desc_get_chip(desc); - unsigned int irq = irq_desc_get_irq(desc); + unsigned int irq; u32 status; chained_irq_enter(chip, desc); @@ -65,7 +64,7 @@ static void brcmstb_l2_intc_irq_handle(unsigned int __irq, if (status == 0) { raw_spin_lock(&desc->lock); - handle_bad_irq(irq, desc); + handle_bad_irq(desc); raw_spin_unlock(&desc->lock); goto out; } diff --git a/drivers/irqchip/irq-clps711x.c b/drivers/irqchip/irq-clps711x.c index 2dd929eed9e0..eb5eb0cd414d 100644 --- a/drivers/irqchip/irq-clps711x.c +++ b/drivers/irqchip/irq-clps711x.c @@ -132,14 +132,14 @@ static int __init clps711x_intc_irq_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) { irq_flow_handler_t handler = handle_level_irq; - unsigned int flags = IRQF_VALID | IRQF_PROBE; + unsigned int flags = 0; if (!clps711x_irqs[hw].flags) return 0; if 
(clps711x_irqs[hw].flags & CLPS711X_FLAG_FIQ) { handler = handle_bad_irq; - flags |= IRQF_NOAUTOEN; + flags |= IRQ_NOAUTOEN; } else if (clps711x_irqs[hw].eoi) { handler = handle_fasteoi_irq; } @@ -149,7 +149,7 @@ static int __init clps711x_intc_irq_map(struct irq_domain *h, unsigned int virq, writel_relaxed(0, clps711x_intc->base + clps711x_irqs[hw].eoi); irq_set_chip_and_handler(virq, &clps711x_intc_chip, handler); - set_irq_flags(virq, flags); + irq_modify_status(virq, IRQ_NOPROBE, flags); return 0; } diff --git a/drivers/irqchip/irq-dw-apb-ictl.c b/drivers/irqchip/irq-dw-apb-ictl.c index efd95d9955e7..052f266364c0 100644 --- a/drivers/irqchip/irq-dw-apb-ictl.c +++ b/drivers/irqchip/irq-dw-apb-ictl.c @@ -26,7 +26,7 @@ #define APB_INT_FINALSTATUS_H 0x34 #define APB_INT_BASE_OFFSET 0x04 -static void dw_apb_ictl_handler(unsigned int irq, struct irq_desc *desc) +static void dw_apb_ictl_handler(struct irq_desc *desc) { struct irq_domain *d = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index db04fc1f56b2..12985daa66ab 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -95,8 +95,8 @@ static void gicv2m_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) struct v2m_data *v2m = irq_data_get_irq_chip_data(data); phys_addr_t addr = v2m->res.start + V2M_MSI_SETSPI_NS; - msg->address_hi = (u32) (addr >> 32); - msg->address_lo = (u32) (addr); + msg->address_hi = upper_32_bits(addr); + msg->address_lo = lower_32_bits(addr); msg->data = data->hwirq; } diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 26b55c53755f..ac7ae2b3cb83 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -898,8 +898,10 @@ retry_baser: * non-cacheable as well. 
*/ shr = tmp & GITS_BASER_SHAREABILITY_MASK; - if (!shr) + if (!shr) { cache = GITS_BASER_nC; + __flush_dcache_area(base, alloc_size); + } goto retry_baser; } @@ -1140,6 +1142,8 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id, return NULL; } + __flush_dcache_area(itt, sz); + dev->its = its; dev->itt = itt; dev->nr_ites = nr_ites; diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 7deed6ef54c2..36ecfc870e5a 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -70,11 +70,6 @@ static inline int gic_irq_in_rdist(struct irq_data *d) return gic_irq(d) < 32; } -static inline bool forwarded_irq(struct irq_data *d) -{ - return d->handler_data != NULL; -} - static inline void __iomem *gic_dist_base(struct irq_data *d) { if (gic_irq_in_rdist(d)) /* SGI+PPI -> SGI_base for this CPU */ @@ -249,7 +244,7 @@ static void gic_eoimode1_mask_irq(struct irq_data *d) * disabled/masked will not get "stuck", because there is * noone to deactivate it (guest is being terminated). */ - if (forwarded_irq(d)) + if (irqd_is_forwarded_to_vcpu(d)) gic_poke_irq(d, GICD_ICACTIVER); } @@ -324,7 +319,7 @@ static void gic_eoimode1_eoi_irq(struct irq_data *d) * No need to deactivate an LPI, or an interrupt that * is is getting forwarded to a vcpu. 
*/ - if (gic_irq(d) >= 8192 || forwarded_irq(d)) + if (gic_irq(d) >= 8192 || irqd_is_forwarded_to_vcpu(d)) return; gic_write_dir(gic_irq(d)); } @@ -357,7 +352,10 @@ static int gic_set_type(struct irq_data *d, unsigned int type) static int gic_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu) { - d->handler_data = vcpu; + if (vcpu) + irqd_set_forwarded_to_vcpu(d); + else + irqd_clr_forwarded_to_vcpu(d); return 0; } @@ -754,13 +752,13 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, irq_set_percpu_devid(irq); irq_domain_set_info(d, irq, hw, chip, d->host_data, handle_percpu_devid_irq, NULL, NULL); - set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN); + irq_set_status_flags(irq, IRQ_NOAUTOEN); } /* SPIs */ if (hw >= 32 && hw < gic_data.irq_nr) { irq_domain_set_info(d, irq, hw, chip, d->host_data, handle_fasteoi_irq, NULL, NULL); - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); + irq_set_probe(irq); } /* LPIs */ if (hw >= 8192 && hw < GIC_ID_NR) { @@ -768,7 +766,6 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, return -EPERM; irq_domain_set_info(d, irq, hw, chip, d->host_data, handle_fasteoi_irq, NULL, NULL); - set_irq_flags(irq, IRQF_VALID); } return 0; diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index e6b7ed537952..982c09c2d791 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -145,29 +145,10 @@ static inline bool cascading_gic_irq(struct irq_data *d) void *data = irq_data_get_irq_handler_data(d); /* - * If handler_data pointing to one of the secondary GICs, then - * this is a cascading interrupt, and it cannot possibly be - * forwarded. + * If handler_data is set, this is a cascading interrupt, and + * it cannot possibly be forwarded. 
*/ - if (data >= (void *)(gic_data + 1) && - data < (void *)(gic_data + MAX_GIC_NR)) - return true; - - return false; -} - -static inline bool forwarded_irq(struct irq_data *d) -{ - /* - * A forwarded interrupt: - * - is on the primary GIC - * - has its handler_data set to a value - * - that isn't a secondary GIC - */ - if (d->handler_data && !cascading_gic_irq(d)) - return true; - - return false; + return data != NULL; } /* @@ -201,7 +182,7 @@ static void gic_eoimode1_mask_irq(struct irq_data *d) * disabled/masked will not get "stuck", because there is * noone to deactivate it (guest is being terminated). */ - if (forwarded_irq(d)) + if (irqd_is_forwarded_to_vcpu(d)) gic_poke_irq(d, GIC_DIST_ACTIVE_CLEAR); } @@ -218,7 +199,7 @@ static void gic_eoi_irq(struct irq_data *d) static void gic_eoimode1_eoi_irq(struct irq_data *d) { /* Do not deactivate an IRQ forwarded to a vcpu. */ - if (forwarded_irq(d)) + if (irqd_is_forwarded_to_vcpu(d)) return; writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_DEACTIVATE); @@ -296,7 +277,10 @@ static int gic_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu) if (cascading_gic_irq(d)) return -EINVAL; - d->handler_data = vcpu; + if (vcpu) + irqd_set_forwarded_to_vcpu(d); + else + irqd_clr_forwarded_to_vcpu(d); return 0; } @@ -357,7 +341,7 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) } while (1); } -static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc) +static void gic_handle_cascade_irq(struct irq_desc *desc) { struct gic_chip_data *chip_data = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); @@ -376,7 +360,7 @@ static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc) cascade_irq = irq_find_mapping(chip_data->domain, gic_irq); if (unlikely(gic_irq < 32 || gic_irq > 1020)) - handle_bad_irq(cascade_irq, desc); + handle_bad_irq(desc); else generic_handle_irq(cascade_irq); @@ -906,11 +890,11 @@ static int gic_irq_domain_map(struct 
irq_domain *d, unsigned int irq, irq_set_percpu_devid(irq); irq_domain_set_info(d, irq, hw, chip, d->host_data, handle_percpu_devid_irq, NULL, NULL); - set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN); + irq_set_status_flags(irq, IRQ_NOAUTOEN); } else { irq_domain_set_info(d, irq, hw, chip, d->host_data, handle_fasteoi_irq, NULL, NULL); - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); + irq_set_probe(irq); } return 0; } @@ -1119,12 +1103,49 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start, #ifdef CONFIG_OF static int gic_cnt __initdata; +static bool gic_check_eoimode(struct device_node *node, void __iomem **base) +{ + struct resource cpuif_res; + + of_address_to_resource(node, 1, &cpuif_res); + + if (!is_hyp_mode_available()) + return false; + if (resource_size(&cpuif_res) < SZ_8K) + return false; + if (resource_size(&cpuif_res) == SZ_128K) { + u32 val_low, val_high; + + /* + * Verify that we have the first 4kB of a GIC400 + * aliased over the first 64kB by checking the + * GICC_IIDR register on both ends. + */ + val_low = readl_relaxed(*base + GIC_CPU_IDENT); + val_high = readl_relaxed(*base + GIC_CPU_IDENT + 0xf000); + if ((val_low & 0xffff0fff) != 0x0202043B || + val_low != val_high) + return false; + + /* + * Move the base up by 60kB, so that we have a 8kB + * contiguous region, which allows us to use GICC_DIR + * at its normal offset. Please pass me that bucket. 
+ */ + *base += 0xf000; + cpuif_res.start += 0xf000; + pr_warn("GIC: Adjusting CPU interface base to %pa", + &cpuif_res.start); + } + + return true; +} + static int __init gic_of_init(struct device_node *node, struct device_node *parent) { void __iomem *cpu_base; void __iomem *dist_base; - struct resource cpu_res; u32 percpu_offset; int irq; @@ -1137,14 +1158,11 @@ gic_of_init(struct device_node *node, struct device_node *parent) cpu_base = of_iomap(node, 1); WARN(!cpu_base, "unable to map gic cpu registers\n"); - of_address_to_resource(node, 1, &cpu_res); - /* * Disable split EOI/Deactivate if either HYP is not available * or the CPU interface is too small. */ - if (gic_cnt == 0 && (!is_hyp_mode_available() || - resource_size(&cpu_res) < SZ_8K)) + if (gic_cnt == 0 && !gic_check_eoimode(node, &cpu_base)) static_key_slow_dec(&supports_deactivate); if (of_property_read_u32(node, "cpu-offset", &percpu_offset)) diff --git a/drivers/irqchip/irq-hip04.c b/drivers/irqchip/irq-hip04.c index a0128c7c98dd..8f3ca8f3a62b 100644 --- a/drivers/irqchip/irq-hip04.c +++ b/drivers/irqchip/irq-hip04.c @@ -307,11 +307,11 @@ static int hip04_irq_domain_map(struct irq_domain *d, unsigned int irq, irq_set_percpu_devid(irq); irq_set_chip_and_handler(irq, &hip04_irq_chip, handle_percpu_devid_irq); - set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN); + irq_set_status_flags(irq, IRQ_NOAUTOEN); } else { irq_set_chip_and_handler(irq, &hip04_irq_chip, handle_fasteoi_irq); - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); + irq_set_probe(irq); } irq_set_chip_data(irq, d->host_data); return 0; diff --git a/drivers/irqchip/irq-i8259.c b/drivers/irqchip/irq-i8259.c index 4836102ba312..e484fd255321 100644 --- a/drivers/irqchip/irq-i8259.c +++ b/drivers/irqchip/irq-i8259.c @@ -352,7 +352,7 @@ void __init init_i8259_irqs(void) __init_i8259_irqs(NULL); } -static void i8259_irq_dispatch(unsigned int __irq, struct irq_desc *desc) +static void i8259_irq_dispatch(struct irq_desc *desc) { struct irq_domain 
*domain = irq_desc_get_handler_data(desc); int hwirq = i8259_irq(); diff --git a/drivers/irqchip/irq-imgpdc.c b/drivers/irqchip/irq-imgpdc.c index 841604b81004..c02d29c9dc05 100644 --- a/drivers/irqchip/irq-imgpdc.c +++ b/drivers/irqchip/irq-imgpdc.c @@ -218,7 +218,7 @@ static int pdc_irq_set_wake(struct irq_data *data, unsigned int on) return 0; } -static void pdc_intc_perip_isr(unsigned int __irq, struct irq_desc *desc) +static void pdc_intc_perip_isr(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); struct pdc_intc_priv *priv; @@ -240,7 +240,7 @@ found: generic_handle_irq(irq_no); } -static void pdc_intc_syswake_isr(unsigned int irq, struct irq_desc *desc) +static void pdc_intc_syswake_isr(struct irq_desc *desc) { struct pdc_intc_priv *priv; unsigned int syswake, irq_no; diff --git a/drivers/irqchip/irq-keystone.c b/drivers/irqchip/irq-keystone.c index c1517267b5db..deb89d63a728 100644 --- a/drivers/irqchip/irq-keystone.c +++ b/drivers/irqchip/irq-keystone.c @@ -83,7 +83,7 @@ static void keystone_irq_ack(struct irq_data *d) /* nothing to do here */ } -static void keystone_irq_handler(unsigned __irq, struct irq_desc *desc) +static void keystone_irq_handler(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); struct keystone_irq_device *kirq = irq_desc_get_handler_data(desc); @@ -127,7 +127,7 @@ static int keystone_irq_map(struct irq_domain *h, unsigned int virq, irq_set_chip_data(virq, kirq); irq_set_chip_and_handler(virq, &kirq->chip, handle_level_irq); - set_irq_flags(virq, IRQF_VALID | IRQF_PROBE); + irq_set_probe(virq); return 0; } diff --git a/drivers/irqchip/irq-metag-ext.c b/drivers/irqchip/irq-metag-ext.c index 5f4c52928d16..8c38b3d92e1c 100644 --- a/drivers/irqchip/irq-metag-ext.c +++ b/drivers/irqchip/irq-metag-ext.c @@ -446,7 +446,7 @@ static int meta_intc_irq_set_type(struct irq_data *data, unsigned int flow_type) * Whilst using TR2 to detect external interrupts is a software convention it is * (hopefully) unlikely to 
change. */ -static void meta_intc_irq_demux(unsigned int irq, struct irq_desc *desc) +static void meta_intc_irq_demux(struct irq_desc *desc) { struct meta_intc_priv *priv = &meta_intc_priv; irq_hw_number_t hw; diff --git a/drivers/irqchip/irq-metag.c b/drivers/irqchip/irq-metag.c index 3d23ce3edb5c..a5f053bd2f44 100644 --- a/drivers/irqchip/irq-metag.c +++ b/drivers/irqchip/irq-metag.c @@ -220,7 +220,7 @@ static int metag_internal_irq_set_affinity(struct irq_data *data, * occurred. It is this function's job to demux this irq and * figure out exactly which trigger needs servicing. */ -static void metag_internal_irq_demux(unsigned int irq, struct irq_desc *desc) +static void metag_internal_irq_demux(struct irq_desc *desc) { struct metag_internal_irq_priv *priv = irq_desc_get_handler_data(desc); irq_hw_number_t hw; diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 1764bcf8ee6b..af2f16bb8a94 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -546,7 +546,7 @@ static void __gic_irq_dispatch(void) gic_handle_shared_int(false); } -static void gic_irq_dispatch(unsigned int irq, struct irq_desc *desc) +static void gic_irq_dispatch(struct irq_desc *desc) { gic_handle_local_int(true); gic_handle_shared_int(true); diff --git a/drivers/irqchip/irq-mmp.c b/drivers/irqchip/irq-mmp.c index 781ed6e71dbb..013fc9659a84 100644 --- a/drivers/irqchip/irq-mmp.c +++ b/drivers/irqchip/irq-mmp.c @@ -129,7 +129,7 @@ struct irq_chip icu_irq_chip = { .irq_unmask = icu_unmask_irq, }; -static void icu_mux_irq_demux(unsigned int __irq, struct irq_desc *desc) +static void icu_mux_irq_demux(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); struct irq_domain *domain; @@ -164,7 +164,6 @@ static int mmp_irq_domain_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) { irq_set_chip_and_handler(irq, &icu_irq_chip, handle_level_irq); - set_irq_flags(irq, IRQF_VALID); return 0; } @@ -234,7 +233,6 @@ void __init 
icu_init_irq(void) for (irq = 0; irq < 64; irq++) { icu_mask_irq(irq_get_irq_data(irq)); irq_set_chip_and_handler(irq, &icu_irq_chip, handle_level_irq); - set_irq_flags(irq, IRQF_VALID); } irq_set_default_host(icu_data[0].domain); set_handle_irq(mmp_handle_irq); @@ -337,7 +335,6 @@ void __init mmp2_init_icu(void) irq_set_chip_and_handler(irq, &icu_irq_chip, handle_level_irq); } - set_irq_flags(irq, IRQF_VALID); } irq_set_default_host(icu_data[0].domain); set_handle_irq(mmp2_handle_irq); diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c index 1faf812f3dc8..604df63e2edf 100644 --- a/drivers/irqchip/irq-mxs.c +++ b/drivers/irqchip/irq-mxs.c @@ -84,7 +84,6 @@ static int icoll_irq_domain_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw) { irq_set_chip_and_handler(virq, &mxs_icoll_chip, handle_level_irq); - set_irq_flags(virq, IRQF_VALID); return 0; } diff --git a/drivers/irqchip/irq-orion.c b/drivers/irqchip/irq-orion.c index 5ea999a724b5..be4c5a8c9659 100644 --- a/drivers/irqchip/irq-orion.c +++ b/drivers/irqchip/irq-orion.c @@ -106,7 +106,7 @@ IRQCHIP_DECLARE(orion_intc, "marvell,orion-intc", orion_irq_init); #define ORION_BRIDGE_IRQ_CAUSE 0x00 #define ORION_BRIDGE_IRQ_MASK 0x04 -static void orion_bridge_irq_handler(unsigned int irq, struct irq_desc *desc) +static void orion_bridge_irq_handler(struct irq_desc *desc) { struct irq_domain *d = irq_desc_get_handler_data(desc); diff --git a/drivers/irqchip/irq-renesas-intc-irqpin.c b/drivers/irqchip/irq-renesas-intc-irqpin.c index 0670ab4e3897..9525335723f6 100644 --- a/drivers/irqchip/irq-renesas-intc-irqpin.c +++ b/drivers/irqchip/irq-renesas-intc-irqpin.c @@ -283,6 +283,9 @@ static int intc_irqpin_irq_set_type(struct irq_data *d, unsigned int type) static int intc_irqpin_irq_set_wake(struct irq_data *d, unsigned int on) { struct intc_irqpin_priv *p = irq_data_get_irq_chip_data(d); + int hw_irq = irqd_to_hwirq(d); + + irq_set_irq_wake(p->irq[hw_irq].requested_irq, on); if (!p->clk) return 
0; @@ -332,6 +335,12 @@ static irqreturn_t intc_irqpin_shared_irq_handler(int irq, void *dev_id) return status; } +/* + * This lock class tells lockdep that INTC External IRQ Pin irqs are in a + * different category than their parents, so it won't report false recursion. + */ +static struct lock_class_key intc_irqpin_irq_lock_class; + static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) { @@ -342,8 +351,8 @@ static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq, intc_irqpin_dbg(&p->irq[hw], "map"); irq_set_chip_data(virq, h->host_data); + irq_set_lockdep_class(virq, &intc_irqpin_irq_lock_class); irq_set_chip_and_handler(virq, &p->irq_chip, handle_level_irq); - set_irq_flags(virq, IRQF_VALID); /* kill me now */ return 0; } diff --git a/drivers/irqchip/irq-renesas-irqc.c b/drivers/irqchip/irq-renesas-irqc.c index 2aa3add711a6..35bf97ba4a3d 100644 --- a/drivers/irqchip/irq-renesas-irqc.c +++ b/drivers/irqchip/irq-renesas-irqc.c @@ -121,6 +121,9 @@ static int irqc_irq_set_type(struct irq_data *d, unsigned int type) static int irqc_irq_set_wake(struct irq_data *d, unsigned int on) { struct irqc_priv *p = irq_data_get_irq_chip_data(d); + int hw_irq = irqd_to_hwirq(d); + + irq_set_irq_wake(p->irq[hw_irq].requested_irq, on); if (!p->clk) return 0; @@ -150,6 +153,12 @@ static irqreturn_t irqc_irq_handler(int irq, void *dev_id) return IRQ_NONE; } +/* + * This lock class tells lockdep that IRQC irqs are in a different + * category than their parents, so it won't report false recursion. 
+ */ +static struct lock_class_key irqc_irq_lock_class; + static int irqc_irq_domain_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) { @@ -157,6 +166,7 @@ static int irqc_irq_domain_map(struct irq_domain *h, unsigned int virq, irqc_dbg(&p->irq[hw], "map"); irq_set_chip_data(virq, h->host_data); + irq_set_lockdep_class(virq, &irqc_irq_lock_class); irq_set_chip_and_handler(virq, &p->irq_chip, handle_level_irq); return 0; } diff --git a/drivers/irqchip/irq-s3c24xx.c b/drivers/irqchip/irq-s3c24xx.c index 506d9f20ca51..7154b011ddd2 100644 --- a/drivers/irqchip/irq-s3c24xx.c +++ b/drivers/irqchip/irq-s3c24xx.c @@ -298,7 +298,7 @@ static struct irq_chip s3c_irq_eint0t4 = { .irq_set_type = s3c_irqext0_type, }; -static void s3c_irq_demux(unsigned int __irq, struct irq_desc *desc) +static void s3c_irq_demux(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct s3c_irq_data *irq_data = irq_desc_get_chip_data(desc); @@ -466,13 +466,11 @@ static int s3c24xx_irq_map(struct irq_domain *h, unsigned int virq, irq_set_chip_data(virq, irq_data); - set_irq_flags(virq, IRQF_VALID); - if (parent_intc && irq_data->type != S3C_IRQTYPE_NONE) { if (irq_data->parent_irq > 31) { pr_err("irq-s3c24xx: parent irq %lu is out of range\n", irq_data->parent_irq); - goto err; + return -EINVAL; } parent_irq_data = &parent_intc->irqs[irq_data->parent_irq]; @@ -485,18 +483,12 @@ static int s3c24xx_irq_map(struct irq_domain *h, unsigned int virq, if (!irqno) { pr_err("irq-s3c24xx: could not find mapping for parent irq %lu\n", irq_data->parent_irq); - goto err; + return -EINVAL; } irq_set_chained_handler(irqno, s3c_irq_demux); } return 0; - -err: - set_irq_flags(virq, 0); - - /* the only error can result from bad mapping data*/ - return -EINVAL; } static const struct irq_domain_ops s3c24xx_irq_ops = { @@ -1174,8 +1166,6 @@ static int s3c24xx_irq_map_of(struct irq_domain *h, unsigned int virq, irq_set_chip_data(virq, irq_data); - set_irq_flags(virq, IRQF_VALID); 
- return 0; } diff --git a/drivers/irqchip/irq-sun4i.c b/drivers/irqchip/irq-sun4i.c index 4ad3e7c69aa7..0704362f4c82 100644 --- a/drivers/irqchip/irq-sun4i.c +++ b/drivers/irqchip/irq-sun4i.c @@ -83,7 +83,7 @@ static int sun4i_irq_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw) { irq_set_chip_and_handler(virq, &sun4i_irq_chip, handle_fasteoi_irq); - set_irq_flags(virq, IRQF_VALID | IRQF_PROBE); + irq_set_probe(virq); return 0; } diff --git a/drivers/irqchip/irq-sunxi-nmi.c b/drivers/irqchip/irq-sunxi-nmi.c index 772a82cacbf7..c143dd58410c 100644 --- a/drivers/irqchip/irq-sunxi-nmi.c +++ b/drivers/irqchip/irq-sunxi-nmi.c @@ -58,7 +58,7 @@ static inline u32 sunxi_sc_nmi_read(struct irq_chip_generic *gc, u32 off) return irq_reg_readl(gc, off); } -static void sunxi_sc_nmi_handle_irq(unsigned int irq, struct irq_desc *desc) +static void sunxi_sc_nmi_handle_irq(struct irq_desc *desc) { struct irq_domain *domain = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); diff --git a/drivers/irqchip/irq-tb10x.c b/drivers/irqchip/irq-tb10x.c index 331829661366..848d782a2a3b 100644 --- a/drivers/irqchip/irq-tb10x.c +++ b/drivers/irqchip/irq-tb10x.c @@ -97,7 +97,7 @@ static int tb10x_irq_set_type(struct irq_data *data, unsigned int flow_type) return IRQ_SET_MASK_OK; } -static void tb10x_irq_cascade(unsigned int __irq, struct irq_desc *desc) +static void tb10x_irq_cascade(struct irq_desc *desc) { struct irq_domain *domain = irq_desc_get_handler_data(desc); unsigned int irq = irq_desc_get_irq(desc); diff --git a/drivers/irqchip/irq-versatile-fpga.c b/drivers/irqchip/irq-versatile-fpga.c index 16123f688768..598ab3f0e0ac 100644 --- a/drivers/irqchip/irq-versatile-fpga.c +++ b/drivers/irqchip/irq-versatile-fpga.c @@ -65,19 +65,19 @@ static void fpga_irq_unmask(struct irq_data *d) writel(mask, f->base + IRQ_ENABLE_SET); } -static void fpga_irq_handle(unsigned int __irq, struct irq_desc *desc) +static void fpga_irq_handle(struct irq_desc 
*desc) { struct fpga_irq_data *f = irq_desc_get_handler_data(desc); - unsigned int irq = irq_desc_get_irq(desc); u32 status = readl(f->base + IRQ_STATUS); if (status == 0) { - do_bad_IRQ(irq, desc); + do_bad_IRQ(desc); return; } do { - irq = ffs(status) - 1; + unsigned int irq = ffs(status) - 1; + status &= ~(1 << irq); generic_handle_irq(irq_find_mapping(f->domain, irq)); } while (status); @@ -128,7 +128,7 @@ static int fpga_irqdomain_map(struct irq_domain *d, unsigned int irq, irq_set_chip_data(irq, f); irq_set_chip_and_handler(irq, &f->chip, handle_level_irq); - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); + irq_set_probe(irq); return 0; } diff --git a/drivers/irqchip/irq-vic.c b/drivers/irqchip/irq-vic.c index 03846dff4212..b956dfffe78c 100644 --- a/drivers/irqchip/irq-vic.c +++ b/drivers/irqchip/irq-vic.c @@ -201,7 +201,7 @@ static int vic_irqdomain_map(struct irq_domain *d, unsigned int irq, return -EPERM; irq_set_chip_and_handler(irq, &vic_chip, handle_level_irq); irq_set_chip_data(irq, v->base); - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); + irq_set_probe(irq); return 0; } @@ -225,7 +225,7 @@ static int handle_one_vic(struct vic_device *vic, struct pt_regs *regs) return handled; } -static void vic_handle_irq_cascaded(unsigned int irq, struct irq_desc *desc) +static void vic_handle_irq_cascaded(struct irq_desc *desc) { u32 stat, hwirq; struct irq_chip *host_chip = irq_desc_get_chip(desc); diff --git a/drivers/irqchip/irq-vt8500.c b/drivers/irqchip/irq-vt8500.c index 8371d9978d31..f9af0af21751 100644 --- a/drivers/irqchip/irq-vt8500.c +++ b/drivers/irqchip/irq-vt8500.c @@ -167,7 +167,6 @@ static int vt8500_irq_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) { irq_set_chip_and_handler(virq, &vt8500_irq_chip, handle_level_irq); - set_irq_flags(virq, IRQF_VALID); return 0; } diff --git a/drivers/irqchip/spear-shirq.c b/drivers/irqchip/spear-shirq.c index 4cbd9c5dc1e6..1ccd2abed65f 100644 --- a/drivers/irqchip/spear-shirq.c +++ 
b/drivers/irqchip/spear-shirq.c @@ -182,7 +182,7 @@ static struct spear_shirq *spear320_shirq_blocks[] = { &spear320_shirq_intrcomm_ras, }; -static void shirq_handler(unsigned __irq, struct irq_desc *desc) +static void shirq_handler(struct irq_desc *desc) { struct spear_shirq *shirq = irq_desc_get_handler_data(desc); u32 pend; @@ -211,7 +211,6 @@ static void __init spear_shirq_register(struct spear_shirq *shirq, for (i = 0; i < shirq->nr_irqs; i++) { irq_set_chip_and_handler(shirq->virq_base + i, shirq->irq_chip, handle_simple_irq); - set_irq_flags(shirq->virq_base + i, IRQF_VALID); irq_set_chip_data(shirq->virq_base + i, shirq); } } diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 70f4255ff291..42990f2d0317 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -170,6 +170,7 @@ config LEDS_SUNFIRE config LEDS_IPAQ_MICRO tristate "LED Support for the Compaq iPAQ h3xxx" + depends on LEDS_CLASS depends on MFD_IPAQ_MICRO help Choose this option if you want to use the notification LED on @@ -229,7 +230,7 @@ config LEDS_LP55XX_COMMON tristate "Common Driver for TI/National LP5521/5523/55231/5562/8501" depends on LEDS_LP5521 || LEDS_LP5523 || LEDS_LP5562 || LEDS_LP8501 select FW_LOADER - select FW_LOADER_USER_HELPER_FALLBACK + select FW_LOADER_USER_HELPER help This option supports common operations for LP5521/5523/55231/5562/8501 devices. 
diff --git a/drivers/leds/leds-aat1290.c b/drivers/leds/leds-aat1290.c index fd7c25fd29c1..ac77d36b630c 100644 --- a/drivers/leds/leds-aat1290.c +++ b/drivers/leds/leds-aat1290.c @@ -331,7 +331,7 @@ static void aat1290_led_validate_mm_current(struct aat1290_led *led, cfg->max_brightness = b + 1; } -int init_mm_current_scale(struct aat1290_led *led, +static int init_mm_current_scale(struct aat1290_led *led, struct aat1290_led_config_data *cfg) { int max_mm_current_percent[] = { 20, 22, 25, 28, 32, 36, 40, 45, 50, 56, @@ -559,6 +559,7 @@ static const struct of_device_id aat1290_led_dt_match[] = { { .compatible = "skyworks,aat1290" }, {}, }; +MODULE_DEVICE_TABLE(of, aat1290_led_dt_match); static struct platform_driver aat1290_led_driver = { .probe = aat1290_led_probe, diff --git a/drivers/leds/leds-bcm6328.c b/drivers/leds/leds-bcm6328.c index 986fe1e28f84..1793727bc9ae 100644 --- a/drivers/leds/leds-bcm6328.c +++ b/drivers/leds/leds-bcm6328.c @@ -395,6 +395,7 @@ static const struct of_device_id bcm6328_leds_of_match[] = { { .compatible = "brcm,bcm6328-leds", }, { }, }; +MODULE_DEVICE_TABLE(of, bcm6328_leds_of_match); static struct platform_driver bcm6328_leds_driver = { .probe = bcm6328_leds_probe, diff --git a/drivers/leds/leds-bcm6358.c b/drivers/leds/leds-bcm6358.c index 21f96930b3be..7ea3526702e0 100644 --- a/drivers/leds/leds-bcm6358.c +++ b/drivers/leds/leds-bcm6358.c @@ -226,6 +226,7 @@ static const struct of_device_id bcm6358_leds_of_match[] = { { .compatible = "brcm,bcm6358-leds", }, { }, }; +MODULE_DEVICE_TABLE(of, bcm6358_leds_of_match); static struct platform_driver bcm6358_leds_driver = { .probe = bcm6358_leds_probe, diff --git a/drivers/leds/leds-ktd2692.c b/drivers/leds/leds-ktd2692.c index 2ae8c4d17ff8..feca07be85f5 100644 --- a/drivers/leds/leds-ktd2692.c +++ b/drivers/leds/leds-ktd2692.c @@ -426,6 +426,7 @@ static const struct of_device_id ktd2692_match[] = { { .compatible = "kinetic,ktd2692", }, { /* sentinel */ }, }; +MODULE_DEVICE_TABLE(of, 
ktd2692_match); static struct platform_driver ktd2692_driver = { .driver = { diff --git a/drivers/leds/leds-max77693.c b/drivers/leds/leds-max77693.c index df348a06d8c7..afbb1409b2e2 100644 --- a/drivers/leds/leds-max77693.c +++ b/drivers/leds/leds-max77693.c @@ -1080,6 +1080,7 @@ static const struct of_device_id max77693_led_dt_match[] = { { .compatible = "maxim,max77693-led" }, {}, }; +MODULE_DEVICE_TABLE(of, max77693_led_dt_match); static struct platform_driver max77693_led_driver = { .probe = max77693_led_probe, diff --git a/drivers/leds/leds-ns2.c b/drivers/leds/leds-ns2.c index b33514d9f427..a95a61220169 100644 --- a/drivers/leds/leds-ns2.c +++ b/drivers/leds/leds-ns2.c @@ -337,6 +337,7 @@ static const struct of_device_id of_ns2_leds_match[] = { { .compatible = "lacie,ns2-leds", }, {}, }; +MODULE_DEVICE_TABLE(of, of_ns2_leds_match); #endif /* CONFIG_OF_GPIO */ struct ns2_led_priv { diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c index 4b54128bc78e..a726f01e3b02 100644 --- a/drivers/mfd/asic3.c +++ b/drivers/mfd/asic3.c @@ -138,7 +138,7 @@ static void asic3_irq_flip_edge(struct asic3 *asic, spin_unlock_irqrestore(&asic->lock, flags); } -static void asic3_irq_demux(unsigned int irq, struct irq_desc *desc) +static void asic3_irq_demux(struct irq_desc *desc) { struct asic3 *asic = irq_desc_get_handler_data(desc); struct irq_data *data = irq_desc_get_irq_data(desc); diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c index a76eb6ef47a0..b279205659a4 100644 --- a/drivers/mfd/ezx-pcap.c +++ b/drivers/mfd/ezx-pcap.c @@ -205,7 +205,7 @@ static void pcap_isr_work(struct work_struct *work) } while (gpio_get_value(pdata->gpio)); } -static void pcap_irq_handler(unsigned int irq, struct irq_desc *desc) +static void pcap_irq_handler(struct irq_desc *desc) { struct pcap_chip *pcap = irq_desc_get_handler_data(desc); diff --git a/drivers/mfd/htc-egpio.c b/drivers/mfd/htc-egpio.c index 9131cdcdc64a..6ccaf90d98fd 100644 --- a/drivers/mfd/htc-egpio.c +++ 
b/drivers/mfd/htc-egpio.c @@ -98,7 +98,7 @@ static struct irq_chip egpio_muxed_chip = { .irq_unmask = egpio_unmask, }; -static void egpio_handler(unsigned int irq, struct irq_desc *desc) +static void egpio_handler(struct irq_desc *desc) { struct egpio_info *ei = irq_desc_get_handler_data(desc); int irqpin; diff --git a/drivers/mfd/jz4740-adc.c b/drivers/mfd/jz4740-adc.c index 5bb49f08955d..798e44306382 100644 --- a/drivers/mfd/jz4740-adc.c +++ b/drivers/mfd/jz4740-adc.c @@ -65,7 +65,7 @@ struct jz4740_adc { spinlock_t lock; }; -static void jz4740_adc_irq_demux(unsigned int irq, struct irq_desc *desc) +static void jz4740_adc_irq_demux(struct irq_desc *desc) { struct irq_chip_generic *gc = irq_desc_get_handler_data(desc); uint8_t status; diff --git a/drivers/mfd/pm8921-core.c b/drivers/mfd/pm8921-core.c index 59502d02cd15..1b7ec0870c2a 100644 --- a/drivers/mfd/pm8921-core.c +++ b/drivers/mfd/pm8921-core.c @@ -156,7 +156,7 @@ static int pm8xxx_irq_master_handler(struct pm_irq_chip *chip, int master) return ret; } -static void pm8xxx_irq_handler(unsigned int irq, struct irq_desc *desc) +static void pm8xxx_irq_handler(struct irq_desc *desc) { struct pm_irq_chip *chip = irq_desc_get_handler_data(desc); struct irq_chip *irq_chip = irq_desc_get_chip(desc); diff --git a/drivers/mfd/t7l66xb.c b/drivers/mfd/t7l66xb.c index 16fc1adc4fa3..94bd89cb1f06 100644 --- a/drivers/mfd/t7l66xb.c +++ b/drivers/mfd/t7l66xb.c @@ -185,7 +185,7 @@ static struct mfd_cell t7l66xb_cells[] = { /*--------------------------------------------------------------------------*/ /* Handle the T7L66XB interrupt mux */ -static void t7l66xb_irq(unsigned int irq, struct irq_desc *desc) +static void t7l66xb_irq(struct irq_desc *desc) { struct t7l66xb *t7l66xb = irq_desc_get_handler_data(desc); unsigned int isr; diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c index 775b9aca871a..8c84a513016b 100644 --- a/drivers/mfd/tc6393xb.c +++ b/drivers/mfd/tc6393xb.c @@ -522,8 +522,7 @@ static int 
tc6393xb_register_gpio(struct tc6393xb *tc6393xb, int gpio_base) /*--------------------------------------------------------------------------*/ -static void -tc6393xb_irq(unsigned int irq, struct irq_desc *desc) +static void tc6393xb_irq(struct irq_desc *desc) { struct tc6393xb *tc6393xb = irq_desc_get_handler_data(desc); unsigned int isr; diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c index 9a2302129711..f691d7ecad52 100644 --- a/drivers/mfd/ucb1x00-core.c +++ b/drivers/mfd/ucb1x00-core.c @@ -282,7 +282,7 @@ void ucb1x00_adc_disable(struct ucb1x00 *ucb) * SIBCLK to talk to the chip. We leave the clock running until * we have finished processing all interrupts from the chip. */ -static void ucb1x00_irq(unsigned int __irq, struct irq_desc *desc) +static void ucb1x00_irq(struct irq_desc *desc) { struct ucb1x00 *ucb = irq_desc_get_handler_data(desc); unsigned int isr, i; diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile index 6f484dfe78f9..6982f603fadc 100644 --- a/drivers/misc/cxl/Makefile +++ b/drivers/misc/cxl/Makefile @@ -1,4 +1,4 @@ -ccflags-y := -Werror +ccflags-y := -Werror -Wno-unused-const-variable cxl-y += main.o file.o irq.o fault.o native.o cxl-y += context.o sysfs.o debugfs.o pci.o trace.o diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 02c85160bfe9..a5e977192b61 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1249,8 +1249,6 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) int slice; int rc; - pci_dev_get(dev); - if (cxl_verbose) dump_cxl_config_space(dev); diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index 6dd16a6d153f..94b520896b18 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -48,6 +48,12 @@ static bool cxl_pci_enable_device_hook(struct pci_dev *dev) phb = pci_bus_to_host(dev->bus); afu = (struct cxl_afu *)phb->private_data; + + if (!cxl_adapter_link_ok(afu->adapter)) { + dev_warn(&dev->dev, "%s: Device 
link is down, refusing to enable AFU\n", __func__); + return false; + } + set_dma_ops(&dev->dev, &dma_direct_ops); set_dma_offset(&dev->dev, PAGE_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 4402a1e48c9b..4c7de8c44659 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -1047,13 +1047,15 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) /* If we used up all the quota - we're probably not done yet... */ if (done == budget) { - int cpu_curr; const struct cpumask *aff; + struct irq_data *idata; + int cpu_curr; INC_PERF_COUNTER(priv->pstats.napi_quota); cpu_curr = smp_processor_id(); - aff = irq_desc_get_irq_data(cq->irq_desc)->affinity; + idata = irq_desc_get_irq_data(cq->irq_desc); + aff = irq_data_get_affinity_mask(idata); if (likely(cpumask_test_cpu(cpu_curr, aff))) return budget; diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 59ad54a63d9f..cb477518dd0e 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -128,13 +128,13 @@ static ssize_t namespace_store(struct device *dev, struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; - nvdimm_bus_lock(dev); device_lock(dev); + nvdimm_bus_lock(dev); rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len); dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, rc, buf, buf[len - 1] == '\n' ? 
"" : "\n"); - device_unlock(dev); nvdimm_bus_unlock(dev); + device_unlock(dev); return rc; } diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 3fd7d0d81a47..71805a1aa0f3 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -148,13 +148,13 @@ static ssize_t namespace_store(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn(dev); ssize_t rc; - nvdimm_bus_lock(dev); device_lock(dev); + nvdimm_bus_lock(dev); rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len); dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, rc, buf, buf[len - 1] == '\n' ? "" : "\n"); - device_unlock(dev); nvdimm_bus_unlock(dev); + device_unlock(dev); return rc; } diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index b9525385c0dc..0ba6a978f227 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -92,6 +92,8 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, struct pmem_device *pmem = bdev->bd_disk->private_data; pmem_do_bvec(pmem, page, PAGE_CACHE_SIZE, 0, rw, sector); + if (rw & WRITE) + wmb_pmem(); page_endio(page, rw & WRITE, 0); return 0; diff --git a/drivers/pci/host/pci-keystone.c b/drivers/pci/host/pci-keystone.c index 81253e70b1c5..0aa81bd3de12 100644 --- a/drivers/pci/host/pci-keystone.c +++ b/drivers/pci/host/pci-keystone.c @@ -110,7 +110,7 @@ static int ks_pcie_establish_link(struct keystone_pcie *ks_pcie) return -EINVAL; } -static void ks_pcie_msi_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void ks_pcie_msi_irq_handler(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); struct keystone_pcie *ks_pcie = irq_desc_get_handler_data(desc); @@ -138,8 +138,7 @@ static void ks_pcie_msi_irq_handler(unsigned int __irq, struct irq_desc *desc) * Traverse through pending legacy interrupts and invoke handler for each. Also * takes care of interrupt controller level mask/ack operation. 
*/ -static void ks_pcie_legacy_irq_handler(unsigned int __irq, - struct irq_desc *desc) +static void ks_pcie_legacy_irq_handler(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); struct keystone_pcie *ks_pcie = irq_desc_get_handler_data(desc); diff --git a/drivers/pci/host/pci-xgene-msi.c b/drivers/pci/host/pci-xgene-msi.c index 996327cfa1e1..e491681daf22 100644 --- a/drivers/pci/host/pci-xgene-msi.c +++ b/drivers/pci/host/pci-xgene-msi.c @@ -295,7 +295,7 @@ static int xgene_msi_init_allocator(struct xgene_msi *xgene_msi) return 0; } -static void xgene_msi_isr(unsigned int irq, struct irq_desc *desc) +static void xgene_msi_isr(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct xgene_msi_group *msi_groups; diff --git a/drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c b/drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c index 7d9482bf8252..1ca783098e47 100644 --- a/drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c +++ b/drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c @@ -143,7 +143,7 @@ static inline bool cygnus_get_bit(struct cygnus_gpio *chip, unsigned int reg, return !!(readl(chip->base + offset) & BIT(shift)); } -static void cygnus_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) +static void cygnus_gpio_irq_handler(struct irq_desc *desc) { struct gpio_chip *gc = irq_desc_get_handler_data(desc); struct cygnus_gpio *chip = to_cygnus_gpio(gc); diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 69723e07036b..9638a00c67c2 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -349,6 +349,9 @@ static bool pinctrl_ready_for_gpio_range(unsigned gpio) struct pinctrl_gpio_range *range = NULL; struct gpio_chip *chip = gpio_to_chip(gpio); + if (WARN(!chip, "no gpio_chip for gpio%i?", gpio)) + return false; + mutex_lock(&pinctrldev_list_mutex); /* Loop over the pin controllers */ diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index dac4865f3203..f79ea430f651 100644 --- 
a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -425,7 +425,7 @@ static void byt_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) } } -static void byt_gpio_irq_handler(unsigned irq, struct irq_desc *desc) +static void byt_gpio_irq_handler(struct irq_desc *desc) { struct irq_data *data = irq_desc_get_irq_data(desc); struct byt_gpio *vg = to_byt_gpio(irq_desc_get_handler_data(desc)); diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index 2d5d3ddc36e5..270c127e03ea 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1414,7 +1414,7 @@ static struct irq_chip chv_gpio_irqchip = { .flags = IRQCHIP_SKIP_SET_WAKE, }; -static void chv_gpio_irq_handler(unsigned irq, struct irq_desc *desc) +static void chv_gpio_irq_handler(struct irq_desc *desc) { struct gpio_chip *gc = irq_desc_get_handler_data(desc); struct chv_pinctrl *pctrl = gpiochip_to_pinctrl(gc); diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index bb377c110541..54848b8decef 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -836,7 +836,7 @@ static void intel_gpio_community_irq_handler(struct gpio_chip *gc, } } -static void intel_gpio_irq_handler(unsigned irq, struct irq_desc *desc) +static void intel_gpio_irq_handler(struct irq_desc *desc) { struct gpio_chip *gc = irq_desc_get_handler_data(desc); struct intel_pinctrl *pctrl = gpiochip_to_pinctrl(gc); diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index 7726c6caaf83..1b22f96ba839 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -1190,7 +1190,7 @@ mtk_eint_debounce_process(struct mtk_pinctrl *pctl, int index) } } -static void mtk_eint_irq_handler(unsigned irq, struct irq_desc *desc) +static void 
mtk_eint_irq_handler(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct mtk_pinctrl *pctl = irq_desc_get_handler_data(desc); diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c index 352ede13a9e9..96cf03908e93 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c @@ -860,7 +860,7 @@ static void __nmk_gpio_irq_handler(struct irq_desc *desc, u32 status) chained_irq_exit(host_chip, desc); } -static void nmk_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) +static void nmk_gpio_irq_handler(struct irq_desc *desc) { struct gpio_chip *chip = irq_desc_get_handler_data(desc); struct nmk_gpio_chip *nmk_chip = container_of(chip, struct nmk_gpio_chip, chip); @@ -873,7 +873,7 @@ static void nmk_gpio_irq_handler(unsigned int irq, struct irq_desc *desc) __nmk_gpio_irq_handler(desc, status); } -static void nmk_gpio_latent_irq_handler(unsigned int irq, struct irq_desc *desc) +static void nmk_gpio_latent_irq_handler(struct irq_desc *desc) { struct gpio_chip *chip = irq_desc_get_handler_data(desc); struct nmk_gpio_chip *nmk_chip = container_of(chip, struct nmk_gpio_chip, chip); diff --git a/drivers/pinctrl/pinctrl-adi2.c b/drivers/pinctrl/pinctrl-adi2.c index a5976ebc4482..f6be68518c87 100644 --- a/drivers/pinctrl/pinctrl-adi2.c +++ b/drivers/pinctrl/pinctrl-adi2.c @@ -530,8 +530,7 @@ static inline void preflow_handler(struct irq_desc *desc) static inline void preflow_handler(struct irq_desc *desc) { } #endif -static void adi_gpio_handle_pint_irq(unsigned int inta_irq, - struct irq_desc *desc) +static void adi_gpio_handle_pint_irq(struct irq_desc *desc) { u32 request; u32 level_mask, hwirq; diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 5e86bb8ca80e..3318f1d6193c 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -492,15 +492,15 @@ static struct irq_chip amd_gpio_irqchip = { 
.irq_set_type = amd_gpio_irq_set_type, }; -static void amd_gpio_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void amd_gpio_irq_handler(struct irq_desc *desc) { - unsigned int irq = irq_desc_get_irq(desc); u32 i; u32 off; u32 reg; u32 pin_reg; u64 reg64; int handled = 0; + unsigned int irq; unsigned long flags; struct irq_chip *chip = irq_desc_get_chip(desc); struct gpio_chip *gc = irq_desc_get_handler_data(desc); @@ -541,7 +541,7 @@ static void amd_gpio_irq_handler(unsigned int __irq, struct irq_desc *desc) } if (handled == 0) - handle_bad_irq(irq, desc); + handle_bad_irq(desc); spin_lock_irqsave(&gpio_dev->lock, flags); reg = readl(gpio_dev->base + WAKE_INT_MASTER_REG); diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index bae0012ee356..b0fde0f385e6 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -1585,7 +1585,7 @@ static struct irq_chip gpio_irqchip = { .irq_set_wake = gpio_irq_set_wake, }; -static void gpio_irq_handler(unsigned irq, struct irq_desc *desc) +static void gpio_irq_handler(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct gpio_chip *gpio_chip = irq_desc_get_handler_data(desc); diff --git a/drivers/pinctrl/pinctrl-coh901.c b/drivers/pinctrl/pinctrl-coh901.c index 3731cc67a88b..9c9b88934bcc 100644 --- a/drivers/pinctrl/pinctrl-coh901.c +++ b/drivers/pinctrl/pinctrl-coh901.c @@ -519,7 +519,7 @@ static struct irq_chip u300_gpio_irqchip = { .irq_set_type = u300_gpio_irq_type, }; -static void u300_gpio_irq_handler(unsigned __irq, struct irq_desc *desc) +static void u300_gpio_irq_handler(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); struct irq_chip *parent_chip = irq_desc_get_chip(desc); diff --git a/drivers/pinctrl/pinctrl-digicolor.c b/drivers/pinctrl/pinctrl-digicolor.c index 461fffc4c62a..11f8b835d3b6 100644 --- a/drivers/pinctrl/pinctrl-digicolor.c +++ b/drivers/pinctrl/pinctrl-digicolor.c @@ -337,9 +337,9 @@ static 
int dc_pinctrl_probe(struct platform_device *pdev) pmap->dev = &pdev->dev; pmap->pctl = pinctrl_register(pctl_desc, &pdev->dev, pmap); - if (!pmap->pctl) { + if (IS_ERR(pmap->pctl)) { dev_err(&pdev->dev, "pinctrl driver registration failed\n"); - return -EINVAL; + return PTR_ERR(pmap->pctl); } ret = dc_gpiochip_add(pmap, pdev->dev.of_node); diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c index 3dc2ae15f3a1..952b1c623887 100644 --- a/drivers/pinctrl/pinctrl-pistachio.c +++ b/drivers/pinctrl/pinctrl-pistachio.c @@ -1303,20 +1303,18 @@ static int pistachio_gpio_irq_set_type(struct irq_data *data, unsigned int type) } if (type & IRQ_TYPE_LEVEL_MASK) - __irq_set_handler_locked(data->irq, handle_level_irq); + irq_set_handler_locked(data, handle_level_irq); else - __irq_set_handler_locked(data->irq, handle_edge_irq); + irq_set_handler_locked(data, handle_edge_irq); return 0; } -static void pistachio_gpio_irq_handler(unsigned int __irq, - struct irq_desc *desc) +static void pistachio_gpio_irq_handler(struct irq_desc *desc) { - unsigned int irq = irq_desc_get_irq(desc); struct gpio_chip *gc = irq_desc_get_handler_data(desc); struct pistachio_gpio_bank *bank = gc_to_bank(gc); - struct irq_chip *chip = irq_get_chip(irq); + struct irq_chip *chip = irq_desc_get_chip(desc); unsigned long pending; unsigned int pin; diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index c5246c05f70c..88bb707e107a 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -1475,7 +1475,7 @@ static const struct gpio_chip rockchip_gpiolib_chip = { * Interrupt handling */ -static void rockchip_irq_demux(unsigned int __irq, struct irq_desc *desc) +static void rockchip_irq_demux(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct rockchip_pin_bank *bank = irq_desc_get_handler_data(desc); diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c 
index bf548c2a7a9d..ef04b962c3d5 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -1679,7 +1679,7 @@ static irqreturn_t pcs_irq_handler(int irq, void *d) * Use this if you have a separate interrupt for each * pinctrl-single instance. */ -static void pcs_irq_chain_handler(unsigned int irq, struct irq_desc *desc) +static void pcs_irq_chain_handler(struct irq_desc *desc) { struct pcs_soc_data *pcs_soc = irq_desc_get_handler_data(desc); struct irq_chip *chip; diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index f8338d2e6b6b..389526e704fb 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -1460,7 +1460,7 @@ static void __gpio_irq_handler(struct st_gpio_bank *bank) } } -static void st_gpio_irq_handler(unsigned irq, struct irq_desc *desc) +static void st_gpio_irq_handler(struct irq_desc *desc) { /* interrupt dedicated per bank */ struct irq_chip *chip = irq_desc_get_chip(desc); @@ -1472,7 +1472,7 @@ static void st_gpio_irq_handler(unsigned irq, struct irq_desc *desc) chained_irq_exit(chip, desc); } -static void st_gpio_irqmux_handler(unsigned irq, struct irq_desc *desc) +static void st_gpio_irqmux_handler(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct st_pinctrl *info = irq_desc_get_handler_data(desc); diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c index 67e08cb315c4..29984b36926a 100644 --- a/drivers/pinctrl/pinmux.c +++ b/drivers/pinctrl/pinmux.c @@ -313,8 +313,7 @@ static int pinmux_func_name_to_selector(struct pinctrl_dev *pctldev, /* See if this pctldev has this function */ while (selector < nfuncs) { - const char *fname = ops->get_function_name(pctldev, - selector); + const char *fname = ops->get_function_name(pctldev, selector); if (!strcmp(function, fname)) return selector; diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 492cdd51dc5c..a0c7407c1cac 100644 --- 
a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -765,9 +765,8 @@ static struct irq_chip msm_gpio_irq_chip = { .irq_set_wake = msm_gpio_irq_set_wake, }; -static void msm_gpio_irq_handler(unsigned int __irq, struct irq_desc *desc) +static void msm_gpio_irq_handler(struct irq_desc *desc) { - unsigned int irq = irq_desc_get_irq(desc); struct gpio_chip *gc = irq_desc_get_handler_data(desc); const struct msm_pingroup *g; struct msm_pinctrl *pctrl = to_msm_pinctrl(gc); @@ -795,7 +794,7 @@ static void msm_gpio_irq_handler(unsigned int __irq, struct irq_desc *desc) /* No interrupts were flagged */ if (handled == 0) - handle_bad_irq(irq, desc); + handle_bad_irq(desc); chained_irq_exit(chip, desc); } diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c index c978b311031b..e1a3721bc8e5 100644 --- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c @@ -723,9 +723,9 @@ static int pm8xxx_gpio_probe(struct platform_device *pdev) #endif pctrl->pctrl = pinctrl_register(&pctrl->desc, &pdev->dev, pctrl); - if (!pctrl->pctrl) { + if (IS_ERR(pctrl->pctrl)) { dev_err(&pdev->dev, "couldn't register pm8xxx gpio driver\n"); - return -ENODEV; + return PTR_ERR(pctrl->pctrl); } pctrl->chip = pm8xxx_gpio_template; diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c index 2d1b69f171be..6652b8d7f707 100644 --- a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c +++ b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c @@ -814,9 +814,9 @@ static int pm8xxx_mpp_probe(struct platform_device *pdev) #endif pctrl->pctrl = pinctrl_register(&pctrl->desc, &pdev->dev, pctrl); - if (!pctrl->pctrl) { + if (IS_ERR(pctrl->pctrl)) { dev_err(&pdev->dev, "couldn't register pm8xxx mpp driver\n"); - return -ENODEV; + return PTR_ERR(pctrl->pctrl); } pctrl->chip = pm8xxx_mpp_template; diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c 
index 5f45caaef46d..71ccf6a90b22 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos.c @@ -419,7 +419,7 @@ static const struct of_device_id exynos_wkup_irq_ids[] = { }; /* interrupt handler for wakeup interrupts 0..15 */ -static void exynos_irq_eint0_15(unsigned int irq, struct irq_desc *desc) +static void exynos_irq_eint0_15(struct irq_desc *desc) { struct exynos_weint_data *eintd = irq_desc_get_handler_data(desc); struct samsung_pin_bank *bank = eintd->bank; @@ -451,7 +451,7 @@ static inline void exynos_irq_demux_eint(unsigned long pend, } /* interrupt handler for wakeup interrupt 16 */ -static void exynos_irq_demux_eint16_31(unsigned int irq, struct irq_desc *desc) +static void exynos_irq_demux_eint16_31(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct exynos_muxed_weint_data *eintd = irq_desc_get_handler_data(desc); diff --git a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c index 019844d479bb..3d92f827da7a 100644 --- a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c +++ b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c @@ -240,7 +240,7 @@ static struct irq_chip s3c2410_eint0_3_chip = { .irq_set_type = s3c24xx_eint_type, }; -static void s3c2410_demux_eint0_3(unsigned int irq, struct irq_desc *desc) +static void s3c2410_demux_eint0_3(struct irq_desc *desc) { struct irq_data *data = irq_desc_get_irq_data(desc); struct s3c24xx_eint_data *eint_data = irq_desc_get_handler_data(desc); @@ -295,7 +295,7 @@ static struct irq_chip s3c2412_eint0_3_chip = { .irq_set_type = s3c24xx_eint_type, }; -static void s3c2412_demux_eint0_3(unsigned int irq, struct irq_desc *desc) +static void s3c2412_demux_eint0_3(struct irq_desc *desc) { struct s3c24xx_eint_data *eint_data = irq_desc_get_handler_data(desc); struct irq_data *data = irq_desc_get_irq_data(desc); @@ -361,7 +361,7 @@ static inline void s3c24xx_demux_eint(struct irq_desc *desc, u32 offset, u32 range) { struct 
s3c24xx_eint_data *data = irq_desc_get_handler_data(desc); - struct irq_chip *chip = irq_desc_get_irq_chip(desc); + struct irq_chip *chip = irq_desc_get_chip(desc); struct samsung_pinctrl_drv_data *d = data->drvdata; unsigned int pend, mask; @@ -388,12 +388,12 @@ static inline void s3c24xx_demux_eint(struct irq_desc *desc, chained_irq_exit(chip, desc); } -static void s3c24xx_demux_eint4_7(unsigned int irq, struct irq_desc *desc) +static void s3c24xx_demux_eint4_7(struct irq_desc *desc) { s3c24xx_demux_eint(desc, 0, 0xf0); } -static void s3c24xx_demux_eint8_23(unsigned int irq, struct irq_desc *desc) +static void s3c24xx_demux_eint8_23(struct irq_desc *desc) { s3c24xx_demux_eint(desc, 8, 0xffff00); } diff --git a/drivers/pinctrl/samsung/pinctrl-s3c64xx.c b/drivers/pinctrl/samsung/pinctrl-s3c64xx.c index f5ea40a69711..43407ab248f5 100644 --- a/drivers/pinctrl/samsung/pinctrl-s3c64xx.c +++ b/drivers/pinctrl/samsung/pinctrl-s3c64xx.c @@ -407,7 +407,7 @@ static const struct irq_domain_ops s3c64xx_gpio_irqd_ops = { .xlate = irq_domain_xlate_twocell, }; -static void s3c64xx_eint_gpio_irq(unsigned int irq, struct irq_desc *desc) +static void s3c64xx_eint_gpio_irq(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct s3c64xx_eint_gpio_data *data = irq_desc_get_handler_data(desc); @@ -631,22 +631,22 @@ static inline void s3c64xx_irq_demux_eint(struct irq_desc *desc, u32 range) chained_irq_exit(chip, desc); } -static void s3c64xx_demux_eint0_3(unsigned int irq, struct irq_desc *desc) +static void s3c64xx_demux_eint0_3(struct irq_desc *desc) { s3c64xx_irq_demux_eint(desc, 0xf); } -static void s3c64xx_demux_eint4_11(unsigned int irq, struct irq_desc *desc) +static void s3c64xx_demux_eint4_11(struct irq_desc *desc) { s3c64xx_irq_demux_eint(desc, 0xff0); } -static void s3c64xx_demux_eint12_19(unsigned int irq, struct irq_desc *desc) +static void s3c64xx_demux_eint12_19(struct irq_desc *desc) { s3c64xx_irq_demux_eint(desc, 0xff000); } -static void 
s3c64xx_demux_eint20_27(unsigned int irq, struct irq_desc *desc) +static void s3c64xx_demux_eint20_27(struct irq_desc *desc) { s3c64xx_irq_demux_eint(desc, 0xff00000); } diff --git a/drivers/pinctrl/sirf/pinctrl-atlas7.c b/drivers/pinctrl/sirf/pinctrl-atlas7.c index 9df0c5f25824..0d24d9e4b70c 100644 --- a/drivers/pinctrl/sirf/pinctrl-atlas7.c +++ b/drivers/pinctrl/sirf/pinctrl-atlas7.c @@ -4489,7 +4489,7 @@ static struct irq_chip atlas7_gpio_irq_chip = { .irq_set_type = atlas7_gpio_irq_type, }; -static void atlas7_gpio_handle_irq(unsigned int __irq, struct irq_desc *desc) +static void atlas7_gpio_handle_irq(struct irq_desc *desc) { struct gpio_chip *gc = irq_desc_get_handler_data(desc); struct atlas7_gpio_chip *a7gc = to_atlas7_gpio(gc); @@ -4512,7 +4512,7 @@ static void atlas7_gpio_handle_irq(unsigned int __irq, struct irq_desc *desc) if (!status) { pr_warn("%s: gpio [%s] status %#x no interrupt is flaged\n", __func__, gc->label, status); - handle_bad_irq(irq, desc); + handle_bad_irq(desc); return; } diff --git a/drivers/pinctrl/sirf/pinctrl-sirf.c b/drivers/pinctrl/sirf/pinctrl-sirf.c index f8bd9fb52033..2a8d69725de8 100644 --- a/drivers/pinctrl/sirf/pinctrl-sirf.c +++ b/drivers/pinctrl/sirf/pinctrl-sirf.c @@ -545,7 +545,7 @@ static struct irq_chip sirfsoc_irq_chip = { .irq_set_type = sirfsoc_gpio_irq_type, }; -static void sirfsoc_gpio_handle_irq(unsigned int __irq, struct irq_desc *desc) +static void sirfsoc_gpio_handle_irq(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); struct gpio_chip *gc = irq_desc_get_handler_data(desc); @@ -570,7 +570,7 @@ static void sirfsoc_gpio_handle_irq(unsigned int __irq, struct irq_desc *desc) printk(KERN_WARNING "%s: gpio id %d status %#x no interrupt is flagged\n", __func__, bank->id, status); - handle_bad_irq(irq, desc); + handle_bad_irq(desc); return; } diff --git a/drivers/pinctrl/spear/pinctrl-plgpio.c b/drivers/pinctrl/spear/pinctrl-plgpio.c index ae8f29fb5536..1f0af250dbb5 100644 --- 
a/drivers/pinctrl/spear/pinctrl-plgpio.c +++ b/drivers/pinctrl/spear/pinctrl-plgpio.c @@ -356,7 +356,7 @@ static struct irq_chip plgpio_irqchip = { .irq_set_type = plgpio_irq_set_type, }; -static void plgpio_irq_handler(unsigned irq, struct irq_desc *desc) +static void plgpio_irq_handler(struct irq_desc *desc) { struct gpio_chip *gc = irq_desc_get_handler_data(desc); struct plgpio *plgpio = container_of(gc, struct plgpio, chip); diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index fb4669c0ce0e..38e0c7bdd2ac 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -617,13 +617,11 @@ static int sunxi_pinctrl_irq_set_type(struct irq_data *d, unsigned int type) spin_lock_irqsave(&pctl->lock, flags); if (type & IRQ_TYPE_LEVEL_MASK) - __irq_set_chip_handler_name_locked(d->irq, - &sunxi_pinctrl_level_irq_chip, - handle_fasteoi_irq, NULL); + irq_set_chip_handler_name_locked(d, &sunxi_pinctrl_level_irq_chip, + handle_fasteoi_irq, NULL); else - __irq_set_chip_handler_name_locked(d->irq, - &sunxi_pinctrl_edge_irq_chip, - handle_edge_irq, NULL); + irq_set_chip_handler_name_locked(d, &sunxi_pinctrl_edge_irq_chip, + handle_edge_irq, NULL); regval = readl(pctl->membase + reg); regval &= ~(IRQ_CFG_IRQ_MASK << index); @@ -742,7 +740,7 @@ static struct irq_domain_ops sunxi_pinctrl_irq_domain_ops = { .xlate = sunxi_pinctrl_irq_of_xlate, }; -static void sunxi_pinctrl_irq_handler(unsigned __irq, struct irq_desc *desc) +static void sunxi_pinctrl_irq_handler(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); struct irq_chip *chip = irq_desc_get_chip(desc); diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index abdaed34c728..131fee2b093e 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -126,6 +126,24 @@ static const struct dmi_system_id asus_quirks[] = { }, .driver_data = &quirk_asus_wapf4, }, + { + .callback = 
dmi_matched, + .ident = "ASUSTeK COMPUTER INC. X456UA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X456UA"), + }, + .driver_data = &quirk_asus_wapf4, + }, + { + .callback = dmi_matched, + .ident = "ASUSTeK COMPUTER INC. X456UF", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X456UF"), + }, + .driver_data = &quirk_asus_wapf4, + }, { .callback = dmi_matched, .ident = "ASUSTeK COMPUTER INC. X501U", diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 06697315a088..fb4dd7b3ee71 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -54,8 +54,9 @@ MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4"); #define HPWMI_HARDWARE_QUERY 0x4 #define HPWMI_WIRELESS_QUERY 0x5 #define HPWMI_BIOS_QUERY 0x9 +#define HPWMI_FEATURE_QUERY 0xb #define HPWMI_HOTKEY_QUERY 0xc -#define HPWMI_FEATURE_QUERY 0xd +#define HPWMI_FEATURE2_QUERY 0xd #define HPWMI_WIRELESS2_QUERY 0x1b #define HPWMI_POSTCODEERROR_QUERY 0x2a @@ -295,25 +296,33 @@ static int hp_wmi_tablet_state(void) return (state & 0x4) ? 1 : 0; } -static int __init hp_wmi_bios_2009_later(void) +static int __init hp_wmi_bios_2008_later(void) { int state = 0; int ret = hp_wmi_perform_query(HPWMI_FEATURE_QUERY, 0, &state, sizeof(state), sizeof(state)); - if (ret) - return ret; + if (!ret) + return 1; - return (state & 0x10) ? 1 : 0; + return (ret == HPWMI_RET_UNKNOWN_CMDTYPE) ? 0 : -ENXIO; } -static int hp_wmi_enable_hotkeys(void) +static int __init hp_wmi_bios_2009_later(void) { - int ret; - int query = 0x6e; + int state = 0; + int ret = hp_wmi_perform_query(HPWMI_FEATURE2_QUERY, 0, &state, + sizeof(state), sizeof(state)); + if (!ret) + return 1; - ret = hp_wmi_perform_query(HPWMI_BIOS_QUERY, 1, &query, sizeof(query), - 0); + return (ret == HPWMI_RET_UNKNOWN_CMDTYPE) ? 
0 : -ENXIO; +} +static int __init hp_wmi_enable_hotkeys(void) +{ + int value = 0x6e; + int ret = hp_wmi_perform_query(HPWMI_BIOS_QUERY, 1, &value, + sizeof(value), 0); if (ret) return -EINVAL; return 0; @@ -663,7 +672,7 @@ static int __init hp_wmi_input_setup(void) hp_wmi_tablet_state()); input_sync(hp_wmi_input_dev); - if (hp_wmi_bios_2009_later() == 4) + if (!hp_wmi_bios_2009_later() && hp_wmi_bios_2008_later()) hp_wmi_enable_hotkeys(); status = wmi_install_notify_handler(HPWMI_EVENT_GUID, hp_wmi_notify, NULL); diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 6740c513919c..f2372f400ddb 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -938,7 +938,7 @@ static int toshiba_usb_sleep_music_get(struct toshiba_acpi_dev *dev, u32 *state) else if (result == TOS_NOT_SUPPORTED) return -ENODEV; - return result = TOS_SUCCESS ? 0 : -EIO; + return result == TOS_SUCCESS ? 0 : -EIO; } static int toshiba_usb_sleep_music_set(struct toshiba_acpi_dev *dev, u32 state) @@ -2398,11 +2398,9 @@ static int toshiba_acpi_setup_keyboard(struct toshiba_acpi_dev *dev) if (error) return error; - error = toshiba_hotkey_event_type_get(dev, &events_type); - if (error) { - pr_err("Unable to query Hotkey Event Type\n"); - return error; - } + if (toshiba_hotkey_event_type_get(dev, &events_type)) + pr_notice("Unable to query Hotkey Event Type\n"); + dev->hotkey_event_type = events_type; dev->hotkey_dev = input_allocate_device(); diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index aac47573f9ed..eb391a281833 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -194,34 +194,6 @@ static bool wmi_parse_guid(const u8 *src, u8 *dest) return true; } -/* - * Convert a raw GUID to the ACII string representation - */ -static int wmi_gtoa(const char *in, char *out) -{ - int i; - - for (i = 3; i >= 0; i--) - out += sprintf(out, "%02X", in[i] & 0xFF); - - out += sprintf(out, "-"); - 
out += sprintf(out, "%02X", in[5] & 0xFF); - out += sprintf(out, "%02X", in[4] & 0xFF); - out += sprintf(out, "-"); - out += sprintf(out, "%02X", in[7] & 0xFF); - out += sprintf(out, "%02X", in[6] & 0xFF); - out += sprintf(out, "-"); - out += sprintf(out, "%02X", in[8] & 0xFF); - out += sprintf(out, "%02X", in[9] & 0xFF); - out += sprintf(out, "-"); - - for (i = 10; i <= 15; i++) - out += sprintf(out, "%02X", in[i] & 0xFF); - - *out = '\0'; - return 0; -} - static bool find_guid(const char *guid_string, struct wmi_block **out) { char tmp[16], guid_input[16]; @@ -457,11 +429,7 @@ EXPORT_SYMBOL_GPL(wmi_set_block); static void wmi_dump_wdg(const struct guid_block *g) { - char guid_string[37]; - - wmi_gtoa(g->guid, guid_string); - - pr_info("%s:\n", guid_string); + pr_info("%pUL:\n", g->guid); pr_info("\tobject_id: %c%c\n", g->object_id[0], g->object_id[1]); pr_info("\tnotify_id: %02X\n", g->notify_id); pr_info("\treserved: %02X\n", g->reserved); @@ -661,7 +629,6 @@ EXPORT_SYMBOL_GPL(wmi_has_guid); static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { - char guid_string[37]; struct wmi_block *wblock; wblock = dev_get_drvdata(dev); @@ -670,9 +637,7 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, return strlen(buf); } - wmi_gtoa(wblock->gblock.guid, guid_string); - - return sprintf(buf, "wmi:%s\n", guid_string); + return sprintf(buf, "wmi:%pUL\n", wblock->gblock.guid); } static DEVICE_ATTR_RO(modalias); @@ -695,7 +660,7 @@ static int wmi_dev_uevent(struct device *dev, struct kobj_uevent_env *env) if (!wblock) return -ENOMEM; - wmi_gtoa(wblock->gblock.guid, guid_string); + sprintf(guid_string, "%pUL", wblock->gblock.guid); strcpy(&env->buf[env->buflen - 1], "wmi:"); memcpy(&env->buf[env->buflen - 1 + 4], guid_string, 36); @@ -721,12 +686,9 @@ static struct class wmi_class = { static int wmi_create_device(const struct guid_block *gblock, struct wmi_block *wblock, acpi_handle handle) { - char 
guid_string[37]; - wblock->dev.class = &wmi_class; - wmi_gtoa(gblock->guid, guid_string); - dev_set_name(&wblock->dev, "%s", guid_string); + dev_set_name(&wblock->dev, "%pUL", gblock->guid); dev_set_drvdata(&wblock->dev, wblock); @@ -877,7 +839,6 @@ static void acpi_wmi_notify(struct acpi_device *device, u32 event) struct guid_block *block; struct wmi_block *wblock; struct list_head *p; - char guid_string[37]; list_for_each(p, &wmi_block_list) { wblock = list_entry(p, struct wmi_block, list); @@ -888,8 +849,8 @@ static void acpi_wmi_notify(struct acpi_device *device, u32 event) if (wblock->handler) wblock->handler(event, wblock->handler_data); if (debug_event) { - wmi_gtoa(wblock->gblock.guid, guid_string); - pr_info("DEBUG Event GUID: %s\n", guid_string); + pr_info("DEBUG Event GUID: %pUL\n", + wblock->gblock.guid); } acpi_bus_generate_netlink_event( diff --git a/drivers/power/twl4030_charger.c b/drivers/power/twl4030_charger.c index f4f2c1f76c32..74f2d3ff1d7c 100644 --- a/drivers/power/twl4030_charger.c +++ b/drivers/power/twl4030_charger.c @@ -91,7 +91,7 @@ #define TWL4030_MSTATEC_COMPLETE1 0x0b #define TWL4030_MSTATEC_COMPLETE4 0x0e -#if IS_ENABLED(CONFIG_TWL4030_MADC) +#if IS_REACHABLE(CONFIG_TWL4030_MADC) /* * If AC (Accessory Charger) voltage exceeds 4.5V (MADC 11) * then AC is available. @@ -1057,13 +1057,9 @@ static int twl4030_bci_probe(struct platform_device *pdev) phynode = of_find_compatible_node(bci->dev->of_node->parent, NULL, "ti,twl4030-usb"); - if (phynode) { + if (phynode) bci->transceiver = devm_usb_get_phy_by_node( bci->dev, phynode, &bci->usb_nb); - if (IS_ERR(bci->transceiver) && - PTR_ERR(bci->transceiver) == -EPROBE_DEFER) - return -EPROBE_DEFER; - } } /* Enable interrupts now. 
*/ diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index f8d8fdb26b72..e9fae30fafda 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -400,12 +400,16 @@ static bool virtio_ccw_kvm_notify(struct virtqueue *vq) static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev, struct ccw1 *ccw, int index) { + int ret; + vcdev->config_block->index = index; ccw->cmd_code = CCW_CMD_READ_VQ_CONF; ccw->flags = 0; ccw->count = sizeof(struct vq_config_block); ccw->cda = (__u32)(unsigned long)(vcdev->config_block); - ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_VQ_CONF); + ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_VQ_CONF); + if (ret) + return ret; return vcdev->config_block->num; } @@ -503,6 +507,10 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev, goto out_err; } info->num = virtio_ccw_read_vq_conf(vcdev, ccw, i); + if (info->num < 0) { + err = info->num; + goto out_err; + } size = PAGE_ALIGN(vring_size(info->num, KVM_VIRTIO_CCW_RING_ALIGN)); info->queue = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO); if (info->queue == NULL) { diff --git a/drivers/sh/intc/core.c b/drivers/sh/intc/core.c index 043419dcee92..8e72bcbd3d6d 100644 --- a/drivers/sh/intc/core.c +++ b/drivers/sh/intc/core.c @@ -65,7 +65,7 @@ void intc_set_prio_level(unsigned int irq, unsigned int level) raw_spin_unlock_irqrestore(&intc_big_lock, flags); } -static void intc_redirect_irq(unsigned int irq, struct irq_desc *desc) +static void intc_redirect_irq(struct irq_desc *desc) { generic_handle_irq((unsigned int)irq_desc_get_handler_data(desc)); } diff --git a/drivers/sh/intc/internals.h b/drivers/sh/intc/internals.h index 7dff08e2a071..6ce7f0d26dcf 100644 --- a/drivers/sh/intc/internals.h +++ b/drivers/sh/intc/internals.h @@ -99,15 +99,7 @@ static inline struct intc_desc_int *get_intc_desc(unsigned int irq) */ static inline void activate_irq(int irq) { -#ifdef CONFIG_ARM - /* ARM requires an 
extra step to clear IRQ_NOREQUEST, which it - * sets on behalf of every irq_chip. Also sets IRQ_NOPROBE. - */ - set_irq_flags(irq, IRQF_VALID); -#else - /* same effect on other architectures */ - irq_set_noprobe(irq); -#endif + irq_modify_status(irq, IRQ_NOREQUEST, IRQ_NOPROBE); } static inline int intc_handle_int_cmp(const void *a, const void *b) diff --git a/drivers/sh/intc/virq.c b/drivers/sh/intc/virq.c index bafc51c6f0ba..e7899624aa0b 100644 --- a/drivers/sh/intc/virq.c +++ b/drivers/sh/intc/virq.c @@ -109,7 +109,7 @@ static int add_virq_to_pirq(unsigned int irq, unsigned int virq) return 0; } -static void intc_virq_handler(unsigned int __irq, struct irq_desc *desc) +static void intc_virq_handler(struct irq_desc *desc) { unsigned int irq = irq_desc_get_irq(desc); struct irq_data *data = irq_desc_get_irq_data(desc); @@ -127,7 +127,7 @@ static void intc_virq_handler(unsigned int __irq, struct irq_desc *desc) handle = (unsigned long)irq_desc_get_handler_data(vdesc); addr = INTC_REG(d, _INTC_ADDR_E(handle), 0); if (intc_reg_fns[_INTC_FN(handle)](addr, handle, 0)) - generic_handle_irq_desc(entry->irq, vdesc); + generic_handle_irq_desc(vdesc); } } diff --git a/drivers/soc/dove/pmu.c b/drivers/soc/dove/pmu.c index 6792aae9e2e5..052aecf29893 100644 --- a/drivers/soc/dove/pmu.c +++ b/drivers/soc/dove/pmu.c @@ -222,9 +222,9 @@ static void __pmu_domain_register(struct pmu_domain *domain, } /* PMU IRQ controller */ -static void pmu_irq_handler(unsigned int irq, struct irq_desc *desc) +static void pmu_irq_handler(struct irq_desc *desc) { - struct pmu_data *pmu = irq_get_handler_data(irq); + struct pmu_data *pmu = irq_desc_get_handler_data(desc); struct irq_chip_generic *gc = pmu->irq_gc; struct irq_domain *domain = pmu->irq_domain; void __iomem *base = gc->reg_base; @@ -232,7 +232,7 @@ static void pmu_irq_handler(unsigned int irq, struct irq_desc *desc) u32 done = ~0; if (stat == 0) { - handle_bad_irq(irq, desc); + handle_bad_irq(desc); return; } diff --git 
a/drivers/spmi/spmi-pmic-arb.c b/drivers/spmi/spmi-pmic-arb.c index bdfb3c84c3cb..4a3cf9ba152f 100644 --- a/drivers/spmi/spmi-pmic-arb.c +++ b/drivers/spmi/spmi-pmic-arb.c @@ -451,7 +451,7 @@ static void periph_interrupt(struct spmi_pmic_arb_dev *pa, u8 apid) } } -static void pmic_arb_chained_irq(unsigned int irq, struct irq_desc *desc) +static void pmic_arb_chained_irq(struct irq_desc *desc) { struct spmi_pmic_arb_dev *pa = irq_desc_get_handler_data(desc); struct irq_chip *chip = irq_desc_get_chip(desc); diff --git a/drivers/staging/rdma/Kconfig b/drivers/staging/rdma/Kconfig index cf5fe9bb87a1..d7f62359d743 100644 --- a/drivers/staging/rdma/Kconfig +++ b/drivers/staging/rdma/Kconfig @@ -24,6 +24,8 @@ if STAGING_RDMA source "drivers/staging/rdma/amso1100/Kconfig" +source "drivers/staging/rdma/ehca/Kconfig" + source "drivers/staging/rdma/hfi1/Kconfig" source "drivers/staging/rdma/ipath/Kconfig" diff --git a/drivers/staging/rdma/Makefile b/drivers/staging/rdma/Makefile index cbd915ac7f20..139d78ef2c24 100644 --- a/drivers/staging/rdma/Makefile +++ b/drivers/staging/rdma/Makefile @@ -1,4 +1,5 @@ # Entries for RDMA_STAGING tree obj-$(CONFIG_INFINIBAND_AMSO1100) += amso1100/ +obj-$(CONFIG_INFINIBAND_EHCA) += ehca/ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/ obj-$(CONFIG_INFINIBAND_IPATH) += ipath/ diff --git a/drivers/staging/rdma/ehca/Kconfig b/drivers/staging/rdma/ehca/Kconfig new file mode 100644 index 000000000000..3fadd2ad6426 --- /dev/null +++ b/drivers/staging/rdma/ehca/Kconfig @@ -0,0 +1,10 @@ +config INFINIBAND_EHCA + tristate "eHCA support" + depends on IBMEBUS + ---help--- + This driver supports the deprecated IBM pSeries eHCA InfiniBand + adapter. + + To compile the driver as a module, choose M here. The module + will be called ib_ehca. 
+ diff --git a/drivers/staging/rdma/ehca/Makefile b/drivers/staging/rdma/ehca/Makefile new file mode 100644 index 000000000000..74d284e46a40 --- /dev/null +++ b/drivers/staging/rdma/ehca/Makefile @@ -0,0 +1,16 @@ +# Authors: Heiko J Schick +# Christoph Raisch +# Joachim Fenkes +# +# Copyright (c) 2005 IBM Corporation +# +# All rights reserved. +# +# This source code is distributed under a dual license of GPL v2.0 and OpenIB BSD. + +obj-$(CONFIG_INFINIBAND_EHCA) += ib_ehca.o + +ib_ehca-objs = ehca_main.o ehca_hca.o ehca_mcast.o ehca_pd.o ehca_av.o ehca_eq.o \ + ehca_cq.o ehca_qp.o ehca_sqp.o ehca_mrmw.o ehca_reqs.o ehca_irq.o \ + ehca_uverbs.o ipz_pt_fn.o hcp_if.o hcp_phyp.o + diff --git a/drivers/staging/rdma/ehca/TODO b/drivers/staging/rdma/ehca/TODO new file mode 100644 index 000000000000..199a4a600142 --- /dev/null +++ b/drivers/staging/rdma/ehca/TODO @@ -0,0 +1,4 @@ +9/2015 + +The ehca driver has been deprecated and moved to drivers/staging/rdma. +It will be removed in the 4.6 merge window. diff --git a/drivers/staging/rdma/ehca/ehca_av.c b/drivers/staging/rdma/ehca/ehca_av.c new file mode 100644 index 000000000000..465926319f3d --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_av.c @@ -0,0 +1,277 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * address vector functions + * + * Authors: Hoang-Nam Nguyen + * Khadija Souissi + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. 
+ * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_tools.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" + +static struct kmem_cache *av_cache; + +int ehca_calc_ipd(struct ehca_shca *shca, int port, + enum ib_rate path_rate, u32 *ipd) +{ + int path = ib_rate_to_mult(path_rate); + int link, ret; + struct ib_port_attr pa; + + if (path_rate == IB_RATE_PORT_CURRENT) { + *ipd = 0; + return 0; + } + + if (unlikely(path < 0)) { + ehca_err(&shca->ib_device, "Invalid static rate! 
path_rate=%x", + path_rate); + return -EINVAL; + } + + ret = ehca_query_port(&shca->ib_device, port, &pa); + if (unlikely(ret < 0)) { + ehca_err(&shca->ib_device, "Failed to query port ret=%i", ret); + return ret; + } + + link = ib_width_enum_to_int(pa.active_width) * pa.active_speed; + + if (path >= link) + /* no need to throttle if path faster than link */ + *ipd = 0; + else + /* IPD = round((link / path) - 1) */ + *ipd = ((link + (path >> 1)) / path) - 1; + + return 0; +} + +struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +{ + int ret; + struct ehca_av *av; + struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, + ib_device); + + av = kmem_cache_alloc(av_cache, GFP_KERNEL); + if (!av) { + ehca_err(pd->device, "Out of memory pd=%p ah_attr=%p", + pd, ah_attr); + return ERR_PTR(-ENOMEM); + } + + av->av.sl = ah_attr->sl; + av->av.dlid = ah_attr->dlid; + av->av.slid_path_bits = ah_attr->src_path_bits; + + if (ehca_static_rate < 0) { + u32 ipd; + if (ehca_calc_ipd(shca, ah_attr->port_num, + ah_attr->static_rate, &ipd)) { + ret = -EINVAL; + goto create_ah_exit1; + } + av->av.ipd = ipd; + } else + av->av.ipd = ehca_static_rate; + + av->av.lnh = ah_attr->ah_flags; + av->av.grh.word_0 = EHCA_BMASK_SET(GRH_IPVERSION_MASK, 6); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_TCLASS_MASK, + ah_attr->grh.traffic_class); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK, + ah_attr->grh.flow_label); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, + ah_attr->grh.hop_limit); + av->av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1B); + /* set sgid in grh.word_1 */ + if (ah_attr->ah_flags & IB_AH_GRH) { + int rc; + struct ib_port_attr port_attr; + union ib_gid gid; + memset(&port_attr, 0, sizeof(port_attr)); + rc = ehca_query_port(pd->device, ah_attr->port_num, + &port_attr); + if (rc) { /* invalid port number */ + ret = -EINVAL; + ehca_err(pd->device, "Invalid port number " + "ehca_query_port() returned %x " + "pd=%p 
ah_attr=%p", rc, pd, ah_attr); + goto create_ah_exit1; + } + memset(&gid, 0, sizeof(gid)); + rc = ehca_query_gid(pd->device, + ah_attr->port_num, + ah_attr->grh.sgid_index, &gid); + if (rc) { + ret = -EINVAL; + ehca_err(pd->device, "Failed to retrieve sgid " + "ehca_query_gid() returned %x " + "pd=%p ah_attr=%p", rc, pd, ah_attr); + goto create_ah_exit1; + } + memcpy(&av->av.grh.word_1, &gid, sizeof(gid)); + } + av->av.pmtu = shca->max_mtu; + + /* dgid comes in grh.word_3 */ + memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid, + sizeof(ah_attr->grh.dgid)); + + return &av->ib_ah; + +create_ah_exit1: + kmem_cache_free(av_cache, av); + + return ERR_PTR(ret); +} + +int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +{ + struct ehca_av *av; + struct ehca_ud_av new_ehca_av; + struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca, + ib_device); + + memset(&new_ehca_av, 0, sizeof(new_ehca_av)); + new_ehca_av.sl = ah_attr->sl; + new_ehca_av.dlid = ah_attr->dlid; + new_ehca_av.slid_path_bits = ah_attr->src_path_bits; + new_ehca_av.ipd = ah_attr->static_rate; + new_ehca_av.lnh = EHCA_BMASK_SET(GRH_FLAG_MASK, + (ah_attr->ah_flags & IB_AH_GRH) > 0); + new_ehca_av.grh.word_0 = EHCA_BMASK_SET(GRH_TCLASS_MASK, + ah_attr->grh.traffic_class); + new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_FLOWLABEL_MASK, + ah_attr->grh.flow_label); + new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_HOPLIMIT_MASK, + ah_attr->grh.hop_limit); + new_ehca_av.grh.word_0 |= EHCA_BMASK_SET(GRH_NEXTHEADER_MASK, 0x1b); + + /* set sgid in grh.word_1 */ + if (ah_attr->ah_flags & IB_AH_GRH) { + int rc; + struct ib_port_attr port_attr; + union ib_gid gid; + memset(&port_attr, 0, sizeof(port_attr)); + rc = ehca_query_port(ah->device, ah_attr->port_num, + &port_attr); + if (rc) { /* invalid port number */ + ehca_err(ah->device, "Invalid port number " + "ehca_query_port() returned %x " + "ah=%p ah_attr=%p port_num=%x", + rc, ah, ah_attr, ah_attr->port_num); + return -EINVAL; + } + memset(&gid, 
0, sizeof(gid)); + rc = ehca_query_gid(ah->device, + ah_attr->port_num, + ah_attr->grh.sgid_index, &gid); + if (rc) { + ehca_err(ah->device, "Failed to retrieve sgid " + "ehca_query_gid() returned %x " + "ah=%p ah_attr=%p port_num=%x " + "sgid_index=%x", + rc, ah, ah_attr, ah_attr->port_num, + ah_attr->grh.sgid_index); + return -EINVAL; + } + memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid)); + } + + new_ehca_av.pmtu = shca->max_mtu; + + memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid, + sizeof(ah_attr->grh.dgid)); + + av = container_of(ah, struct ehca_av, ib_ah); + av->av = new_ehca_av; + + return 0; +} + +int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) +{ + struct ehca_av *av = container_of(ah, struct ehca_av, ib_ah); + + memcpy(&ah_attr->grh.dgid, &av->av.grh.word_3, + sizeof(ah_attr->grh.dgid)); + ah_attr->sl = av->av.sl; + + ah_attr->dlid = av->av.dlid; + + ah_attr->src_path_bits = av->av.slid_path_bits; + ah_attr->static_rate = av->av.ipd; + ah_attr->ah_flags = EHCA_BMASK_GET(GRH_FLAG_MASK, av->av.lnh); + ah_attr->grh.traffic_class = EHCA_BMASK_GET(GRH_TCLASS_MASK, + av->av.grh.word_0); + ah_attr->grh.hop_limit = EHCA_BMASK_GET(GRH_HOPLIMIT_MASK, + av->av.grh.word_0); + ah_attr->grh.flow_label = EHCA_BMASK_GET(GRH_FLOWLABEL_MASK, + av->av.grh.word_0); + + return 0; +} + +int ehca_destroy_ah(struct ib_ah *ah) +{ + kmem_cache_free(av_cache, container_of(ah, struct ehca_av, ib_ah)); + + return 0; +} + +int ehca_init_av_cache(void) +{ + av_cache = kmem_cache_create("ehca_cache_av", + sizeof(struct ehca_av), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!av_cache) + return -ENOMEM; + return 0; +} + +void ehca_cleanup_av_cache(void) +{ + if (av_cache) + kmem_cache_destroy(av_cache); +} diff --git a/drivers/staging/rdma/ehca/ehca_classes.h b/drivers/staging/rdma/ehca/ehca_classes.h new file mode 100644 index 000000000000..bd45e0f3923f --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_classes.h @@ -0,0 +1,482 @@ +/* + * IBM eServer eHCA Infiniband 
device driver for Linux on POWER + * + * Struct definition for eHCA internal structures + * + * Authors: Heiko J Schick + * Christoph Raisch + * Joachim Fenkes + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __EHCA_CLASSES_H__ +#define __EHCA_CLASSES_H__ + +struct ehca_module; +struct ehca_qp; +struct ehca_cq; +struct ehca_eq; +struct ehca_mr; +struct ehca_mw; +struct ehca_pd; +struct ehca_av; + +#include <linux/wait.h> +#include <linux/mutex.h> + +#include <rdma/ib_verbs.h> +#include <rdma/ib_user_verbs.h> + +#ifdef CONFIG_PPC64 +#include "ehca_classes_pSeries.h" +#endif +#include "ipz_pt_fn.h" +#include "ehca_qes.h" +#include "ehca_irq.h" + +#define EHCA_EQE_CACHE_SIZE 20 +#define EHCA_MAX_NUM_QUEUES 0xffff + +struct ehca_eqe_cache_entry { + struct ehca_eqe *eqe; + struct ehca_cq *cq; +}; + +struct ehca_eq { + u32 length; + struct ipz_queue ipz_queue; + struct ipz_eq_handle ipz_eq_handle; + struct work_struct work; + struct h_galpas galpas; + int is_initialized; + struct ehca_pfeq pf; + spinlock_t spinlock; + struct tasklet_struct interrupt_task; + u32 ist; + spinlock_t irq_spinlock; + struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE]; +}; + +struct ehca_sma_attr { + u16 lid, lmc, sm_sl, sm_lid; + u16 pkey_tbl_len, pkeys[16]; +}; + +struct ehca_sport { + struct ib_cq *ibcq_aqp1; + struct ib_qp *ibqp_sqp[2]; + /* lock to serialize modify_qp() calls for sqp in normal + * and irq path (when event PORT_ACTIVE is received first time) + */ + spinlock_t mod_sqp_lock; + enum ib_port_state port_state; + struct ehca_sma_attr saved_attr; + u32 pma_qp_nr; +}; + +#define HCA_CAP_MR_PGSIZE_4K 0x80000000 +#define HCA_CAP_MR_PGSIZE_64K 0x40000000 +#define HCA_CAP_MR_PGSIZE_1M 0x20000000 +#define HCA_CAP_MR_PGSIZE_16M 0x10000000 + +struct ehca_shca { + struct ib_device ib_device; + struct platform_device *ofdev; + u8 num_ports; + int hw_level; + struct list_head shca_list; + struct ipz_adapter_handle ipz_hca_handle; + struct ehca_sport sport[2]; + struct ehca_eq eq; + struct ehca_eq neq; + struct ehca_mr *maxmr; + struct ehca_pd *pd; + struct h_galpas galpas; + struct mutex modify_mutex; + u64 hca_cap; + /* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */ + u32 hca_cap_mr_pgsize; + int max_mtu; + int max_num_qps; + int 
max_num_cqs; + atomic_t num_cqs; + atomic_t num_qps; +}; + +struct ehca_pd { + struct ib_pd ib_pd; + struct ipz_pd fw_pd; + /* small queue mgmt */ + struct mutex lock; + struct list_head free[2]; + struct list_head full[2]; +}; + +enum ehca_ext_qp_type { + EQPT_NORMAL = 0, + EQPT_LLQP = 1, + EQPT_SRQBASE = 2, + EQPT_SRQ = 3, +}; + +/* struct to cache modify_qp()'s parms for GSI/SMI qp */ +struct ehca_mod_qp_parm { + int mask; + struct ib_qp_attr attr; +}; + +#define EHCA_MOD_QP_PARM_MAX 4 + +#define QMAP_IDX_MASK 0xFFFFULL + +/* struct for tracking if cqes have been reported to the application */ +struct ehca_qmap_entry { + u16 app_wr_id; + u8 reported; + u8 cqe_req; +}; + +struct ehca_queue_map { + struct ehca_qmap_entry *map; + unsigned int entries; + unsigned int tail; + unsigned int left_to_poll; + unsigned int next_wqe_idx; /* Idx to first wqe to be flushed */ +}; + +/* function to calculate the next index for the qmap */ +static inline unsigned int next_index(unsigned int cur_index, unsigned int limit) +{ + unsigned int temp = cur_index + 1; + return (temp == limit) ? 
0 : temp; +} + +struct ehca_qp { + union { + struct ib_qp ib_qp; + struct ib_srq ib_srq; + }; + u32 qp_type; + enum ehca_ext_qp_type ext_type; + enum ib_qp_state state; + struct ipz_queue ipz_squeue; + struct ehca_queue_map sq_map; + struct ipz_queue ipz_rqueue; + struct ehca_queue_map rq_map; + struct h_galpas galpas; + u32 qkey; + u32 real_qp_num; + u32 token; + spinlock_t spinlock_s; + spinlock_t spinlock_r; + u32 sq_max_inline_data_size; + struct ipz_qp_handle ipz_qp_handle; + struct ehca_pfqp pf; + struct ib_qp_init_attr init_attr; + struct ehca_cq *send_cq; + struct ehca_cq *recv_cq; + unsigned int sqerr_purgeflag; + struct hlist_node list_entries; + /* array to cache modify_qp()'s parms for GSI/SMI qp */ + struct ehca_mod_qp_parm *mod_qp_parm; + int mod_qp_parm_idx; + /* mmap counter for resources mapped into user space */ + u32 mm_count_squeue; + u32 mm_count_rqueue; + u32 mm_count_galpa; + /* unsolicited ack circumvention */ + int unsol_ack_circ; + int mtu_shift; + u32 message_count; + u32 packet_count; + atomic_t nr_events; /* events seen */ + wait_queue_head_t wait_completion; + int mig_armed; + struct list_head sq_err_node; + struct list_head rq_err_node; +}; + +#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ) +#define HAS_SQ(qp) (qp->ext_type != EQPT_SRQ) +#define HAS_RQ(qp) (qp->ext_type != EQPT_SRQBASE) + +/* must be power of 2 */ +#define QP_HASHTAB_LEN 8 + +struct ehca_cq { + struct ib_cq ib_cq; + struct ipz_queue ipz_queue; + struct h_galpas galpas; + spinlock_t spinlock; + u32 cq_number; + u32 token; + u32 nr_of_entries; + struct ipz_cq_handle ipz_cq_handle; + struct ehca_pfcq pf; + spinlock_t cb_lock; + struct hlist_head qp_hashtab[QP_HASHTAB_LEN]; + struct list_head entry; + u32 nr_callbacks; /* #events assigned to cpu by scaling code */ + atomic_t nr_events; /* #events seen */ + wait_queue_head_t wait_completion; + spinlock_t task_lock; + /* mmap counter for resources mapped into user space */ + u32 mm_count_queue; + u32 mm_count_galpa; + struct 
list_head sqp_err_list; + struct list_head rqp_err_list; +}; + +enum ehca_mr_flag { + EHCA_MR_FLAG_FMR = 0x80000000, /* FMR, created with ehca_alloc_fmr */ + EHCA_MR_FLAG_MAXMR = 0x40000000, /* max-MR */ +}; + +struct ehca_mr { + union { + struct ib_mr ib_mr; /* must always be first in ehca_mr */ + struct ib_fmr ib_fmr; /* must always be first in ehca_mr */ + } ib; + struct ib_umem *umem; + spinlock_t mrlock; + + enum ehca_mr_flag flags; + u32 num_kpages; /* number of kernel pages */ + u32 num_hwpages; /* number of hw pages to form MR */ + u64 hwpage_size; /* hw page size used for this MR */ + int acl; /* ACL (stored here for usage in reregister) */ + u64 *start; /* virtual start address (stored here for */ + /* usage in reregister) */ + u64 size; /* size (stored here for usage in reregister) */ + u32 fmr_page_size; /* page size for FMR */ + u32 fmr_max_pages; /* max pages for FMR */ + u32 fmr_max_maps; /* max outstanding maps for FMR */ + u32 fmr_map_cnt; /* map counter for FMR */ + /* fw specific data */ + struct ipz_mrmw_handle ipz_mr_handle; /* MR handle for h-calls */ + struct h_galpas galpas; +}; + +struct ehca_mw { + struct ib_mw ib_mw; /* gen2 mw, must always be first in ehca_mw */ + spinlock_t mwlock; + + u8 never_bound; /* indication MW was never bound */ + struct ipz_mrmw_handle ipz_mw_handle; /* MW handle for h-calls */ + struct h_galpas galpas; +}; + +enum ehca_mr_pgi_type { + EHCA_MR_PGI_PHYS = 1, /* type of ehca_reg_phys_mr, + * ehca_rereg_phys_mr, + * ehca_reg_internal_maxmr */ + EHCA_MR_PGI_USER = 2, /* type of ehca_reg_user_mr */ + EHCA_MR_PGI_FMR = 3 /* type of ehca_map_phys_fmr */ +}; + +struct ehca_mr_pginfo { + enum ehca_mr_pgi_type type; + u64 num_kpages; + u64 kpage_cnt; + u64 hwpage_size; /* hw page size used for this MR */ + u64 num_hwpages; /* number of hw pages */ + u64 hwpage_cnt; /* counter for hw pages */ + u64 next_hwpage; /* next hw page in buffer/chunk/listelem */ + + union { + struct { /* type EHCA_MR_PGI_PHYS section */ + int 
num_phys_buf; + struct ib_phys_buf *phys_buf_array; + u64 next_buf; + } phy; + struct { /* type EHCA_MR_PGI_USER section */ + struct ib_umem *region; + struct scatterlist *next_sg; + u64 next_nmap; + } usr; + struct { /* type EHCA_MR_PGI_FMR section */ + u64 fmr_pgsize; + u64 *page_list; + u64 next_listelem; + } fmr; + } u; +}; + +/* output parameters for MR/FMR hipz calls */ +struct ehca_mr_hipzout_parms { + struct ipz_mrmw_handle handle; + u32 lkey; + u32 rkey; + u64 len; + u64 vaddr; + u32 acl; +}; + +/* output parameters for MW hipz calls */ +struct ehca_mw_hipzout_parms { + struct ipz_mrmw_handle handle; + u32 rkey; +}; + +struct ehca_av { + struct ib_ah ib_ah; + struct ehca_ud_av av; +}; + +struct ehca_ucontext { + struct ib_ucontext ib_ucontext; +}; + +int ehca_init_pd_cache(void); +void ehca_cleanup_pd_cache(void); +int ehca_init_cq_cache(void); +void ehca_cleanup_cq_cache(void); +int ehca_init_qp_cache(void); +void ehca_cleanup_qp_cache(void); +int ehca_init_av_cache(void); +void ehca_cleanup_av_cache(void); +int ehca_init_mrmw_cache(void); +void ehca_cleanup_mrmw_cache(void); +int ehca_init_small_qp_cache(void); +void ehca_cleanup_small_qp_cache(void); + +extern rwlock_t ehca_qp_idr_lock; +extern rwlock_t ehca_cq_idr_lock; +extern struct idr ehca_qp_idr; +extern struct idr ehca_cq_idr; +extern spinlock_t shca_list_lock; + +extern int ehca_static_rate; +extern int ehca_port_act_time; +extern bool ehca_use_hp_mr; +extern bool ehca_scaling_code; +extern int ehca_lock_hcalls; +extern int ehca_nr_ports; +extern int ehca_max_cq; +extern int ehca_max_qp; + +struct ipzu_queue_resp { + u32 qe_size; /* queue entry size */ + u32 act_nr_of_sg; + u32 queue_length; /* queue length allocated in bytes */ + u32 pagesize; + u32 toggle_state; + u32 offset; /* save offset within a page for small_qp */ +}; + +struct ehca_create_cq_resp { + u32 cq_number; + u32 token; + struct ipzu_queue_resp ipz_queue; + u32 fw_handle_ofs; + u32 dummy; +}; + +struct ehca_create_qp_resp { + 
u32 qp_num; + u32 token; + u32 qp_type; + u32 ext_type; + u32 qkey; + /* qp_num assigned by ehca: sqp0/1 may have got different numbers */ + u32 real_qp_num; + u32 fw_handle_ofs; + u32 dummy; + struct ipzu_queue_resp ipz_squeue; + struct ipzu_queue_resp ipz_rqueue; +}; + +struct ehca_alloc_cq_parms { + u32 nr_cqe; + u32 act_nr_of_entries; + u32 act_pages; + struct ipz_eq_handle eq_handle; +}; + +enum ehca_service_type { + ST_RC = 0, + ST_UC = 1, + ST_RD = 2, + ST_UD = 3, +}; + +enum ehca_ll_comp_flags { + LLQP_SEND_COMP = 0x20, + LLQP_RECV_COMP = 0x40, + LLQP_COMP_MASK = 0x60, +}; + +struct ehca_alloc_queue_parms { + /* input parameters */ + int max_wr; + int max_sge; + int page_size; + int is_small; + + /* output parameters */ + u16 act_nr_wqes; + u8 act_nr_sges; + u32 queue_size; /* bytes for small queues, pages otherwise */ +}; + +struct ehca_alloc_qp_parms { + struct ehca_alloc_queue_parms squeue; + struct ehca_alloc_queue_parms rqueue; + + /* input parameters */ + enum ehca_service_type servicetype; + int qp_storage; + int sigtype; + enum ehca_ext_qp_type ext_type; + enum ehca_ll_comp_flags ll_comp_flags; + int ud_av_l_key_ctl; + + u32 token; + struct ipz_eq_handle eq_handle; + struct ipz_pd pd; + struct ipz_cq_handle send_cq_handle, recv_cq_handle; + + u32 srq_qpn, srq_token, srq_limit; + + /* output parameters */ + u32 real_qp_num; + struct ipz_qp_handle qp_handle; + struct h_galpas galpas; +}; + +int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp); +int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int qp_num); +struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int qp_num); + +#endif diff --git a/drivers/staging/rdma/ehca/ehca_classes_pSeries.h b/drivers/staging/rdma/ehca/ehca_classes_pSeries.h new file mode 100644 index 000000000000..689c35786dd2 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_classes_pSeries.h @@ -0,0 +1,208 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * pSeries interface definitions + * + 
* Authors: Waleri Fomin + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __EHCA_CLASSES_PSERIES_H__ +#define __EHCA_CLASSES_PSERIES_H__ + +#include "hcp_phyp.h" +#include "ipz_pt_fn.h" + + +struct ehca_pfqp { + struct ipz_qpt sqpt; + struct ipz_qpt rqpt; +}; + +struct ehca_pfcq { + struct ipz_qpt qpt; + u32 cqnr; +}; + +struct ehca_pfeq { + struct ipz_qpt qpt; + struct h_galpa galpa; + u32 eqnr; +}; + +struct ipz_adapter_handle { + u64 handle; +}; + +struct ipz_cq_handle { + u64 handle; +}; + +struct ipz_eq_handle { + u64 handle; +}; + +struct ipz_qp_handle { + u64 handle; +}; +struct ipz_mrmw_handle { + u64 handle; +}; + +struct ipz_pd { + u32 value; +}; + +struct hcp_modify_qp_control_block { + u32 qkey; /* 00 */ + u32 rdd; /* reliable datagram domain */ + u32 send_psn; /* 02 */ + u32 receive_psn; /* 03 */ + u32 prim_phys_port; /* 04 */ + u32 alt_phys_port; /* 05 */ + u32 prim_p_key_idx; /* 06 */ + u32 alt_p_key_idx; /* 07 */ + u32 rdma_atomic_ctrl; /* 08 */ + u32 qp_state; /* 09 */ + u32 reserved_10; /* 10 */ + u32 rdma_nr_atomic_resp_res; /* 11 */ + u32 path_migration_state; /* 12 */ + u32 rdma_atomic_outst_dest_qp; /* 13 */ + u32 dest_qp_nr; /* 14 */ + u32 min_rnr_nak_timer_field; /* 15 */ + u32 service_level; /* 16 */ + u32 send_grh_flag; /* 17 */ + u32 retry_count; /* 18 */ + u32 timeout; /* 19 */ + u32 path_mtu; /* 20 */ + u32 max_static_rate; /* 21 */ + u32 dlid; /* 22 */ + u32 rnr_retry_count; /* 23 */ + u32 source_path_bits; /* 24 */ + u32 traffic_class; /* 25 */ + u32 hop_limit; /* 26 */ + u32 source_gid_idx; /* 27 */ + u32 flow_label; /* 28 */ + u32 reserved_29; /* 29 */ + union { /* 30 */ + u64 dw[2]; + u8 byte[16]; + } dest_gid; + u32 service_level_al; /* 34 */ + u32 send_grh_flag_al; /* 35 */ + u32 retry_count_al; /* 36 */ + u32 timeout_al; /* 37 */ + u32 max_static_rate_al; /* 38 */ + u32 dlid_al; /* 39 */ + u32 rnr_retry_count_al; /* 40 */ + u32 source_path_bits_al; /* 41 */ + u32 traffic_class_al; /* 42 */ + u32 hop_limit_al; /* 43 */ + u32 source_gid_idx_al; /* 44 */ + u32 flow_label_al; /* 45 */ + 
u32 reserved_46; /* 46 */ + u32 reserved_47; /* 47 */ + union { /* 48 */ + u64 dw[2]; + u8 byte[16]; + } dest_gid_al; + u32 max_nr_outst_send_wr; /* 52 */ + u32 max_nr_outst_recv_wr; /* 53 */ + u32 disable_ete_credit_check; /* 54 */ + u32 qp_number; /* 55 */ + u64 send_queue_handle; /* 56 */ + u64 recv_queue_handle; /* 58 */ + u32 actual_nr_sges_in_sq_wqe; /* 60 */ + u32 actual_nr_sges_in_rq_wqe; /* 61 */ + u32 qp_enable; /* 62 */ + u32 curr_srq_limit; /* 63 */ + u64 qp_aff_asyn_ev_log_reg; /* 64 */ + u64 shared_rq_hndl; /* 66 */ + u64 trigg_doorbell_qp_hndl; /* 68 */ + u32 reserved_70_127[58]; /* 70 */ +}; + +#define MQPCB_MASK_QKEY EHCA_BMASK_IBM( 0, 0) +#define MQPCB_MASK_SEND_PSN EHCA_BMASK_IBM( 2, 2) +#define MQPCB_MASK_RECEIVE_PSN EHCA_BMASK_IBM( 3, 3) +#define MQPCB_MASK_PRIM_PHYS_PORT EHCA_BMASK_IBM( 4, 4) +#define MQPCB_PRIM_PHYS_PORT EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_ALT_PHYS_PORT EHCA_BMASK_IBM( 5, 5) +#define MQPCB_MASK_PRIM_P_KEY_IDX EHCA_BMASK_IBM( 6, 6) +#define MQPCB_PRIM_P_KEY_IDX EHCA_BMASK_IBM(24, 31) +#define MQPCB_MASK_ALT_P_KEY_IDX EHCA_BMASK_IBM( 7, 7) +#define MQPCB_MASK_RDMA_ATOMIC_CTRL EHCA_BMASK_IBM( 8, 8) +#define MQPCB_MASK_QP_STATE EHCA_BMASK_IBM( 9, 9) +#define MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES EHCA_BMASK_IBM(11, 11) +#define MQPCB_MASK_PATH_MIGRATION_STATE EHCA_BMASK_IBM(12, 12) +#define MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP EHCA_BMASK_IBM(13, 13) +#define MQPCB_MASK_DEST_QP_NR EHCA_BMASK_IBM(14, 14) +#define MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD EHCA_BMASK_IBM(15, 15) +#define MQPCB_MASK_SERVICE_LEVEL EHCA_BMASK_IBM(16, 16) +#define MQPCB_MASK_SEND_GRH_FLAG EHCA_BMASK_IBM(17, 17) +#define MQPCB_MASK_RETRY_COUNT EHCA_BMASK_IBM(18, 18) +#define MQPCB_MASK_TIMEOUT EHCA_BMASK_IBM(19, 19) +#define MQPCB_MASK_PATH_MTU EHCA_BMASK_IBM(20, 20) +#define MQPCB_MASK_MAX_STATIC_RATE EHCA_BMASK_IBM(21, 21) +#define MQPCB_MASK_DLID EHCA_BMASK_IBM(22, 22) +#define MQPCB_MASK_RNR_RETRY_COUNT EHCA_BMASK_IBM(23, 23) +#define 
MQPCB_MASK_SOURCE_PATH_BITS EHCA_BMASK_IBM(24, 24) +#define MQPCB_MASK_TRAFFIC_CLASS EHCA_BMASK_IBM(25, 25) +#define MQPCB_MASK_HOP_LIMIT EHCA_BMASK_IBM(26, 26) +#define MQPCB_MASK_SOURCE_GID_IDX EHCA_BMASK_IBM(27, 27) +#define MQPCB_MASK_FLOW_LABEL EHCA_BMASK_IBM(28, 28) +#define MQPCB_MASK_DEST_GID EHCA_BMASK_IBM(30, 30) +#define MQPCB_MASK_SERVICE_LEVEL_AL EHCA_BMASK_IBM(31, 31) +#define MQPCB_MASK_SEND_GRH_FLAG_AL EHCA_BMASK_IBM(32, 32) +#define MQPCB_MASK_RETRY_COUNT_AL EHCA_BMASK_IBM(33, 33) +#define MQPCB_MASK_TIMEOUT_AL EHCA_BMASK_IBM(34, 34) +#define MQPCB_MASK_MAX_STATIC_RATE_AL EHCA_BMASK_IBM(35, 35) +#define MQPCB_MASK_DLID_AL EHCA_BMASK_IBM(36, 36) +#define MQPCB_MASK_RNR_RETRY_COUNT_AL EHCA_BMASK_IBM(37, 37) +#define MQPCB_MASK_SOURCE_PATH_BITS_AL EHCA_BMASK_IBM(38, 38) +#define MQPCB_MASK_TRAFFIC_CLASS_AL EHCA_BMASK_IBM(39, 39) +#define MQPCB_MASK_HOP_LIMIT_AL EHCA_BMASK_IBM(40, 40) +#define MQPCB_MASK_SOURCE_GID_IDX_AL EHCA_BMASK_IBM(41, 41) +#define MQPCB_MASK_FLOW_LABEL_AL EHCA_BMASK_IBM(42, 42) +#define MQPCB_MASK_DEST_GID_AL EHCA_BMASK_IBM(44, 44) +#define MQPCB_MASK_MAX_NR_OUTST_SEND_WR EHCA_BMASK_IBM(45, 45) +#define MQPCB_MASK_MAX_NR_OUTST_RECV_WR EHCA_BMASK_IBM(46, 46) +#define MQPCB_MASK_DISABLE_ETE_CREDIT_CHECK EHCA_BMASK_IBM(47, 47) +#define MQPCB_MASK_QP_ENABLE EHCA_BMASK_IBM(48, 48) +#define MQPCB_MASK_CURR_SRQ_LIMIT EHCA_BMASK_IBM(49, 49) +#define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG EHCA_BMASK_IBM(50, 50) +#define MQPCB_MASK_SHARED_RQ_HNDL EHCA_BMASK_IBM(51, 51) + +#endif /* __EHCA_CLASSES_PSERIES_H__ */ diff --git a/drivers/staging/rdma/ehca/ehca_cq.c b/drivers/staging/rdma/ehca/ehca_cq.c new file mode 100644 index 000000000000..9b68b175069b --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_cq.c @@ -0,0 +1,397 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Completion queue handling + * + * Authors: Waleri Fomin + * Khadija Souissi + * Reinhard Ernst + * Heiko J Schick + * Hoang-Nam Nguyen + * + * + * 
Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <linux/slab.h> + +#include "ehca_iverbs.h" +#include "ehca_classes.h" +#include "ehca_irq.h" +#include "hcp_if.h" + +static struct kmem_cache *cq_cache; + +int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp) +{ + unsigned int qp_num = qp->real_qp_num; + unsigned int key = qp_num & (QP_HASHTAB_LEN-1); + unsigned long flags; + + spin_lock_irqsave(&cq->spinlock, flags); + hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]); + spin_unlock_irqrestore(&cq->spinlock, flags); + + ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x", + cq->cq_number, qp_num); + + return 0; +} + +int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num) +{ + int ret = -EINVAL; + unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); + struct hlist_node *iter; + struct ehca_qp *qp; + unsigned long flags; + + spin_lock_irqsave(&cq->spinlock, flags); + hlist_for_each(iter, &cq->qp_hashtab[key]) { + qp = hlist_entry(iter, struct ehca_qp, list_entries); + if (qp->real_qp_num == real_qp_num) { + hlist_del(iter); + ehca_dbg(cq->ib_cq.device, + "removed qp from cq .cq_num=%x real_qp_num=%x", + cq->cq_number, real_qp_num); + ret = 0; + break; + } + } + spin_unlock_irqrestore(&cq->spinlock, flags); + if (ret) + ehca_err(cq->ib_cq.device, + "qp not found cq_num=%x real_qp_num=%x", + cq->cq_number, real_qp_num); + + return ret; +} + +struct ehca_qp *ehca_cq_get_qp(struct ehca_cq *cq, int real_qp_num) +{ + struct ehca_qp *ret = NULL; + unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1); + struct hlist_node *iter; + struct ehca_qp *qp; + hlist_for_each(iter, &cq->qp_hashtab[key]) { + qp = hlist_entry(iter, struct ehca_qp, list_entries); + if (qp->real_qp_num == real_qp_num) { + ret = qp; + break; + } + } + return ret; +} + +struct ib_cq *ehca_create_cq(struct ib_device *device, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + int cqe = attr->cqe; + static const u32 additional_cqe = 20; + struct ib_cq *cq; + struct ehca_cq 
*my_cq; + struct ehca_shca *shca = + container_of(device, struct ehca_shca, ib_device); + struct ipz_adapter_handle adapter_handle; + struct ehca_alloc_cq_parms param; /* h_call's out parameters */ + struct h_galpa gal; + void *vpage; + u32 counter; + u64 rpage, cqx_fec, h_ret; + int ipz_rc, i; + unsigned long flags; + + if (attr->flags) + return ERR_PTR(-EINVAL); + + if (cqe >= 0xFFFFFFFF - 64 - additional_cqe) + return ERR_PTR(-EINVAL); + + if (!atomic_add_unless(&shca->num_cqs, 1, shca->max_num_cqs)) { + ehca_err(device, "Unable to create CQ, max number of %i " + "CQs reached.", shca->max_num_cqs); + ehca_err(device, "To increase the maximum number of CQs " + "use the number_of_cqs module parameter.\n"); + return ERR_PTR(-ENOSPC); + } + + my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL); + if (!my_cq) { + ehca_err(device, "Out of memory for ehca_cq struct device=%p", + device); + atomic_dec(&shca->num_cqs); + return ERR_PTR(-ENOMEM); + } + + memset(&param, 0, sizeof(struct ehca_alloc_cq_parms)); + + spin_lock_init(&my_cq->spinlock); + spin_lock_init(&my_cq->cb_lock); + spin_lock_init(&my_cq->task_lock); + atomic_set(&my_cq->nr_events, 0); + init_waitqueue_head(&my_cq->wait_completion); + + cq = &my_cq->ib_cq; + + adapter_handle = shca->ipz_hca_handle; + param.eq_handle = shca->eq.ipz_eq_handle; + + idr_preload(GFP_KERNEL); + write_lock_irqsave(&ehca_cq_idr_lock, flags); + my_cq->token = idr_alloc(&ehca_cq_idr, my_cq, 0, 0x2000000, GFP_NOWAIT); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); + idr_preload_end(); + + if (my_cq->token < 0) { + cq = ERR_PTR(-ENOMEM); + ehca_err(device, "Can't allocate new idr entry. device=%p", + device); + goto create_cq_exit1; + } + + /* + * CQs maximum depth is 4GB-64, but we need additional 20 as buffer + * for receiving errors CQEs. 
+ */ + param.nr_cqe = cqe + additional_cqe; + h_ret = hipz_h_alloc_resource_cq(adapter_handle, my_cq, &param); + + if (h_ret != H_SUCCESS) { + ehca_err(device, "hipz_h_alloc_resource_cq() failed " + "h_ret=%lli device=%p", h_ret, device); + cq = ERR_PTR(ehca2ib_return_code(h_ret)); + goto create_cq_exit2; + } + + ipz_rc = ipz_queue_ctor(NULL, &my_cq->ipz_queue, param.act_pages, + EHCA_PAGESIZE, sizeof(struct ehca_cqe), 0, 0); + if (!ipz_rc) { + ehca_err(device, "ipz_queue_ctor() failed ipz_rc=%i device=%p", + ipz_rc, device); + cq = ERR_PTR(-EINVAL); + goto create_cq_exit3; + } + + for (counter = 0; counter < param.act_pages; counter++) { + vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue); + if (!vpage) { + ehca_err(device, "ipz_qpageit_get_inc() " + "returns NULL device=%p", device); + cq = ERR_PTR(-EAGAIN); + goto create_cq_exit4; + } + rpage = __pa(vpage); + + h_ret = hipz_h_register_rpage_cq(adapter_handle, + my_cq->ipz_cq_handle, + &my_cq->pf, + 0, + 0, + rpage, + 1, + my_cq->galpas. + kernel); + + if (h_ret < H_SUCCESS) { + ehca_err(device, "hipz_h_register_rpage_cq() failed " + "ehca_cq=%p cq_num=%x h_ret=%lli counter=%i " + "act_pages=%i", my_cq, my_cq->cq_number, + h_ret, counter, param.act_pages); + cq = ERR_PTR(-EINVAL); + goto create_cq_exit4; + } + + if (counter == (param.act_pages - 1)) { + vpage = ipz_qpageit_get_inc(&my_cq->ipz_queue); + if ((h_ret != H_SUCCESS) || vpage) { + ehca_err(device, "Registration of pages not " + "complete ehca_cq=%p cq_num=%x " + "h_ret=%lli", my_cq, my_cq->cq_number, + h_ret); + cq = ERR_PTR(-EAGAIN); + goto create_cq_exit4; + } + } else { + if (h_ret != H_PAGE_REGISTERED) { + ehca_err(device, "Registration of page failed " + "ehca_cq=%p cq_num=%x h_ret=%lli " + "counter=%i act_pages=%i", + my_cq, my_cq->cq_number, + h_ret, counter, param.act_pages); + cq = ERR_PTR(-ENOMEM); + goto create_cq_exit4; + } + } + } + + ipz_qeit_reset(&my_cq->ipz_queue); + + gal = my_cq->galpas.kernel; + cqx_fec = hipz_galpa_load(gal, 
CQTEMM_OFFSET(cqx_fec)); + ehca_dbg(device, "ehca_cq=%p cq_num=%x CQX_FEC=%llx", + my_cq, my_cq->cq_number, cqx_fec); + + my_cq->ib_cq.cqe = my_cq->nr_of_entries = + param.act_nr_of_entries - additional_cqe; + my_cq->cq_number = (my_cq->ipz_cq_handle.handle) & 0xffff; + + for (i = 0; i < QP_HASHTAB_LEN; i++) + INIT_HLIST_HEAD(&my_cq->qp_hashtab[i]); + + INIT_LIST_HEAD(&my_cq->sqp_err_list); + INIT_LIST_HEAD(&my_cq->rqp_err_list); + + if (context) { + struct ipz_queue *ipz_queue = &my_cq->ipz_queue; + struct ehca_create_cq_resp resp; + memset(&resp, 0, sizeof(resp)); + resp.cq_number = my_cq->cq_number; + resp.token = my_cq->token; + resp.ipz_queue.qe_size = ipz_queue->qe_size; + resp.ipz_queue.act_nr_of_sg = ipz_queue->act_nr_of_sg; + resp.ipz_queue.queue_length = ipz_queue->queue_length; + resp.ipz_queue.pagesize = ipz_queue->pagesize; + resp.ipz_queue.toggle_state = ipz_queue->toggle_state; + resp.fw_handle_ofs = (u32) + (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1)); + if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { + ehca_err(device, "Copy to udata failed."); + cq = ERR_PTR(-EFAULT); + goto create_cq_exit4; + } + } + + return cq; + +create_cq_exit4: + ipz_queue_dtor(NULL, &my_cq->ipz_queue); + +create_cq_exit3: + h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); + if (h_ret != H_SUCCESS) + ehca_err(device, "hipz_h_destroy_cq() failed ehca_cq=%p " + "cq_num=%x h_ret=%lli", my_cq, my_cq->cq_number, h_ret); + +create_cq_exit2: + write_lock_irqsave(&ehca_cq_idr_lock, flags); + idr_remove(&ehca_cq_idr, my_cq->token); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); + +create_cq_exit1: + kmem_cache_free(cq_cache, my_cq); + + atomic_dec(&shca->num_cqs); + return cq; +} + +int ehca_destroy_cq(struct ib_cq *cq) +{ + u64 h_ret; + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + int cq_num = my_cq->cq_number; + struct ib_device *device = cq->device; + struct ehca_shca *shca = container_of(device, struct ehca_shca, + ib_device); + struct 
ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; + unsigned long flags; + + if (cq->uobject) { + if (my_cq->mm_count_galpa || my_cq->mm_count_queue) { + ehca_err(device, "Resources still referenced in " + "user space cq_num=%x", my_cq->cq_number); + return -EINVAL; + } + } + + /* + * remove the CQ from the idr first to make sure + * no more interrupt tasklets will touch this CQ + */ + write_lock_irqsave(&ehca_cq_idr_lock, flags); + idr_remove(&ehca_cq_idr, my_cq->token); + write_unlock_irqrestore(&ehca_cq_idr_lock, flags); + + /* now wait until all pending events have completed */ + wait_event(my_cq->wait_completion, !atomic_read(&my_cq->nr_events)); + + /* nobody's using our CQ any longer -- we can destroy it */ + h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0); + if (h_ret == H_R_STATE) { + /* cq in err: read err data and destroy it forcibly */ + ehca_dbg(device, "ehca_cq=%p cq_num=%x resource=%llx in err " + "state. Try to delete it forcibly.", + my_cq, cq_num, my_cq->ipz_cq_handle.handle); + ehca_error_data(shca, my_cq, my_cq->ipz_cq_handle.handle); + h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 1); + if (h_ret == H_SUCCESS) + ehca_dbg(device, "cq_num=%x deleted successfully.", + cq_num); + } + if (h_ret != H_SUCCESS) { + ehca_err(device, "hipz_h_destroy_cq() failed h_ret=%lli " + "ehca_cq=%p cq_num=%x", h_ret, my_cq, cq_num); + return ehca2ib_return_code(h_ret); + } + ipz_queue_dtor(NULL, &my_cq->ipz_queue); + kmem_cache_free(cq_cache, my_cq); + + atomic_dec(&shca->num_cqs); + return 0; +} + +int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) +{ + /* TODO: proper resize needs to be done */ + ehca_err(cq->device, "not implemented yet"); + + return -EFAULT; +} + +int ehca_init_cq_cache(void) +{ + cq_cache = kmem_cache_create("ehca_cache_cq", + sizeof(struct ehca_cq), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!cq_cache) + return -ENOMEM; + return 0; +} + +void ehca_cleanup_cq_cache(void) +{ + if (cq_cache) + 
kmem_cache_destroy(cq_cache); +} diff --git a/drivers/staging/rdma/ehca/ehca_eq.c b/drivers/staging/rdma/ehca/ehca_eq.c new file mode 100644 index 000000000000..90da6747d395 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_eq.c @@ -0,0 +1,189 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Event queue handling + * + * Authors: Waleri Fomin + * Khadija Souissi + * Reinhard Ernst + * Heiko J Schick + * Hoang-Nam Nguyen + * + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "ehca_classes.h" +#include "ehca_irq.h" +#include "ehca_iverbs.h" +#include "ehca_qes.h" +#include "hcp_if.h" +#include "ipz_pt_fn.h" + +int ehca_create_eq(struct ehca_shca *shca, + struct ehca_eq *eq, + const enum ehca_eq_type type, const u32 length) +{ + int ret; + u64 h_ret; + u32 nr_pages; + u32 i; + void *vpage; + struct ib_device *ib_dev = &shca->ib_device; + + spin_lock_init(&eq->spinlock); + spin_lock_init(&eq->irq_spinlock); + eq->is_initialized = 0; + + if (type != EHCA_EQ && type != EHCA_NEQ) { + ehca_err(ib_dev, "Invalid EQ type %x. eq=%p", type, eq); + return -EINVAL; + } + if (!length) { + ehca_err(ib_dev, "EQ length must not be zero. eq=%p", eq); + return -EINVAL; + } + + h_ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle, + &eq->pf, + type, + length, + &eq->ipz_eq_handle, + &eq->length, + &nr_pages, &eq->ist); + + if (h_ret != H_SUCCESS) { + ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq); + return -EINVAL; + } + + ret = ipz_queue_ctor(NULL, &eq->ipz_queue, nr_pages, + EHCA_PAGESIZE, sizeof(struct ehca_eqe), 0, 0); + if (!ret) { + ehca_err(ib_dev, "Can't allocate EQ pages eq=%p", eq); + goto create_eq_exit1; + } + + for (i = 0; i < nr_pages; i++) { + u64 rpage; + + vpage = ipz_qpageit_get_inc(&eq->ipz_queue); + if (!vpage) + goto create_eq_exit2; + + rpage = __pa(vpage); + h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle, + eq->ipz_eq_handle, + &eq->pf, + 0, 0, rpage, 1); + + if (i == (nr_pages - 1)) { + /* last page */ + vpage = ipz_qpageit_get_inc(&eq->ipz_queue); + if (h_ret != H_SUCCESS || vpage) + goto create_eq_exit2; + } else { + if (h_ret != H_PAGE_REGISTERED) + goto create_eq_exit2; + } + } + + ipz_qeit_reset(&eq->ipz_queue); + + /* register interrupt handlers and initialize work queues */ + if (type == EHCA_EQ) { + tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca); + + ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq, + 0, "ehca_eq", + (void *)shca); + if (ret < 0) + ehca_err(ib_dev, "Can't 
map interrupt handler."); + } else if (type == EHCA_NEQ) { + tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca); + + ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq, + 0, "ehca_neq", + (void *)shca); + if (ret < 0) + ehca_err(ib_dev, "Can't map interrupt handler."); + } + + eq->is_initialized = 1; + + return 0; + +create_eq_exit2: + ipz_queue_dtor(NULL, &eq->ipz_queue); + +create_eq_exit1: + hipz_h_destroy_eq(shca->ipz_hca_handle, eq); + + return -EINVAL; +} + +void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq) +{ + unsigned long flags; + void *eqe; + + spin_lock_irqsave(&eq->spinlock, flags); + eqe = ipz_eqit_eq_get_inc_valid(&eq->ipz_queue); + spin_unlock_irqrestore(&eq->spinlock, flags); + + return eqe; +} + +int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq) +{ + unsigned long flags; + u64 h_ret; + + ibmebus_free_irq(eq->ist, (void *)shca); + + spin_lock_irqsave(&shca_list_lock, flags); + eq->is_initialized = 0; + spin_unlock_irqrestore(&shca_list_lock, flags); + + tasklet_kill(&eq->interrupt_task); + + h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq); + + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't free EQ resources."); + return -EINVAL; + } + ipz_queue_dtor(NULL, &eq->ipz_queue); + + return 0; +} diff --git a/drivers/staging/rdma/ehca/ehca_hca.c b/drivers/staging/rdma/ehca/ehca_hca.c new file mode 100644 index 000000000000..e8b1bb65797a --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_hca.c @@ -0,0 +1,414 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * HCA query functions + * + * Authors: Heiko J Schick + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. 
+ * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include "ehca_tools.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" + +static unsigned int limit_uint(unsigned int value) +{ + return min_t(unsigned int, value, INT_MAX); +} + +int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) +{ + int i, ret = 0; + struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, + ib_device); + struct hipz_query_hca *rblock; + + static const u32 cap_mapping[] = { + IB_DEVICE_RESIZE_MAX_WR, HCA_CAP_WQE_RESIZE, + IB_DEVICE_BAD_PKEY_CNTR, HCA_CAP_BAD_P_KEY_CTR, + IB_DEVICE_BAD_QKEY_CNTR, HCA_CAP_Q_KEY_VIOL_CTR, + IB_DEVICE_RAW_MULTI, HCA_CAP_RAW_PACKET_MCAST, + IB_DEVICE_AUTO_PATH_MIG, HCA_CAP_AUTO_PATH_MIG, + IB_DEVICE_CHANGE_PHY_PORT, HCA_CAP_SQD_RTS_PORT_CHANGE, + IB_DEVICE_UD_AV_PORT_ENFORCE, HCA_CAP_AH_PORT_NR_CHECK, + IB_DEVICE_CURR_QP_STATE_MOD, HCA_CAP_CUR_QP_STATE_MOD, + IB_DEVICE_SHUTDOWN_PORT, HCA_CAP_SHUTDOWN_PORT, + IB_DEVICE_INIT_TYPE, HCA_CAP_INIT_TYPE, + IB_DEVICE_PORT_ACTIVE_EVENT, HCA_CAP_PORT_ACTIVE_EVENT, + }; + + if (uhw->inlen || uhw->outlen) + return -EINVAL; + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query device properties"); + ret = -EINVAL; + goto query_device1; + } + + memset(props, 0, sizeof(struct ib_device_attr)); + props->page_size_cap = shca->hca_cap_mr_pgsize; + props->fw_ver = rblock->hw_ver; + props->max_mr_size = rblock->max_mr_size; + props->vendor_id = rblock->vendor_id >> 8; + props->vendor_part_id = rblock->vendor_part_id >> 16; + props->hw_ver = rblock->hw_ver; + props->max_qp = limit_uint(rblock->max_qp); + props->max_qp_wr = limit_uint(rblock->max_wqes_wq); + props->max_sge = limit_uint(rblock->max_sge); + props->max_sge_rd = limit_uint(rblock->max_sge_rd); + props->max_cq = limit_uint(rblock->max_cq); + 
props->max_cqe = limit_uint(rblock->max_cqe); + props->max_mr = limit_uint(rblock->max_mr); + props->max_mw = limit_uint(rblock->max_mw); + props->max_pd = limit_uint(rblock->max_pd); + props->max_ah = limit_uint(rblock->max_ah); + props->max_ee = limit_uint(rblock->max_rd_ee_context); + props->max_rdd = limit_uint(rblock->max_rd_domain); + props->max_fmr = limit_uint(rblock->max_mr); + props->max_qp_rd_atom = limit_uint(rblock->max_rr_qp); + props->max_ee_rd_atom = limit_uint(rblock->max_rr_ee_context); + props->max_res_rd_atom = limit_uint(rblock->max_rr_hca); + props->max_qp_init_rd_atom = limit_uint(rblock->max_act_wqs_qp); + props->max_ee_init_rd_atom = limit_uint(rblock->max_act_wqs_ee_context); + + if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { + props->max_srq = limit_uint(props->max_qp); + props->max_srq_wr = limit_uint(props->max_qp_wr); + props->max_srq_sge = 3; + } + + props->max_pkeys = 16; + /* Some FW versions say 0 here; insert sensible value in that case */ + props->local_ca_ack_delay = rblock->local_ca_ack_delay ? 
+ min_t(u8, rblock->local_ca_ack_delay, 255) : 12; + props->max_raw_ipv6_qp = limit_uint(rblock->max_raw_ipv6_qp); + props->max_raw_ethy_qp = limit_uint(rblock->max_raw_ethy_qp); + props->max_mcast_grp = limit_uint(rblock->max_mcast_grp); + props->max_mcast_qp_attach = limit_uint(rblock->max_mcast_qp_attach); + props->max_total_mcast_qp_attach + = limit_uint(rblock->max_total_mcast_qp_attach); + + /* translate device capabilities */ + props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID | + IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_N_NOTIFY_CQ; + for (i = 0; i < ARRAY_SIZE(cap_mapping); i += 2) + if (rblock->hca_cap_indicators & cap_mapping[i + 1]) + props->device_cap_flags |= cap_mapping[i]; + +query_device1: + ehca_free_fw_ctrlblock(rblock); + + return ret; +} + +static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu) +{ + switch (fw_mtu) { + case 0x1: + return IB_MTU_256; + case 0x2: + return IB_MTU_512; + case 0x3: + return IB_MTU_1024; + case 0x4: + return IB_MTU_2048; + case 0x5: + return IB_MTU_4096; + default: + ehca_err(&shca->ib_device, "Unknown MTU size: %x.", + fw_mtu); + return 0; + } +} + +static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap) +{ + switch (vl_cap) { + case 0x1: + return 1; + case 0x2: + return 2; + case 0x3: + return 4; + case 0x4: + return 8; + case 0x5: + return 15; + default: + ehca_err(&shca->ib_device, "invalid Vl Capability: %x.", + vl_cap); + return 0; + } +} + +int ehca_query_port(struct ib_device *ibdev, + u8 port, struct ib_port_attr *props) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, + ib_device); + struct hipz_query_port *rblock; + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto 
query_port1; + } + + memset(props, 0, sizeof(struct ib_port_attr)); + + props->active_mtu = props->max_mtu = map_mtu(shca, rblock->max_mtu); + props->port_cap_flags = rblock->capability_mask; + props->gid_tbl_len = rblock->gid_tbl_len; + if (rblock->max_msg_sz) + props->max_msg_sz = rblock->max_msg_sz; + else + props->max_msg_sz = 0x1 << 31; + props->bad_pkey_cntr = rblock->bad_pkey_cntr; + props->qkey_viol_cntr = rblock->qkey_viol_cntr; + props->pkey_tbl_len = rblock->pkey_tbl_len; + props->lid = rblock->lid; + props->sm_lid = rblock->sm_lid; + props->lmc = rblock->lmc; + props->sm_sl = rblock->sm_sl; + props->subnet_timeout = rblock->subnet_timeout; + props->init_type_reply = rblock->init_type_reply; + props->max_vl_num = map_number_of_vls(shca, rblock->vl_cap); + + if (rblock->state && rblock->phys_width) { + props->phys_state = rblock->phys_pstate; + props->state = rblock->phys_state; + props->active_width = rblock->phys_width; + props->active_speed = rblock->phys_speed; + } else { + /* old firmware releases don't report physical + * port info, so use default values + */ + props->phys_state = 5; + props->state = rblock->state; + props->active_width = IB_WIDTH_12X; + props->active_speed = IB_SPEED_SDR; + } + +query_port1: + ehca_free_fw_ctrlblock(rblock); + + return ret; +} + +int ehca_query_sma_attr(struct ehca_shca *shca, + u8 port, struct ehca_sma_attr *attr) +{ + int ret = 0; + u64 h_ret; + struct hipz_query_port *rblock; + + rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto query_sma_attr1; + } + + memset(attr, 0, sizeof(struct ehca_sma_attr)); + + attr->lid = rblock->lid; + attr->lmc = rblock->lmc; + attr->sm_sl = rblock->sm_sl; + attr->sm_lid = rblock->sm_lid; + + attr->pkey_tbl_len 
= rblock->pkey_tbl_len; + memcpy(attr->pkeys, rblock->pkey_entries, sizeof(attr->pkeys)); + +query_sma_attr1: + ehca_free_fw_ctrlblock(rblock); + + return ret; +} + +/* + * ehca_query_pkey() - copy the P_Key stored at @index for @port into @pkey. + * + * The port is queried through hipz_h_query_port() into a temporary firmware + * control block, which is released again before returning. + * + * Returns 0 on success, -EINVAL if @index is out of range or the port query + * fails, and -ENOMEM if no firmware control block can be allocated. + */ +int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca; + struct hipz_query_port *rblock; + + shca = container_of(ibdev, struct ehca_shca, ib_device); + /* ehca_query_device() reports max_pkeys = 16, so valid indices are 0..15 */ + if (index >= 16) { + ehca_err(&shca->ib_device, "Invalid index: %x.", index); + return -EINVAL; + } + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto query_pkey1; + } + + /* + * Address the element itself: "&pkey_entries + index" scales by the + * size of the whole member instead of by one u16. + */ + memcpy(pkey, &rblock->pkey_entries[index], sizeof(u16)); + +query_pkey1: + ehca_free_fw_ctrlblock(rblock); + + return ret; +} + +int ehca_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *gid) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca = container_of(ibdev, struct ehca_shca, + ib_device); + struct hipz_query_port *rblock; + + if (index < 0 || index > 255) { + ehca_err(&shca->ib_device, "Invalid index: %x.", index); + return -EINVAL; + } + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + h_ret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto query_gid1; + } + + memcpy(&gid->raw[0], &rblock->gid_prefix, sizeof(u64)); + memcpy(&gid->raw[8], &rblock->guid_entries[index], sizeof(u64)); + +query_gid1: + ehca_free_fw_ctrlblock(rblock); + + return ret; +} + +static const u32 allowed_port_caps = ( + IB_PORT_SM | IB_PORT_LED_INFO_SUP | IB_PORT_CM_SUP | + IB_PORT_SNMP_TUNNEL_SUP | 
IB_PORT_DEVICE_MGMT_SUP | + IB_PORT_VENDOR_CLASS_SUP); + +int ehca_modify_port(struct ib_device *ibdev, + u8 port, int port_modify_mask, + struct ib_port_modify *props) +{ + int ret = 0; + struct ehca_shca *shca; + struct hipz_query_port *rblock; + u32 cap; + u64 hret; + + shca = container_of(ibdev, struct ehca_shca, ib_device); + if ((props->set_port_cap_mask | props->clr_port_cap_mask) + & ~allowed_port_caps) { + ehca_err(&shca->ib_device, "Non-changeable bits set in masks " + "set=%x clr=%x allowed=%x", props->set_port_cap_mask, + props->clr_port_cap_mask, allowed_port_caps); + return -EINVAL; + } + + if (mutex_lock_interruptible(&shca->modify_mutex)) + return -ERESTARTSYS; + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + ret = -ENOMEM; + goto modify_port1; + } + + hret = hipz_h_query_port(shca->ipz_hca_handle, port, rblock); + if (hret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query port properties"); + ret = -EINVAL; + goto modify_port2; + } + + cap = (rblock->capability_mask | props->set_port_cap_mask) + & ~props->clr_port_cap_mask; + + hret = hipz_h_modify_port(shca->ipz_hca_handle, port, + cap, props->init_type, port_modify_mask); + if (hret != H_SUCCESS) { + ehca_err(&shca->ib_device, "Modify port failed h_ret=%lli", + hret); + ret = -EINVAL; + } + +modify_port2: + ehca_free_fw_ctrlblock(rblock); + +modify_port1: + mutex_unlock(&shca->modify_mutex); + + return ret; +} diff --git a/drivers/staging/rdma/ehca/ehca_irq.c b/drivers/staging/rdma/ehca/ehca_irq.c new file mode 100644 index 000000000000..8615d7cf7e01 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_irq.c @@ -0,0 +1,870 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Functions for EQs, NEQs and interrupts + * + * Authors: Heiko J Schick + * Khadija Souissi + * Hoang-Nam Nguyen + * Joachim Fenkes + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. 
+ * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +#include "ehca_classes.h" +#include "ehca_irq.h" +#include "ehca_iverbs.h" +#include "ehca_tools.h" +#include "hcp_if.h" +#include "hipz_fns.h" +#include "ipz_pt_fn.h" + +#define EQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) +#define EQE_CQ_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_EE_IDENTIFIER EHCA_BMASK_IBM( 2, 7) +#define EQE_CQ_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_QP_NUMBER EHCA_BMASK_IBM( 8, 31) +#define EQE_QP_TOKEN EHCA_BMASK_IBM(32, 63) +#define EQE_CQ_TOKEN EHCA_BMASK_IBM(32, 63) + +#define NEQE_COMPLETION_EVENT EHCA_BMASK_IBM( 1, 1) +#define NEQE_EVENT_CODE EHCA_BMASK_IBM( 2, 7) +#define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15) +#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16) +#define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16) +#define NEQE_SPECIFIC_EVENT EHCA_BMASK_IBM(16, 23) + +#define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63) +#define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7) + +static void queue_comp_task(struct ehca_cq *__cq); + +static struct ehca_comp_pool *pool; + +static inline void comp_event_callback(struct ehca_cq *cq) +{ + if (!cq->ib_cq.comp_handler) + return; + + spin_lock(&cq->cb_lock); + cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context); + spin_unlock(&cq->cb_lock); + + return; +} + +static void print_error_data(struct ehca_shca *shca, void *data, + u64 *rblock, int length) +{ + u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]); + u64 resource = rblock[1]; + + switch (type) { + case 0x1: /* Queue Pair */ + { + struct ehca_qp *qp = (struct ehca_qp *)data; + + /* only print error data if AER is set */ + if (rblock[6] == 0) + return; + + ehca_err(&shca->ib_device, + "QP 0x%x (resource=%llx) has errors.", + qp->ib_qp.qp_num, resource); + break; + } + case 0x4: /* Completion Queue */ + { + struct ehca_cq *cq = (struct ehca_cq *)data; + + ehca_err(&shca->ib_device, + "CQ 0x%x (resource=%llx) has errors.", + cq->cq_number, resource); + break; + } + default: + ehca_err(&shca->ib_device, + "Unknown 
error type: %llx on %s.", + type, shca->ib_device.name); + break; + } + + ehca_err(&shca->ib_device, "Error data is available: %llx.", resource); + ehca_err(&shca->ib_device, "EHCA ----- error data begin " + "---------------------------------------------------"); + ehca_dmp(rblock, length, "resource=%llx", resource); + ehca_err(&shca->ib_device, "EHCA ----- error data end " + "----------------------------------------------------"); + + return; +} + +int ehca_error_data(struct ehca_shca *shca, void *data, + u64 resource) +{ + + unsigned long ret; + u64 *rblock; + unsigned long block_count; + + rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); + if (!rblock) { + ehca_err(&shca->ib_device, "Cannot allocate rblock memory."); + ret = -ENOMEM; + goto error_data1; + } + + /* rblock must be 4K aligned and should be 4K large */ + ret = hipz_h_error_data(shca->ipz_hca_handle, + resource, + rblock, + &block_count); + + if (ret == H_R_STATE) + ehca_err(&shca->ib_device, + "No error data is available: %llx.", resource); + else if (ret == H_SUCCESS) { + int length; + + length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]); + + if (length > EHCA_PAGESIZE) + length = EHCA_PAGESIZE; + + print_error_data(shca, data, rblock, length); + } else + ehca_err(&shca->ib_device, + "Error data could not be fetched: %llx", resource); + + ehca_free_fw_ctrlblock(rblock); + +error_data1: + return ret; + +} + +static void dispatch_qp_event(struct ehca_shca *shca, struct ehca_qp *qp, + enum ib_event_type event_type) +{ + struct ib_event event; + + /* PATH_MIG without the QP ever having been armed is false alarm */ + if (event_type == IB_EVENT_PATH_MIG && !qp->mig_armed) + return; + + event.device = &shca->ib_device; + event.event = event_type; + + if (qp->ext_type == EQPT_SRQ) { + if (!qp->ib_srq.event_handler) + return; + + event.element.srq = &qp->ib_srq; + qp->ib_srq.event_handler(&event, qp->ib_srq.srq_context); + } else { + if (!qp->ib_qp.event_handler) + return; + + event.element.qp = &qp->ib_qp; 
+ qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context); + } +} + +static void qp_event_callback(struct ehca_shca *shca, u64 eqe, + enum ib_event_type event_type, int fatal) +{ + struct ehca_qp *qp; + u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe); + + read_lock(&ehca_qp_idr_lock); + qp = idr_find(&ehca_qp_idr, token); + if (qp) + atomic_inc(&qp->nr_events); + read_unlock(&ehca_qp_idr_lock); + + if (!qp) + return; + + if (fatal) + ehca_error_data(shca, qp, qp->ipz_qp_handle.handle); + + dispatch_qp_event(shca, qp, fatal && qp->ext_type == EQPT_SRQ ? + IB_EVENT_SRQ_ERR : event_type); + + /* + * eHCA only processes one WQE at a time for SRQ base QPs, + * so the last WQE has been processed as soon as the QP enters + * error state. + */ + if (fatal && qp->ext_type == EQPT_SRQBASE) + dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED); + + if (atomic_dec_and_test(&qp->nr_events)) + wake_up(&qp->wait_completion); + return; +} + +static void cq_event_callback(struct ehca_shca *shca, + u64 eqe) +{ + struct ehca_cq *cq; + u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe); + + read_lock(&ehca_cq_idr_lock); + cq = idr_find(&ehca_cq_idr, token); + if (cq) + atomic_inc(&cq->nr_events); + read_unlock(&ehca_cq_idr_lock); + + if (!cq) + return; + + ehca_error_data(shca, cq, cq->ipz_cq_handle.handle); + + if (atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + + return; +} + +static void parse_identifier(struct ehca_shca *shca, u64 eqe) +{ + u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe); + + switch (identifier) { + case 0x02: /* path migrated */ + qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG, 0); + break; + case 0x03: /* communication established */ + qp_event_callback(shca, eqe, IB_EVENT_COMM_EST, 0); + break; + case 0x04: /* send queue drained */ + qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED, 0); + break; + case 0x05: /* QP error */ + case 0x06: /* QP error */ + qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL, 1); + break; + case 0x07: /* CQ 
error */ + case 0x08: /* CQ error */ + cq_event_callback(shca, eqe); + break; + case 0x09: /* MRMWPTE error */ + ehca_err(&shca->ib_device, "MRMWPTE error."); + break; + case 0x0A: /* port event */ + ehca_err(&shca->ib_device, "Port event."); + break; + case 0x0B: /* MR access error */ + ehca_err(&shca->ib_device, "MR access error."); + break; + case 0x0C: /* EQ error */ + ehca_err(&shca->ib_device, "EQ error."); + break; + case 0x0D: /* P/Q_Key mismatch */ + ehca_err(&shca->ib_device, "P/Q_Key mismatch."); + break; + case 0x10: /* sampling complete */ + ehca_err(&shca->ib_device, "Sampling complete."); + break; + case 0x11: /* unaffiliated access error */ + ehca_err(&shca->ib_device, "Unaffiliated access error."); + break; + case 0x12: /* path migrating */ + ehca_err(&shca->ib_device, "Path migrating."); + break; + case 0x13: /* interface trace stopped */ + ehca_err(&shca->ib_device, "Interface trace stopped."); + break; + case 0x14: /* first error capture info available */ + ehca_info(&shca->ib_device, "First error capture available"); + break; + case 0x15: /* SRQ limit reached */ + qp_event_callback(shca, eqe, IB_EVENT_SRQ_LIMIT_REACHED, 0); + break; + default: + ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.", + identifier, shca->ib_device.name); + break; + } + + return; +} + +static void dispatch_port_event(struct ehca_shca *shca, int port_num, + enum ib_event_type type, const char *msg) +{ + struct ib_event event; + + ehca_info(&shca->ib_device, "port %d %s.", port_num, msg); + event.device = &shca->ib_device; + event.event = type; + event.element.port_num = port_num; + ib_dispatch_event(&event); +} + +static void notify_port_conf_change(struct ehca_shca *shca, int port_num) +{ + struct ehca_sma_attr new_attr; + struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr; + + ehca_query_sma_attr(shca, port_num, &new_attr); + + if (new_attr.sm_sl != old_attr->sm_sl || + new_attr.sm_lid != old_attr->sm_lid) + dispatch_port_event(shca, 
port_num, IB_EVENT_SM_CHANGE, + "SM changed"); + + if (new_attr.lid != old_attr->lid || + new_attr.lmc != old_attr->lmc) + dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE, + "LID changed"); + + if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len || + memcmp(new_attr.pkeys, old_attr->pkeys, + sizeof(u16) * new_attr.pkey_tbl_len)) + dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE, + "P_Key changed"); + + *old_attr = new_attr; +} + +/* replay modify_qp for sqps -- return 0 if all is well, 1 if AQP1 destroyed */ +static int replay_modify_qp(struct ehca_sport *sport) +{ + int aqp1_destroyed; + unsigned long flags; + + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + + aqp1_destroyed = !sport->ibqp_sqp[IB_QPT_GSI]; + + if (sport->ibqp_sqp[IB_QPT_SMI]) + ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]); + if (!aqp1_destroyed) + ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]); + + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + + return aqp1_destroyed; +} + +static void parse_ec(struct ehca_shca *shca, u64 eqe) +{ + u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe); + u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe); + u8 spec_event; + struct ehca_sport *sport = &shca->sport[port - 1]; + + switch (ec) { + case 0x30: /* port availability change */ + if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) { + /* only replay modify_qp calls in autodetect mode; + * if AQP1 was destroyed, the port is already down + * again and we can drop the event. 
+ */ + if (ehca_nr_ports < 0) + if (replay_modify_qp(sport)) + break; + + sport->port_state = IB_PORT_ACTIVE; + dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, + "is active"); + ehca_query_sma_attr(shca, port, &sport->saved_attr); + } else { + sport->port_state = IB_PORT_DOWN; + dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, + "is inactive"); + } + break; + case 0x31: + /* port configuration change + * disruptive change is caused by + * LID, PKEY or SM change + */ + if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) { + ehca_warn(&shca->ib_device, "disruptive port " + "%d configuration change", port); + + sport->port_state = IB_PORT_DOWN; + dispatch_port_event(shca, port, IB_EVENT_PORT_ERR, + "is inactive"); + + sport->port_state = IB_PORT_ACTIVE; + dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE, + "is active"); + ehca_query_sma_attr(shca, port, + &sport->saved_attr); + } else + notify_port_conf_change(shca, port); + break; + case 0x32: /* adapter malfunction */ + ehca_err(&shca->ib_device, "Adapter malfunction."); + break; + case 0x33: /* trace stopped */ + ehca_err(&shca->ib_device, "Traced stopped."); + break; + case 0x34: /* util async event */ + spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe); + if (spec_event == 0x80) /* client reregister required */ + dispatch_port_event(shca, port, + IB_EVENT_CLIENT_REREGISTER, + "client reregister req."); + else + ehca_warn(&shca->ib_device, "Unknown util async " + "event %x on port %x", spec_event, port); + break; + default: + ehca_err(&shca->ib_device, "Unknown event code: %x on %s.", + ec, shca->ib_device.name); + break; + } + + return; +} + +static inline void reset_eq_pending(struct ehca_cq *cq) +{ + u64 CQx_EP; + struct h_galpa gal = cq->galpas.kernel; + + hipz_galpa_store_cq(gal, cqx_ep, 0x0); + CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep)); + + return; +} + +irqreturn_t ehca_interrupt_neq(int irq, void *dev_id) +{ + struct ehca_shca *shca = (struct ehca_shca*)dev_id; + + 
tasklet_hi_schedule(&shca->neq.interrupt_task); + + return IRQ_HANDLED; +} + +void ehca_tasklet_neq(unsigned long data) +{ + struct ehca_shca *shca = (struct ehca_shca*)data; + struct ehca_eqe *eqe; + u64 ret; + + eqe = ehca_poll_eq(shca, &shca->neq); + + while (eqe) { + if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry)) + parse_ec(shca, eqe->entry); + + eqe = ehca_poll_eq(shca, &shca->neq); + } + + ret = hipz_h_reset_event(shca->ipz_hca_handle, + shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL); + + if (ret != H_SUCCESS) + ehca_err(&shca->ib_device, "Can't clear notification events."); + + return; +} + +irqreturn_t ehca_interrupt_eq(int irq, void *dev_id) +{ + struct ehca_shca *shca = (struct ehca_shca*)dev_id; + + tasklet_hi_schedule(&shca->eq.interrupt_task); + + return IRQ_HANDLED; +} + + +static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe) +{ + u64 eqe_value; + u32 token; + struct ehca_cq *cq; + + eqe_value = eqe->entry; + ehca_dbg(&shca->ib_device, "eqe_value=%llx", eqe_value); + if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { + ehca_dbg(&shca->ib_device, "Got completion event"); + token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); + read_lock(&ehca_cq_idr_lock); + cq = idr_find(&ehca_cq_idr, token); + if (cq) + atomic_inc(&cq->nr_events); + read_unlock(&ehca_cq_idr_lock); + if (cq == NULL) { + ehca_err(&shca->ib_device, + "Invalid eqe for non-existing cq token=%x", + token); + return; + } + reset_eq_pending(cq); + if (ehca_scaling_code) + queue_comp_task(cq); + else { + comp_event_callback(cq); + if (atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + } + } else { + ehca_dbg(&shca->ib_device, "Got non completion event"); + parse_identifier(shca, eqe_value); + } +} + +void ehca_process_eq(struct ehca_shca *shca, int is_irq) +{ + struct ehca_eq *eq = &shca->eq; + struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache; + u64 eqe_value, ret; + int eqe_cnt, i; + int eq_empty = 0; + + 
spin_lock(&eq->irq_spinlock); + if (is_irq) { + const int max_query_cnt = 100; + int query_cnt = 0; + int int_state = 1; + do { + int_state = hipz_h_query_int_state( + shca->ipz_hca_handle, eq->ist); + query_cnt++; + iosync(); + } while (int_state && query_cnt < max_query_cnt); + if (unlikely((query_cnt == max_query_cnt))) + ehca_dbg(&shca->ib_device, "int_state=%x query_cnt=%x", + int_state, query_cnt); + } + + /* read out all eqes */ + eqe_cnt = 0; + do { + u32 token; + eqe_cache[eqe_cnt].eqe = ehca_poll_eq(shca, eq); + if (!eqe_cache[eqe_cnt].eqe) + break; + eqe_value = eqe_cache[eqe_cnt].eqe->entry; + if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) { + token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value); + read_lock(&ehca_cq_idr_lock); + eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token); + if (eqe_cache[eqe_cnt].cq) + atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events); + read_unlock(&ehca_cq_idr_lock); + if (!eqe_cache[eqe_cnt].cq) { + ehca_err(&shca->ib_device, + "Invalid eqe for non-existing cq " + "token=%x", token); + continue; + } + } else + eqe_cache[eqe_cnt].cq = NULL; + eqe_cnt++; + } while (eqe_cnt < EHCA_EQE_CACHE_SIZE); + if (!eqe_cnt) { + if (is_irq) + ehca_dbg(&shca->ib_device, + "No eqe found for irq event"); + goto unlock_irq_spinlock; + } else if (!is_irq) { + ret = hipz_h_eoi(eq->ist); + if (ret != H_SUCCESS) + ehca_err(&shca->ib_device, + "bad return code EOI -rc = %lld\n", ret); + ehca_dbg(&shca->ib_device, "deadman found %x eqe", eqe_cnt); + } + if (unlikely(eqe_cnt == EHCA_EQE_CACHE_SIZE)) + ehca_dbg(&shca->ib_device, "too many eqes for one irq event"); + /* enable irq for new packets */ + for (i = 0; i < eqe_cnt; i++) { + if (eq->eqe_cache[i].cq) + reset_eq_pending(eq->eqe_cache[i].cq); + } + /* check eq */ + spin_lock(&eq->spinlock); + eq_empty = (!ipz_eqit_eq_peek_valid(&shca->eq.ipz_queue)); + spin_unlock(&eq->spinlock); + /* call completion handler for cached eqes */ + for (i = 0; i < eqe_cnt; i++) + if (eq->eqe_cache[i].cq) { + if 
(ehca_scaling_code) + queue_comp_task(eq->eqe_cache[i].cq); + else { + struct ehca_cq *cq = eq->eqe_cache[i].cq; + comp_event_callback(cq); + if (atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + } + } else { + ehca_dbg(&shca->ib_device, "Got non completion event"); + parse_identifier(shca, eq->eqe_cache[i].eqe->entry); + } + /* poll eq if not empty */ + if (eq_empty) + goto unlock_irq_spinlock; + do { + struct ehca_eqe *eqe; + eqe = ehca_poll_eq(shca, &shca->eq); + if (!eqe) + break; + process_eqe(shca, eqe); + } while (1); + +unlock_irq_spinlock: + spin_unlock(&eq->irq_spinlock); +} + +void ehca_tasklet_eq(unsigned long data) +{ + ehca_process_eq((struct ehca_shca*)data, 1); +} + +static int find_next_online_cpu(struct ehca_comp_pool *pool) +{ + int cpu; + unsigned long flags; + + WARN_ON_ONCE(!in_interrupt()); + if (ehca_debug_level >= 3) + ehca_dmp(cpu_online_mask, cpumask_size(), ""); + + spin_lock_irqsave(&pool->last_cpu_lock, flags); + do { + cpu = cpumask_next(pool->last_cpu, cpu_online_mask); + if (cpu >= nr_cpu_ids) + cpu = cpumask_first(cpu_online_mask); + pool->last_cpu = cpu; + } while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active); + spin_unlock_irqrestore(&pool->last_cpu_lock, flags); + + return cpu; +} + +static void __queue_comp_task(struct ehca_cq *__cq, + struct ehca_cpu_comp_task *cct, + struct task_struct *thread) +{ + unsigned long flags; + + spin_lock_irqsave(&cct->task_lock, flags); + spin_lock(&__cq->task_lock); + + if (__cq->nr_callbacks == 0) { + __cq->nr_callbacks++; + list_add_tail(&__cq->entry, &cct->cq_list); + cct->cq_jobs++; + wake_up_process(thread); + } else + __cq->nr_callbacks++; + + spin_unlock(&__cq->task_lock); + spin_unlock_irqrestore(&cct->task_lock, flags); +} + +static void queue_comp_task(struct ehca_cq *__cq) +{ + int cpu_id; + struct ehca_cpu_comp_task *cct; + struct task_struct *thread; + int cq_jobs; + unsigned long flags; + + cpu_id = find_next_online_cpu(pool); + 
BUG_ON(!cpu_online(cpu_id)); + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); + BUG_ON(!cct || !thread); + + spin_lock_irqsave(&cct->task_lock, flags); + cq_jobs = cct->cq_jobs; + spin_unlock_irqrestore(&cct->task_lock, flags); + if (cq_jobs > 0) { + cpu_id = find_next_online_cpu(pool); + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); + BUG_ON(!cct || !thread); + } + __queue_comp_task(__cq, cct, thread); +} + +static void run_comp_task(struct ehca_cpu_comp_task *cct) +{ + struct ehca_cq *cq; + + while (!list_empty(&cct->cq_list)) { + cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); + spin_unlock_irq(&cct->task_lock); + + comp_event_callback(cq); + if (atomic_dec_and_test(&cq->nr_events)) + wake_up(&cq->wait_completion); + + spin_lock_irq(&cct->task_lock); + spin_lock(&cq->task_lock); + cq->nr_callbacks--; + if (!cq->nr_callbacks) { + list_del_init(cct->cq_list.next); + cct->cq_jobs--; + } + spin_unlock(&cq->task_lock); + } +} + +static void comp_task_park(unsigned int cpu) +{ + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + struct ehca_cpu_comp_task *target; + struct task_struct *thread; + struct ehca_cq *cq, *tmp; + LIST_HEAD(list); + + spin_lock_irq(&cct->task_lock); + cct->cq_jobs = 0; + cct->active = 0; + list_splice_init(&cct->cq_list, &list); + spin_unlock_irq(&cct->task_lock); + + cpu = find_next_online_cpu(pool); + target = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu); + spin_lock_irq(&target->task_lock); + list_for_each_entry_safe(cq, tmp, &list, entry) { + list_del(&cq->entry); + __queue_comp_task(cq, target, thread); + } + spin_unlock_irq(&target->task_lock); +} + +static void comp_task_stop(unsigned int cpu, bool online) +{ + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + + spin_lock_irq(&cct->task_lock); + cct->cq_jobs = 
0; + cct->active = 0; + WARN_ON(!list_empty(&cct->cq_list)); + spin_unlock_irq(&cct->task_lock); +} + +static int comp_task_should_run(unsigned int cpu) +{ + struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + + return cct->cq_jobs; +} + +static void comp_task(unsigned int cpu) +{ + struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks); + int cql_empty; + + spin_lock_irq(&cct->task_lock); + cql_empty = list_empty(&cct->cq_list); + if (!cql_empty) { + __set_current_state(TASK_RUNNING); + run_comp_task(cct); + } + spin_unlock_irq(&cct->task_lock); +} + +static struct smp_hotplug_thread comp_pool_threads = { + .thread_should_run = comp_task_should_run, + .thread_fn = comp_task, + .thread_comm = "ehca_comp/%u", + .cleanup = comp_task_stop, + .park = comp_task_park, +}; + +int ehca_create_comp_pool(void) +{ + int cpu, ret = -ENOMEM; + + if (!ehca_scaling_code) + return 0; + + pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL); + if (pool == NULL) + return -ENOMEM; + + spin_lock_init(&pool->last_cpu_lock); + pool->last_cpu = cpumask_any(cpu_online_mask); + + pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); + if (!pool->cpu_comp_tasks) + goto out_pool; + + pool->cpu_comp_threads = alloc_percpu(struct task_struct *); + if (!pool->cpu_comp_threads) + goto out_tasks; + + for_each_present_cpu(cpu) { + struct ehca_cpu_comp_task *cct; + + cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); + spin_lock_init(&cct->task_lock); + INIT_LIST_HEAD(&cct->cq_list); + } + + comp_pool_threads.store = pool->cpu_comp_threads; + ret = smpboot_register_percpu_thread(&comp_pool_threads); + if (ret) + goto out_threads; + + pr_info("eHCA scaling code enabled\n"); + return ret; + +out_threads: + free_percpu(pool->cpu_comp_threads); +out_tasks: + free_percpu(pool->cpu_comp_tasks); +out_pool: + kfree(pool); + return ret; +} + +void ehca_destroy_comp_pool(void) +{ + if (!ehca_scaling_code) + return; + + 
smpboot_unregister_percpu_thread(&comp_pool_threads); + + free_percpu(pool->cpu_comp_threads); + free_percpu(pool->cpu_comp_tasks); + kfree(pool); +} diff --git a/drivers/staging/rdma/ehca/ehca_irq.h b/drivers/staging/rdma/ehca/ehca_irq.h new file mode 100644 index 000000000000..5370199f08c7 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_irq.h @@ -0,0 +1,77 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Function definitions and structs for EQs, NEQs and interrupts + * + * Authors: Heiko J Schick + * Khadija Souissi + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __EHCA_IRQ_H +#define __EHCA_IRQ_H + + +struct ehca_shca; + +#include +#include + +int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource); + +irqreturn_t ehca_interrupt_neq(int irq, void *dev_id); +void ehca_tasklet_neq(unsigned long data); + +irqreturn_t ehca_interrupt_eq(int irq, void *dev_id); +void ehca_tasklet_eq(unsigned long data); +void ehca_process_eq(struct ehca_shca *shca, int is_irq); + +struct ehca_cpu_comp_task { + struct list_head cq_list; + spinlock_t task_lock; + int cq_jobs; + int active; +}; + +struct ehca_comp_pool { + struct ehca_cpu_comp_task __percpu *cpu_comp_tasks; + struct task_struct * __percpu *cpu_comp_threads; + int last_cpu; + spinlock_t last_cpu_lock; +}; + +int ehca_create_comp_pool(void); +void ehca_destroy_comp_pool(void); + +#endif diff --git a/drivers/staging/rdma/ehca/ehca_iverbs.h b/drivers/staging/rdma/ehca/ehca_iverbs.h new file mode 100644 index 000000000000..80e6a3d5df3e --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_iverbs.h @@ -0,0 +1,218 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Function definitions for internal functions + * + * Authors: Heiko J Schick + * Dietmar Decker + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. 
+ * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __EHCA_IVERBS_H__ +#define __EHCA_IVERBS_H__ + +#include "ehca_classes.h" + +int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw); + +int ehca_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props); + +enum rdma_protocol_type +ehca_query_protocol(struct ib_device *device, u8 port_num); + +int ehca_query_sma_attr(struct ehca_shca *shca, u8 port, + struct ehca_sma_attr *attr); + +int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey); + +int ehca_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *gid); + +int ehca_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask, + struct ib_port_modify *props); + +struct ib_pd *ehca_alloc_pd(struct ib_device *device, + struct ib_ucontext *context, + struct ib_udata *udata); + +int ehca_dealloc_pd(struct ib_pd *pd); + +struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); + +int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +int ehca_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +int ehca_destroy_ah(struct ib_ah *ah); + +struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags); + +struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, u64 *iova_start); + +struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt, int mr_access_flags, + struct ib_udata *udata); + +int ehca_rereg_phys_mr(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, int mr_access_flags, u64 *iova_start); + +int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); + +int ehca_dereg_mr(struct ib_mr *mr); + +struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); + +int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw, + struct ib_mw_bind *mw_bind); + +int ehca_dealloc_mw(struct 
ib_mw *mw); + +struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, + int mr_access_flags, + struct ib_fmr_attr *fmr_attr); + +int ehca_map_phys_fmr(struct ib_fmr *fmr, + u64 *page_list, int list_len, u64 iova); + +int ehca_unmap_fmr(struct list_head *fmr_list); + +int ehca_dealloc_fmr(struct ib_fmr *fmr); + +enum ehca_eq_type { + EHCA_EQ = 0, /* Event Queue */ + EHCA_NEQ /* Notification Event Queue */ +}; + +int ehca_create_eq(struct ehca_shca *shca, struct ehca_eq *eq, + enum ehca_eq_type type, const u32 length); + +int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq); + +void *ehca_poll_eq(struct ehca_shca *shca, struct ehca_eq *eq); + + +struct ib_cq *ehca_create_cq(struct ib_device *device, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata); + +int ehca_destroy_cq(struct ib_cq *cq); + +int ehca_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata); + +int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc); + +int ehca_peek_cq(struct ib_cq *cq, int wc_cnt); + +int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags); + +struct ib_qp *ehca_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); + +int ehca_destroy_qp(struct ib_qp *qp); + +int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, + struct ib_udata *udata); + +int ehca_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); + +int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr); + +int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr); + +int ehca_post_srq_recv(struct ib_srq *srq, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr); + +struct ib_srq *ehca_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata); + +int ehca_modify_srq(struct ib_srq 
*srq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); + +int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); + +int ehca_destroy_srq(struct ib_srq *srq); + +u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp, + struct ib_qp_init_attr *qp_init_attr); + +int ehca_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +int ehca_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device, + struct ib_udata *udata); + +int ehca_dealloc_ucontext(struct ib_ucontext *context); + +int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); + +int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index); + +void ehca_poll_eqs(unsigned long data); + +int ehca_calc_ipd(struct ehca_shca *shca, int port, + enum ib_rate path_rate, u32 *ipd); + +void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq); + +#ifdef CONFIG_PPC_64K_PAGES +void *ehca_alloc_fw_ctrlblock(gfp_t flags); +void ehca_free_fw_ctrlblock(void *ptr); +#else +#define ehca_alloc_fw_ctrlblock(flags) ((void *)get_zeroed_page(flags)) +#define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr)) +#endif + +void ehca_recover_sqp(struct ib_qp *sqp); + +#endif diff --git a/drivers/staging/rdma/ehca/ehca_main.c b/drivers/staging/rdma/ehca/ehca_main.c new file mode 100644 index 000000000000..8246418cd4e0 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_main.c @@ -0,0 +1,1123 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * module start stop, hca detection + * + * Authors: Heiko J Schick + * Hoang-Nam Nguyen + * Joachim Fenkes + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. 
+ * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifdef CONFIG_PPC_64K_PAGES +#include +#endif + +#include +#include +#include +#include "ehca_classes.h" +#include "ehca_iverbs.h" +#include "ehca_mrmw.h" +#include "ehca_tools.h" +#include "hcp_if.h" + +#define HCAD_VERSION "0029" + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Christoph Raisch "); +MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); +MODULE_VERSION(HCAD_VERSION); + +static bool ehca_open_aqp1 = 0; +static int ehca_hw_level = 0; +static bool ehca_poll_all_eqs = 1; + +int ehca_debug_level = 0; +int ehca_nr_ports = -1; +bool ehca_use_hp_mr = 0; +int ehca_port_act_time = 30; +int ehca_static_rate = -1; +bool ehca_scaling_code = 0; +int ehca_lock_hcalls = -1; +int ehca_max_cq = -1; +int ehca_max_qp = -1; + +module_param_named(open_aqp1, ehca_open_aqp1, bool, S_IRUGO); +module_param_named(debug_level, ehca_debug_level, int, S_IRUGO); +module_param_named(hw_level, ehca_hw_level, int, S_IRUGO); +module_param_named(nr_ports, ehca_nr_ports, int, S_IRUGO); +module_param_named(use_hp_mr, ehca_use_hp_mr, bool, S_IRUGO); +module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO); +module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO); +module_param_named(static_rate, ehca_static_rate, int, S_IRUGO); +module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO); +module_param_named(lock_hcalls, ehca_lock_hcalls, bint, S_IRUGO); +module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO); +module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO); + +MODULE_PARM_DESC(open_aqp1, + "Open AQP1 on startup (default: no)"); +MODULE_PARM_DESC(debug_level, + "Amount of debug output (0: none (default), 1: traces, " + "2: some dumps, 3: lots)"); +MODULE_PARM_DESC(hw_level, + "Hardware level (0: autosensing (default), " + "0x10..0x14: eHCA, 0x20..0x23: eHCA2)"); +MODULE_PARM_DESC(nr_ports, + "number of connected ports (-1: autodetect (default), " + "1: port one only, 2: two ports)"); +MODULE_PARM_DESC(use_hp_mr, + 
"Use high performance MRs (default: no)"); +MODULE_PARM_DESC(port_act_time, + "Time to wait for port activation (default: 30 sec)"); +MODULE_PARM_DESC(poll_all_eqs, + "Poll all event queues periodically (default: yes)"); +MODULE_PARM_DESC(static_rate, + "Set permanent static rate (default: no static rate)"); +MODULE_PARM_DESC(scaling_code, + "Enable scaling code (default: no)"); +MODULE_PARM_DESC(lock_hcalls, + "Serialize all hCalls made by the driver " + "(default: autodetect)"); +MODULE_PARM_DESC(number_of_cqs, + "Max number of CQs which can be allocated " + "(default: autodetect)"); +MODULE_PARM_DESC(number_of_qps, + "Max number of QPs which can be allocated " + "(default: autodetect)"); + +DEFINE_RWLOCK(ehca_qp_idr_lock); +DEFINE_RWLOCK(ehca_cq_idr_lock); +DEFINE_IDR(ehca_qp_idr); +DEFINE_IDR(ehca_cq_idr); + +static LIST_HEAD(shca_list); /* list of all registered ehcas */ +DEFINE_SPINLOCK(shca_list_lock); + +static struct timer_list poll_eqs_timer; + +#ifdef CONFIG_PPC_64K_PAGES +static struct kmem_cache *ctblk_cache; + +void *ehca_alloc_fw_ctrlblock(gfp_t flags) +{ + void *ret = kmem_cache_zalloc(ctblk_cache, flags); + if (!ret) + ehca_gen_err("Out of memory for ctblk"); + return ret; +} + +void ehca_free_fw_ctrlblock(void *ptr) +{ + if (ptr) + kmem_cache_free(ctblk_cache, ptr); + +} +#endif + +int ehca2ib_return_code(u64 ehca_rc) +{ + switch (ehca_rc) { + case H_SUCCESS: + return 0; + case H_RESOURCE: /* Resource in use */ + case H_BUSY: + return -EBUSY; + case H_NOT_ENOUGH_RESOURCES: /* insufficient resources */ + case H_CONSTRAINED: /* resource constraint */ + case H_NO_MEM: + return -ENOMEM; + default: + return -EINVAL; + } +} + +static int ehca_create_slab_caches(void) +{ + int ret; + + ret = ehca_init_pd_cache(); + if (ret) { + ehca_gen_err("Cannot create PD SLAB cache."); + return ret; + } + + ret = ehca_init_cq_cache(); + if (ret) { + ehca_gen_err("Cannot create CQ SLAB cache."); + goto create_slab_caches2; + } + + ret = ehca_init_qp_cache(); + if 
(ret) { + ehca_gen_err("Cannot create QP SLAB cache."); + goto create_slab_caches3; + } + + ret = ehca_init_av_cache(); + if (ret) { + ehca_gen_err("Cannot create AV SLAB cache."); + goto create_slab_caches4; + } + + ret = ehca_init_mrmw_cache(); + if (ret) { + ehca_gen_err("Cannot create MR&MW SLAB cache."); + goto create_slab_caches5; + } + + ret = ehca_init_small_qp_cache(); + if (ret) { + ehca_gen_err("Cannot create small queue SLAB cache."); + goto create_slab_caches6; + } + +#ifdef CONFIG_PPC_64K_PAGES + ctblk_cache = kmem_cache_create("ehca_cache_ctblk", + EHCA_PAGESIZE, H_CB_ALIGNMENT, + SLAB_HWCACHE_ALIGN, + NULL); + if (!ctblk_cache) { + ehca_gen_err("Cannot create ctblk SLAB cache."); + ehca_cleanup_small_qp_cache(); + ret = -ENOMEM; + goto create_slab_caches6; + } +#endif + return 0; + +create_slab_caches6: + ehca_cleanup_mrmw_cache(); + +create_slab_caches5: + ehca_cleanup_av_cache(); + +create_slab_caches4: + ehca_cleanup_qp_cache(); + +create_slab_caches3: + ehca_cleanup_cq_cache(); + +create_slab_caches2: + ehca_cleanup_pd_cache(); + + return ret; +} + +static void ehca_destroy_slab_caches(void) +{ + ehca_cleanup_small_qp_cache(); + ehca_cleanup_mrmw_cache(); + ehca_cleanup_av_cache(); + ehca_cleanup_qp_cache(); + ehca_cleanup_cq_cache(); + ehca_cleanup_pd_cache(); +#ifdef CONFIG_PPC_64K_PAGES + if (ctblk_cache) + kmem_cache_destroy(ctblk_cache); +#endif +} + +#define EHCA_HCAAVER EHCA_BMASK_IBM(32, 39) +#define EHCA_REVID EHCA_BMASK_IBM(40, 63) + +static struct cap_descr { + u64 mask; + char *descr; +} hca_cap_descr[] = { + { HCA_CAP_AH_PORT_NR_CHECK, "HCA_CAP_AH_PORT_NR_CHECK" }, + { HCA_CAP_ATOMIC, "HCA_CAP_ATOMIC" }, + { HCA_CAP_AUTO_PATH_MIG, "HCA_CAP_AUTO_PATH_MIG" }, + { HCA_CAP_BAD_P_KEY_CTR, "HCA_CAP_BAD_P_KEY_CTR" }, + { HCA_CAP_SQD_RTS_PORT_CHANGE, "HCA_CAP_SQD_RTS_PORT_CHANGE" }, + { HCA_CAP_CUR_QP_STATE_MOD, "HCA_CAP_CUR_QP_STATE_MOD" }, + { HCA_CAP_INIT_TYPE, "HCA_CAP_INIT_TYPE" }, + { HCA_CAP_PORT_ACTIVE_EVENT, 
"HCA_CAP_PORT_ACTIVE_EVENT" }, + { HCA_CAP_Q_KEY_VIOL_CTR, "HCA_CAP_Q_KEY_VIOL_CTR" }, + { HCA_CAP_WQE_RESIZE, "HCA_CAP_WQE_RESIZE" }, + { HCA_CAP_RAW_PACKET_MCAST, "HCA_CAP_RAW_PACKET_MCAST" }, + { HCA_CAP_SHUTDOWN_PORT, "HCA_CAP_SHUTDOWN_PORT" }, + { HCA_CAP_RC_LL_QP, "HCA_CAP_RC_LL_QP" }, + { HCA_CAP_SRQ, "HCA_CAP_SRQ" }, + { HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" }, + { HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" }, + { HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" }, + { HCA_CAP_H_ALLOC_RES_SYNC, "HCA_CAP_H_ALLOC_RES_SYNC" }, +}; + +static int ehca_sense_attributes(struct ehca_shca *shca) +{ + int i, ret = 0; + u64 h_ret; + struct hipz_query_hca *rblock; + struct hipz_query_port *port; + const char *loc_code; + + static const u32 pgsize_map[] = { + HCA_CAP_MR_PGSIZE_4K, 0x1000, + HCA_CAP_MR_PGSIZE_64K, 0x10000, + HCA_CAP_MR_PGSIZE_1M, 0x100000, + HCA_CAP_MR_PGSIZE_16M, 0x1000000, + }; + + ehca_gen_dbg("Probing adapter %s...", + shca->ofdev->dev.of_node->full_name); + loc_code = of_get_property(shca->ofdev->dev.of_node, "ibm,loc-code", + NULL); + if (loc_code) + ehca_gen_dbg(" ... location lode=%s", loc_code); + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_gen_err("Cannot allocate rblock memory."); + return -ENOMEM; + } + + h_ret = hipz_h_query_hca(shca->ipz_hca_handle, rblock); + if (h_ret != H_SUCCESS) { + ehca_gen_err("Cannot query device properties. h_ret=%lli", + h_ret); + ret = -EPERM; + goto sense_attributes1; + } + + if (ehca_nr_ports == 1) + shca->num_ports = 1; + else + shca->num_ports = (u8)rblock->num_ports; + + ehca_gen_dbg(" ... found %x ports", rblock->num_ports); + + if (ehca_hw_level == 0) { + u32 hcaaver; + u32 revid; + + hcaaver = EHCA_BMASK_GET(EHCA_HCAAVER, rblock->hw_ver); + revid = EHCA_BMASK_GET(EHCA_REVID, rblock->hw_ver); + + ehca_gen_dbg(" ... 
hardware version=%x:%x", hcaaver, revid); + + if (hcaaver == 1) { + if (revid <= 3) + shca->hw_level = 0x10 | (revid + 1); + else + shca->hw_level = 0x14; + } else if (hcaaver == 2) { + if (revid == 0) + shca->hw_level = 0x21; + else if (revid == 0x10) + shca->hw_level = 0x22; + else if (revid == 0x20 || revid == 0x21) + shca->hw_level = 0x23; + } + + if (!shca->hw_level) { + ehca_gen_warn("unknown hardware version" + " - assuming default level"); + shca->hw_level = 0x22; + } + } else + shca->hw_level = ehca_hw_level; + ehca_gen_dbg(" ... hardware level=%x", shca->hw_level); + + shca->hca_cap = rblock->hca_cap_indicators; + ehca_gen_dbg(" ... HCA capabilities:"); + for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++) + if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap)) + ehca_gen_dbg(" %s", hca_cap_descr[i].descr); + + /* Autodetect hCall locking -- the "H_ALLOC_RESOURCE synced" flag is + * a firmware property, so it's valid across all adapters + */ + if (ehca_lock_hcalls == -1) + ehca_lock_hcalls = !EHCA_BMASK_GET(HCA_CAP_H_ALLOC_RES_SYNC, + shca->hca_cap); + + /* translate supported MR page sizes; always support 4K */ + shca->hca_cap_mr_pgsize = EHCA_PAGESIZE; + for (i = 0; i < ARRAY_SIZE(pgsize_map); i += 2) + if (rblock->memory_page_size_supported & pgsize_map[i]) + shca->hca_cap_mr_pgsize |= pgsize_map[i + 1]; + + /* Set maximum number of CQs and QPs to calculate EQ size */ + if (shca->max_num_qps == -1) + shca->max_num_qps = min_t(int, rblock->max_qp, + EHCA_MAX_NUM_QUEUES); + else if (shca->max_num_qps < 1 || shca->max_num_qps > rblock->max_qp) { + ehca_gen_warn("The requested number of QPs is out of range " + "(1 - %i) specified by HW. 
Value is set to %i", + rblock->max_qp, rblock->max_qp); + shca->max_num_qps = rblock->max_qp; + } + + if (shca->max_num_cqs == -1) + shca->max_num_cqs = min_t(int, rblock->max_cq, + EHCA_MAX_NUM_QUEUES); + else if (shca->max_num_cqs < 1 || shca->max_num_cqs > rblock->max_cq) { + ehca_gen_warn("The requested number of CQs is out of range " + "(1 - %i) specified by HW. Value is set to %i", + rblock->max_cq, rblock->max_cq); + } + + /* query max MTU from first port -- it's the same for all ports */ + port = (struct hipz_query_port *)rblock; + h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port); + if (h_ret != H_SUCCESS) { + ehca_gen_err("Cannot query port properties. h_ret=%lli", + h_ret); + ret = -EPERM; + goto sense_attributes1; + } + + shca->max_mtu = port->max_mtu; + +sense_attributes1: + ehca_free_fw_ctrlblock(rblock); + return ret; +} + +static int init_node_guid(struct ehca_shca *shca) +{ + int ret = 0; + struct hipz_query_hca *rblock; + + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!rblock) { + ehca_err(&shca->ib_device, "Can't allocate rblock memory."); + return -ENOMEM; + } + + if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { + ehca_err(&shca->ib_device, "Can't query device properties"); + ret = -EINVAL; + goto init_node_guid1; + } + + memcpy(&shca->ib_device.node_guid, &rblock->node_guid, sizeof(u64)); + +init_node_guid1: + ehca_free_fw_ctrlblock(rblock); + return ret; +} + +static int ehca_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = ehca_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + +static int ehca_init_device(struct ehca_shca *shca) +{ + int ret; + + ret = init_node_guid(shca); + if (ret) + return ret; 
+ + strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX); + shca->ib_device.owner = THIS_MODULE; + + shca->ib_device.uverbs_abi_ver = 8; + shca->ib_device.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_QUERY_QP) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | + (1ull << IB_USER_VERBS_CMD_DETACH_MCAST); + + shca->ib_device.node_type = RDMA_NODE_IB_CA; + shca->ib_device.phys_port_cnt = shca->num_ports; + shca->ib_device.num_comp_vectors = 1; + shca->ib_device.dma_device = &shca->ofdev->dev; + shca->ib_device.query_device = ehca_query_device; + shca->ib_device.query_port = ehca_query_port; + shca->ib_device.query_gid = ehca_query_gid; + shca->ib_device.query_pkey = ehca_query_pkey; + /* shca->in_device.modify_device = ehca_modify_device */ + shca->ib_device.modify_port = ehca_modify_port; + shca->ib_device.alloc_ucontext = ehca_alloc_ucontext; + shca->ib_device.dealloc_ucontext = ehca_dealloc_ucontext; + shca->ib_device.alloc_pd = ehca_alloc_pd; + shca->ib_device.dealloc_pd = ehca_dealloc_pd; + shca->ib_device.create_ah = ehca_create_ah; + /* shca->ib_device.modify_ah = ehca_modify_ah; */ + shca->ib_device.query_ah = ehca_query_ah; + shca->ib_device.destroy_ah = ehca_destroy_ah; + shca->ib_device.create_qp = ehca_create_qp; + shca->ib_device.modify_qp = ehca_modify_qp; + shca->ib_device.query_qp = ehca_query_qp; + shca->ib_device.destroy_qp = ehca_destroy_qp; + shca->ib_device.post_send = ehca_post_send; + 
shca->ib_device.post_recv = ehca_post_recv; + shca->ib_device.create_cq = ehca_create_cq; + shca->ib_device.destroy_cq = ehca_destroy_cq; + shca->ib_device.resize_cq = ehca_resize_cq; + shca->ib_device.poll_cq = ehca_poll_cq; + /* shca->ib_device.peek_cq = ehca_peek_cq; */ + shca->ib_device.req_notify_cq = ehca_req_notify_cq; + /* shca->ib_device.req_ncomp_notif = ehca_req_ncomp_notif; */ + shca->ib_device.get_dma_mr = ehca_get_dma_mr; + shca->ib_device.reg_phys_mr = ehca_reg_phys_mr; + shca->ib_device.reg_user_mr = ehca_reg_user_mr; + shca->ib_device.query_mr = ehca_query_mr; + shca->ib_device.dereg_mr = ehca_dereg_mr; + shca->ib_device.rereg_phys_mr = ehca_rereg_phys_mr; + shca->ib_device.alloc_mw = ehca_alloc_mw; + shca->ib_device.bind_mw = ehca_bind_mw; + shca->ib_device.dealloc_mw = ehca_dealloc_mw; + shca->ib_device.alloc_fmr = ehca_alloc_fmr; + shca->ib_device.map_phys_fmr = ehca_map_phys_fmr; + shca->ib_device.unmap_fmr = ehca_unmap_fmr; + shca->ib_device.dealloc_fmr = ehca_dealloc_fmr; + shca->ib_device.attach_mcast = ehca_attach_mcast; + shca->ib_device.detach_mcast = ehca_detach_mcast; + shca->ib_device.process_mad = ehca_process_mad; + shca->ib_device.mmap = ehca_mmap; + shca->ib_device.dma_ops = &ehca_dma_mapping_ops; + shca->ib_device.get_port_immutable = ehca_port_immutable; + + if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) { + shca->ib_device.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); + + shca->ib_device.create_srq = ehca_create_srq; + shca->ib_device.modify_srq = ehca_modify_srq; + shca->ib_device.query_srq = ehca_query_srq; + shca->ib_device.destroy_srq = ehca_destroy_srq; + shca->ib_device.post_srq_recv = ehca_post_srq_recv; + } + + return ret; +} + +static int ehca_create_aqp1(struct ehca_shca *shca, u32 port) +{ + struct ehca_sport *sport = &shca->sport[port - 1]; + struct ib_cq *ibcq; + struct 
ib_qp *ibqp; + struct ib_qp_init_attr qp_init_attr; + struct ib_cq_init_attr cq_attr = {}; + int ret; + + if (sport->ibcq_aqp1) { + ehca_err(&shca->ib_device, "AQP1 CQ is already created."); + return -EPERM; + } + + cq_attr.cqe = 10; + ibcq = ib_create_cq(&shca->ib_device, NULL, NULL, (void *)(-1), + &cq_attr); + if (IS_ERR(ibcq)) { + ehca_err(&shca->ib_device, "Cannot create AQP1 CQ."); + return PTR_ERR(ibcq); + } + sport->ibcq_aqp1 = ibcq; + + if (sport->ibqp_sqp[IB_QPT_GSI]) { + ehca_err(&shca->ib_device, "AQP1 QP is already created."); + ret = -EPERM; + goto create_aqp1; + } + + memset(&qp_init_attr, 0, sizeof(struct ib_qp_init_attr)); + qp_init_attr.send_cq = ibcq; + qp_init_attr.recv_cq = ibcq; + qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + qp_init_attr.cap.max_send_wr = 100; + qp_init_attr.cap.max_recv_wr = 100; + qp_init_attr.cap.max_send_sge = 2; + qp_init_attr.cap.max_recv_sge = 1; + qp_init_attr.qp_type = IB_QPT_GSI; + qp_init_attr.port_num = port; + qp_init_attr.qp_context = NULL; + qp_init_attr.event_handler = NULL; + qp_init_attr.srq = NULL; + + ibqp = ib_create_qp(&shca->pd->ib_pd, &qp_init_attr); + if (IS_ERR(ibqp)) { + ehca_err(&shca->ib_device, "Cannot create AQP1 QP."); + ret = PTR_ERR(ibqp); + goto create_aqp1; + } + sport->ibqp_sqp[IB_QPT_GSI] = ibqp; + + return 0; + +create_aqp1: + ib_destroy_cq(sport->ibcq_aqp1); + return ret; +} + +static int ehca_destroy_aqp1(struct ehca_sport *sport) +{ + int ret; + + ret = ib_destroy_qp(sport->ibqp_sqp[IB_QPT_GSI]); + if (ret) { + ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret); + return ret; + } + + ret = ib_destroy_cq(sport->ibcq_aqp1); + if (ret) + ehca_gen_err("Cannot destroy AQP1 CQ. 
ret=%i", ret); + + return ret; +} + +static ssize_t ehca_show_debug_level(struct device_driver *ddp, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", ehca_debug_level); +} + +static ssize_t ehca_store_debug_level(struct device_driver *ddp, + const char *buf, size_t count) +{ + int value = (*buf) - '0'; + if (value >= 0 && value <= 9) + ehca_debug_level = value; + return 1; +} + +static DRIVER_ATTR(debug_level, S_IRUSR | S_IWUSR, + ehca_show_debug_level, ehca_store_debug_level); + +static struct attribute *ehca_drv_attrs[] = { + &driver_attr_debug_level.attr, + NULL +}; + +static struct attribute_group ehca_drv_attr_grp = { + .attrs = ehca_drv_attrs +}; + +static const struct attribute_group *ehca_drv_attr_groups[] = { + &ehca_drv_attr_grp, + NULL, +}; + +#define EHCA_RESOURCE_ATTR(name) \ +static ssize_t ehca_show_##name(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct ehca_shca *shca; \ + struct hipz_query_hca *rblock; \ + int data; \ + \ + shca = dev_get_drvdata(dev); \ + \ + rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL); \ + if (!rblock) { \ + dev_err(dev, "Can't allocate rblock memory.\n"); \ + return 0; \ + } \ + \ + if (hipz_h_query_hca(shca->ipz_hca_handle, rblock) != H_SUCCESS) { \ + dev_err(dev, "Can't query device properties\n"); \ + ehca_free_fw_ctrlblock(rblock); \ + return 0; \ + } \ + \ + data = rblock->name; \ + ehca_free_fw_ctrlblock(rblock); \ + \ + if ((strcmp(#name, "num_ports") == 0) && (ehca_nr_ports == 1)) \ + return snprintf(buf, 256, "1\n"); \ + else \ + return snprintf(buf, 256, "%d\n", data); \ + \ +} \ +static DEVICE_ATTR(name, S_IRUGO, ehca_show_##name, NULL); + +EHCA_RESOURCE_ATTR(num_ports); +EHCA_RESOURCE_ATTR(hw_ver); +EHCA_RESOURCE_ATTR(max_eq); +EHCA_RESOURCE_ATTR(cur_eq); +EHCA_RESOURCE_ATTR(max_cq); +EHCA_RESOURCE_ATTR(cur_cq); +EHCA_RESOURCE_ATTR(max_qp); +EHCA_RESOURCE_ATTR(cur_qp); +EHCA_RESOURCE_ATTR(max_mr); +EHCA_RESOURCE_ATTR(cur_mr); +EHCA_RESOURCE_ATTR(max_mw); 
+EHCA_RESOURCE_ATTR(cur_mw); +EHCA_RESOURCE_ATTR(max_pd); +EHCA_RESOURCE_ATTR(max_ah); + +static ssize_t ehca_show_adapter_handle(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ehca_shca *shca = dev_get_drvdata(dev); + + return sprintf(buf, "%llx\n", shca->ipz_hca_handle.handle); + +} +static DEVICE_ATTR(adapter_handle, S_IRUGO, ehca_show_adapter_handle, NULL); + +static struct attribute *ehca_dev_attrs[] = { + &dev_attr_adapter_handle.attr, + &dev_attr_num_ports.attr, + &dev_attr_hw_ver.attr, + &dev_attr_max_eq.attr, + &dev_attr_cur_eq.attr, + &dev_attr_max_cq.attr, + &dev_attr_cur_cq.attr, + &dev_attr_max_qp.attr, + &dev_attr_cur_qp.attr, + &dev_attr_max_mr.attr, + &dev_attr_cur_mr.attr, + &dev_attr_max_mw.attr, + &dev_attr_cur_mw.attr, + &dev_attr_max_pd.attr, + &dev_attr_max_ah.attr, + NULL +}; + +static struct attribute_group ehca_dev_attr_grp = { + .attrs = ehca_dev_attrs +}; + +static int ehca_probe(struct platform_device *dev) +{ + struct ehca_shca *shca; + const u64 *handle; + struct ib_pd *ibpd; + int ret, i, eq_size; + unsigned long flags; + + handle = of_get_property(dev->dev.of_node, "ibm,hca-handle", NULL); + if (!handle) { + ehca_gen_err("Cannot get eHCA handle for adapter: %s.", + dev->dev.of_node->full_name); + return -ENODEV; + } + + if (!(*handle)) { + ehca_gen_err("Wrong eHCA handle for adapter: %s.", + dev->dev.of_node->full_name); + return -ENODEV; + } + + shca = (struct ehca_shca *)ib_alloc_device(sizeof(*shca)); + if (!shca) { + ehca_gen_err("Cannot allocate shca memory."); + return -ENOMEM; + } + + mutex_init(&shca->modify_mutex); + atomic_set(&shca->num_cqs, 0); + atomic_set(&shca->num_qps, 0); + shca->max_num_qps = ehca_max_qp; + shca->max_num_cqs = ehca_max_cq; + + for (i = 0; i < ARRAY_SIZE(shca->sport); i++) + spin_lock_init(&shca->sport[i].mod_sqp_lock); + + shca->ofdev = dev; + shca->ipz_hca_handle.handle = *handle; + dev_set_drvdata(&dev->dev, shca); + + ret = ehca_sense_attributes(shca); + if (ret < 
0) { + ehca_gen_err("Cannot sense eHCA attributes."); + goto probe1; + } + + ret = ehca_init_device(shca); + if (ret) { + ehca_gen_err("Cannot init ehca device struct"); + goto probe1; + } + + eq_size = 2 * shca->max_num_cqs + 4 * shca->max_num_qps; + /* create event queues */ + ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size); + if (ret) { + ehca_err(&shca->ib_device, "Cannot create EQ."); + goto probe1; + } + + ret = ehca_create_eq(shca, &shca->neq, EHCA_NEQ, 513); + if (ret) { + ehca_err(&shca->ib_device, "Cannot create NEQ."); + goto probe3; + } + + /* create internal protection domain */ + ibpd = ehca_alloc_pd(&shca->ib_device, (void *)(-1), NULL); + if (IS_ERR(ibpd)) { + ehca_err(&shca->ib_device, "Cannot create internal PD."); + ret = PTR_ERR(ibpd); + goto probe4; + } + + shca->pd = container_of(ibpd, struct ehca_pd, ib_pd); + shca->pd->ib_pd.device = &shca->ib_device; + + /* create internal max MR */ + ret = ehca_reg_internal_maxmr(shca, shca->pd, &shca->maxmr); + + if (ret) { + ehca_err(&shca->ib_device, "Cannot create internal MR ret=%i", + ret); + goto probe5; + } + + ret = ib_register_device(&shca->ib_device, NULL); + if (ret) { + ehca_err(&shca->ib_device, + "ib_register_device() failed ret=%i", ret); + goto probe6; + } + + /* create AQP1 for port 1 */ + if (ehca_open_aqp1 == 1) { + shca->sport[0].port_state = IB_PORT_DOWN; + ret = ehca_create_aqp1(shca, 1); + if (ret) { + ehca_err(&shca->ib_device, + "Cannot create AQP1 for port 1."); + goto probe7; + } + } + + /* create AQP1 for port 2 */ + if ((ehca_open_aqp1 == 1) && (shca->num_ports == 2)) { + shca->sport[1].port_state = IB_PORT_DOWN; + ret = ehca_create_aqp1(shca, 2); + if (ret) { + ehca_err(&shca->ib_device, + "Cannot create AQP1 for port 2."); + goto probe8; + } + } + + ret = sysfs_create_group(&dev->dev.kobj, &ehca_dev_attr_grp); + if (ret) /* only complain; we can live without attributes */ + ehca_err(&shca->ib_device, + "Cannot create device attributes ret=%d", ret); + + 
spin_lock_irqsave(&shca_list_lock, flags); + list_add(&shca->shca_list, &shca_list); + spin_unlock_irqrestore(&shca_list_lock, flags); + + return 0; + +probe8: + ret = ehca_destroy_aqp1(&shca->sport[0]); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy AQP1 for port 1. ret=%i", ret); + +probe7: + ib_unregister_device(&shca->ib_device); + +probe6: + ret = ehca_dereg_internal_maxmr(shca); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal MR. ret=%x", ret); + +probe5: + ret = ehca_dealloc_pd(&shca->pd->ib_pd); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal PD. ret=%x", ret); + +probe4: + ret = ehca_destroy_eq(shca, &shca->neq); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy NEQ. ret=%x", ret); + +probe3: + ret = ehca_destroy_eq(shca, &shca->eq); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy EQ. ret=%x", ret); + +probe1: + ib_dealloc_device(&shca->ib_device); + + return -EINVAL; +} + +static int ehca_remove(struct platform_device *dev) +{ + struct ehca_shca *shca = dev_get_drvdata(&dev->dev); + unsigned long flags; + int ret; + + sysfs_remove_group(&dev->dev.kobj, &ehca_dev_attr_grp); + + if (ehca_open_aqp1 == 1) { + int i; + for (i = 0; i < shca->num_ports; i++) { + ret = ehca_destroy_aqp1(&shca->sport[i]); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy AQP1 for port %x " + "ret=%i", ret, i); + } + } + + ib_unregister_device(&shca->ib_device); + + ret = ehca_dereg_internal_maxmr(shca); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal MR. ret=%i", ret); + + ret = ehca_dealloc_pd(&shca->pd->ib_pd); + if (ret) + ehca_err(&shca->ib_device, + "Cannot destroy internal PD. ret=%i", ret); + + ret = ehca_destroy_eq(shca, &shca->eq); + if (ret) + ehca_err(&shca->ib_device, "Cannot destroy EQ. ret=%i", ret); + + ret = ehca_destroy_eq(shca, &shca->neq); + if (ret) + ehca_err(&shca->ib_device, "Canot destroy NEQ. 
ret=%i", ret); + + ib_dealloc_device(&shca->ib_device); + + spin_lock_irqsave(&shca_list_lock, flags); + list_del(&shca->shca_list); + spin_unlock_irqrestore(&shca_list_lock, flags); + + return ret; +} + +static struct of_device_id ehca_device_table[] = +{ + { + .name = "lhca", + .compatible = "IBM,lhca", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, ehca_device_table); + +static struct platform_driver ehca_driver = { + .probe = ehca_probe, + .remove = ehca_remove, + .driver = { + .name = "ehca", + .owner = THIS_MODULE, + .groups = ehca_drv_attr_groups, + .of_match_table = ehca_device_table, + }, +}; + +void ehca_poll_eqs(unsigned long data) +{ + struct ehca_shca *shca; + + spin_lock(&shca_list_lock); + list_for_each_entry(shca, &shca_list, shca_list) { + if (shca->eq.is_initialized) { + /* call deadman proc only if eq ptr does not change */ + struct ehca_eq *eq = &shca->eq; + int max = 3; + volatile u64 q_ofs, q_ofs2; + unsigned long flags; + spin_lock_irqsave(&eq->spinlock, flags); + q_ofs = eq->ipz_queue.current_q_offset; + spin_unlock_irqrestore(&eq->spinlock, flags); + do { + spin_lock_irqsave(&eq->spinlock, flags); + q_ofs2 = eq->ipz_queue.current_q_offset; + spin_unlock_irqrestore(&eq->spinlock, flags); + max--; + } while (q_ofs == q_ofs2 && max > 0); + if (q_ofs == q_ofs2) + ehca_process_eq(shca, 0); + } + } + mod_timer(&poll_eqs_timer, round_jiffies(jiffies + HZ)); + spin_unlock(&shca_list_lock); +} + +static int ehca_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + static unsigned long ehca_dmem_warn_time; + unsigned long flags; + + switch (action) { + case MEM_CANCEL_OFFLINE: + case MEM_CANCEL_ONLINE: + case MEM_ONLINE: + case MEM_OFFLINE: + return NOTIFY_OK; + case MEM_GOING_ONLINE: + case MEM_GOING_OFFLINE: + /* only ok if no hca is attached to the lpar */ + spin_lock_irqsave(&shca_list_lock, flags); + if (list_empty(&shca_list)) { + spin_unlock_irqrestore(&shca_list_lock, flags); + return NOTIFY_OK; + } else { + 
spin_unlock_irqrestore(&shca_list_lock, flags); + if (printk_timed_ratelimit(&ehca_dmem_warn_time, + 30 * 1000)) + ehca_gen_err("DMEM operations are not allowed" + "in conjunction with eHCA"); + return NOTIFY_BAD; + } + } + return NOTIFY_OK; +} + +static struct notifier_block ehca_mem_nb = { + .notifier_call = ehca_mem_notifier, +}; + +static int __init ehca_module_init(void) +{ + int ret; + + printk(KERN_INFO "eHCA Infiniband Device Driver " + "(Version " HCAD_VERSION ")\n"); + + ret = ehca_create_comp_pool(); + if (ret) { + ehca_gen_err("Cannot create comp pool."); + return ret; + } + + ret = ehca_create_slab_caches(); + if (ret) { + ehca_gen_err("Cannot create SLAB caches"); + ret = -ENOMEM; + goto module_init1; + } + + ret = ehca_create_busmap(); + if (ret) { + ehca_gen_err("Cannot create busmap."); + goto module_init2; + } + + ret = ibmebus_register_driver(&ehca_driver); + if (ret) { + ehca_gen_err("Cannot register eHCA device driver"); + ret = -EINVAL; + goto module_init3; + } + + ret = register_memory_notifier(&ehca_mem_nb); + if (ret) { + ehca_gen_err("Failed registering memory add/remove notifier"); + goto module_init4; + } + + if (ehca_poll_all_eqs != 1) { + ehca_gen_err("WARNING!!!"); + ehca_gen_err("It is possible to lose interrupts."); + } else { + init_timer(&poll_eqs_timer); + poll_eqs_timer.function = ehca_poll_eqs; + poll_eqs_timer.expires = jiffies + HZ; + add_timer(&poll_eqs_timer); + } + + return 0; + +module_init4: + ibmebus_unregister_driver(&ehca_driver); + +module_init3: + ehca_destroy_busmap(); + +module_init2: + ehca_destroy_slab_caches(); + +module_init1: + ehca_destroy_comp_pool(); + return ret; +}; + +static void __exit ehca_module_exit(void) +{ + if (ehca_poll_all_eqs == 1) + del_timer_sync(&poll_eqs_timer); + + ibmebus_unregister_driver(&ehca_driver); + + unregister_memory_notifier(&ehca_mem_nb); + + ehca_destroy_busmap(); + + ehca_destroy_slab_caches(); + + ehca_destroy_comp_pool(); + + idr_destroy(&ehca_cq_idr); + 
idr_destroy(&ehca_qp_idr); +}; + +module_init(ehca_module_init); +module_exit(ehca_module_exit); diff --git a/drivers/staging/rdma/ehca/ehca_mcast.c b/drivers/staging/rdma/ehca/ehca_mcast.c new file mode 100644 index 000000000000..cec181532924 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_mcast.c @@ -0,0 +1,131 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * mcast functions + * + * Authors: Khadija Souissi + * Waleri Fomin + * Reinhard Ernst + * Hoang-Nam Nguyen + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_qes.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" + +#define MAX_MC_LID 0xFFFE +#define MIN_MC_LID 0xC000 /* Multicast limits */ +#define EHCA_VALID_MULTICAST_GID(gid) ((gid)[0] == 0xFF) +#define EHCA_VALID_MULTICAST_LID(lid) \ + (((lid) >= MIN_MC_LID) && ((lid) <= MAX_MC_LID)) + +int ehca_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, + ib_device); + union ib_gid my_gid; + u64 subnet_prefix, interface_id, h_ret; + + if (ibqp->qp_type != IB_QPT_UD) { + ehca_err(ibqp->device, "invalid qp_type=%x", ibqp->qp_type); + return -EINVAL; + } + + if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) { + ehca_err(ibqp->device, "invalid mulitcast gid"); + return -EINVAL; + } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) { + ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid); + return -EINVAL; + } + + memcpy(&my_gid, gid->raw, sizeof(union ib_gid)); + + subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix); + interface_id = be64_to_cpu(my_gid.global.interface_id); + h_ret = hipz_h_attach_mcqp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + my_qp->galpas.kernel, + lid, subnet_prefix, interface_id); + if (h_ret != H_SUCCESS) + ehca_err(ibqp->device, + "ehca_qp=%p qp_num=%x hipz_h_attach_mcqp() 
failed " + "h_ret=%lli", my_qp, ibqp->qp_num, h_ret); + + return ehca2ib_return_code(h_ret); +} + +int ehca_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + struct ehca_shca *shca = container_of(ibqp->pd->device, + struct ehca_shca, ib_device); + union ib_gid my_gid; + u64 subnet_prefix, interface_id, h_ret; + + if (ibqp->qp_type != IB_QPT_UD) { + ehca_err(ibqp->device, "invalid qp_type %x", ibqp->qp_type); + return -EINVAL; + } + + if (!(EHCA_VALID_MULTICAST_GID(gid->raw))) { + ehca_err(ibqp->device, "invalid mulitcast gid"); + return -EINVAL; + } else if ((lid < MIN_MC_LID) || (lid > MAX_MC_LID)) { + ehca_err(ibqp->device, "invalid mulitcast lid=%x", lid); + return -EINVAL; + } + + memcpy(&my_gid, gid->raw, sizeof(union ib_gid)); + + subnet_prefix = be64_to_cpu(my_gid.global.subnet_prefix); + interface_id = be64_to_cpu(my_gid.global.interface_id); + h_ret = hipz_h_detach_mcqp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + my_qp->galpas.kernel, + lid, subnet_prefix, interface_id); + if (h_ret != H_SUCCESS) + ehca_err(ibqp->device, + "ehca_qp=%p qp_num=%x hipz_h_detach_mcqp() failed " + "h_ret=%lli", my_qp, ibqp->qp_num, h_ret); + + return ehca2ib_return_code(h_ret); +} diff --git a/drivers/staging/rdma/ehca/ehca_mrmw.c b/drivers/staging/rdma/ehca/ehca_mrmw.c new file mode 100644 index 000000000000..f914b30999f8 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_mrmw.c @@ -0,0 +1,2593 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * MR/MW functions + * + * Authors: Dietmar Decker + * Christoph Raisch + * Hoang-Nam Nguyen + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. 
+ * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include + +#include "ehca_iverbs.h" +#include "ehca_mrmw.h" +#include "hcp_if.h" +#include "hipz_hw.h" + +#define NUM_CHUNKS(length, chunk_size) \ + (((length) + (chunk_size - 1)) / (chunk_size)) + +/* max number of rpages (per hcall register_rpages) */ +#define MAX_RPAGES 512 + +/* DMEM toleration management */ +#define EHCA_SECTSHIFT SECTION_SIZE_BITS +#define EHCA_SECTSIZE (1UL << EHCA_SECTSHIFT) +#define EHCA_HUGEPAGESHIFT 34 +#define EHCA_HUGEPAGE_SIZE (1UL << EHCA_HUGEPAGESHIFT) +#define EHCA_HUGEPAGE_PFN_MASK ((EHCA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT) +#define EHCA_INVAL_ADDR 0xFFFFFFFFFFFFFFFFULL +#define EHCA_DIR_INDEX_SHIFT 13 /* 8k Entries in 64k block */ +#define EHCA_TOP_INDEX_SHIFT (EHCA_DIR_INDEX_SHIFT * 2) +#define EHCA_MAP_ENTRIES (1 << EHCA_DIR_INDEX_SHIFT) +#define EHCA_TOP_MAP_SIZE (0x10000) /* currently fixed map size */ +#define EHCA_DIR_MAP_SIZE (0x10000) +#define EHCA_ENT_MAP_SIZE (0x10000) +#define EHCA_INDEX_MASK (EHCA_MAP_ENTRIES - 1) + +static unsigned long ehca_mr_len; + +/* + * Memory map data structures + */ +struct ehca_dir_bmap { + u64 ent[EHCA_MAP_ENTRIES]; +}; +struct ehca_top_bmap { + struct ehca_dir_bmap *dir[EHCA_MAP_ENTRIES]; +}; +struct ehca_bmap { + struct ehca_top_bmap *top[EHCA_MAP_ENTRIES]; +}; + +static struct ehca_bmap *ehca_bmap; + +static struct kmem_cache *mr_cache; +static struct kmem_cache *mw_cache; + +enum ehca_mr_pgsize { + EHCA_MR_PGSIZE4K = 0x1000L, + EHCA_MR_PGSIZE64K = 0x10000L, + EHCA_MR_PGSIZE1M = 0x100000L, + EHCA_MR_PGSIZE16M = 0x1000000L +}; + +#define EHCA_MR_PGSHIFT4K 12 +#define EHCA_MR_PGSHIFT64K 16 +#define EHCA_MR_PGSHIFT1M 20 +#define EHCA_MR_PGSHIFT16M 24 + +static u64 ehca_map_vaddr(void *caddr); + +static u32 ehca_encode_hwpage_size(u32 pgsize) +{ + int log = ilog2(pgsize); + WARN_ON(log < 12 || log > 24 || log & 3); + return (log - 12) / 4; +} + +static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca) +{ + return rounddown_pow_of_two(shca->hca_cap_mr_pgsize); +} + +static 
struct ehca_mr *ehca_mr_new(void) +{ + struct ehca_mr *me; + + me = kmem_cache_zalloc(mr_cache, GFP_KERNEL); + if (me) + spin_lock_init(&me->mrlock); + else + ehca_gen_err("alloc failed"); + + return me; +} + +static void ehca_mr_delete(struct ehca_mr *me) +{ + kmem_cache_free(mr_cache, me); +} + +static struct ehca_mw *ehca_mw_new(void) +{ + struct ehca_mw *me; + + me = kmem_cache_zalloc(mw_cache, GFP_KERNEL); + if (me) + spin_lock_init(&me->mwlock); + else + ehca_gen_err("alloc failed"); + + return me; +} + +static void ehca_mw_delete(struct ehca_mw *me) +{ + kmem_cache_free(mw_cache, me); +} + +/*----------------------------------------------------------------------*/ + +struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags) +{ + struct ib_mr *ib_mr; + int ret; + struct ehca_mr *e_maxmr; + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + + if (shca->maxmr) { + e_maxmr = ehca_mr_new(); + if (!e_maxmr) { + ehca_err(&shca->ib_device, "out of memory"); + ib_mr = ERR_PTR(-ENOMEM); + goto get_dma_mr_exit0; + } + + ret = ehca_reg_maxmr(shca, e_maxmr, + (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)), + mr_access_flags, e_pd, + &e_maxmr->ib.ib_mr.lkey, + &e_maxmr->ib.ib_mr.rkey); + if (ret) { + ehca_mr_delete(e_maxmr); + ib_mr = ERR_PTR(ret); + goto get_dma_mr_exit0; + } + ib_mr = &e_maxmr->ib.ib_mr; + } else { + ehca_err(&shca->ib_device, "no internal max-MR exist!"); + ib_mr = ERR_PTR(-EINVAL); + goto get_dma_mr_exit0; + } + +get_dma_mr_exit0: + if (IS_ERR(ib_mr)) + ehca_err(&shca->ib_device, "h_ret=%li pd=%p mr_access_flags=%x", + PTR_ERR(ib_mr), pd, mr_access_flags); + return ib_mr; +} /* end ehca_get_dma_mr() */ + +/*----------------------------------------------------------------------*/ + +struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start) 
+{ + struct ib_mr *ib_mr; + int ret; + struct ehca_mr *e_mr; + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + + u64 size; + + if ((num_phys_buf <= 0) || !phys_buf_array) { + ehca_err(pd->device, "bad input values: num_phys_buf=%x " + "phys_buf_array=%p", num_phys_buf, phys_buf_array); + ib_mr = ERR_PTR(-EINVAL); + goto reg_phys_mr_exit0; + } + if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + ehca_err(pd->device, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_mr = ERR_PTR(-EINVAL); + goto reg_phys_mr_exit0; + } + + /* check physical buffer list and calculate size */ + ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf, + iova_start, &size); + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_phys_mr_exit0; + } + if ((size == 0) || + (((u64)iova_start + size) < (u64)iova_start)) { + ehca_err(pd->device, "bad input values: size=%llx iova_start=%p", + size, iova_start); + ib_mr = ERR_PTR(-EINVAL); + goto reg_phys_mr_exit0; + } + + e_mr = ehca_mr_new(); + if (!e_mr) { + ehca_err(pd->device, "out of memory"); + ib_mr = ERR_PTR(-ENOMEM); + goto reg_phys_mr_exit0; + } + + /* register MR on HCA */ + if (ehca_mr_is_maxmr(size, iova_start)) { + e_mr->flags |= EHCA_MR_FLAG_MAXMR; + ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags, + e_pd, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey); + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_phys_mr_exit1; + } + } else { + struct ehca_mr_pginfo pginfo; + u32 num_kpages; + u32 num_hwpages; + u64 hw_pgsize; + + num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size, + PAGE_SIZE); + /* for kernel space we try most possible pgsize 
*/ + hw_pgsize = ehca_get_max_hwpage_size(shca); + num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size, + hw_pgsize); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.hwpage_size = hw_pgsize; + pginfo.num_hwpages = num_hwpages; + pginfo.u.phy.num_phys_buf = num_phys_buf; + pginfo.u.phy.phys_buf_array = phys_buf_array; + pginfo.next_hwpage = + ((u64)iova_start & ~PAGE_MASK) / hw_pgsize; + + ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags, + e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_phys_mr_exit1; + } + } + + /* successful registration of all pages */ + return &e_mr->ib.ib_mr; + +reg_phys_mr_exit1: + ehca_mr_delete(e_mr); +reg_phys_mr_exit0: + if (IS_ERR(ib_mr)) + ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p " + "num_phys_buf=%x mr_access_flags=%x iova_start=%p", + PTR_ERR(ib_mr), pd, phys_buf_array, + num_phys_buf, mr_access_flags, iova_start); + return ib_mr; +} /* end ehca_reg_phys_mr() */ + +/*----------------------------------------------------------------------*/ + +struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt, int mr_access_flags, + struct ib_udata *udata) +{ + struct ib_mr *ib_mr; + struct ehca_mr *e_mr; + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_mr_pginfo pginfo; + int ret, page_shift; + u32 num_kpages; + u32 num_hwpages; + u64 hwpage_size; + + if (!pd) { + ehca_gen_err("bad pd=%p", pd); + return ERR_PTR(-EFAULT); + } + + if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write 
Access + */ + ehca_err(pd->device, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_mr = ERR_PTR(-EINVAL); + goto reg_user_mr_exit0; + } + + if (length == 0 || virt + length < virt) { + ehca_err(pd->device, "bad input values: length=%llx " + "virt_base=%llx", length, virt); + ib_mr = ERR_PTR(-EINVAL); + goto reg_user_mr_exit0; + } + + e_mr = ehca_mr_new(); + if (!e_mr) { + ehca_err(pd->device, "out of memory"); + ib_mr = ERR_PTR(-ENOMEM); + goto reg_user_mr_exit0; + } + + e_mr->umem = ib_umem_get(pd->uobject->context, start, length, + mr_access_flags, 0); + if (IS_ERR(e_mr->umem)) { + ib_mr = (void *)e_mr->umem; + goto reg_user_mr_exit1; + } + + if (e_mr->umem->page_size != PAGE_SIZE) { + ehca_err(pd->device, "page size not supported, " + "e_mr->umem->page_size=%x", e_mr->umem->page_size); + ib_mr = ERR_PTR(-EINVAL); + goto reg_user_mr_exit2; + } + + /* determine number of MR pages */ + num_kpages = NUM_CHUNKS((virt % PAGE_SIZE) + length, PAGE_SIZE); + /* select proper hw_pgsize */ + page_shift = PAGE_SHIFT; + if (e_mr->umem->hugetlb) { + /* determine page_shift, clamp between 4K and 16M */ + page_shift = (fls64(length - 1) + 3) & ~3; + page_shift = min(max(page_shift, EHCA_MR_PGSHIFT4K), + EHCA_MR_PGSHIFT16M); + } + hwpage_size = 1UL << page_shift; + + /* now that we have the desired page size, shift until it's + * supported, too. 4K is always supported, so this terminates. 
+ */ + while (!(hwpage_size & shca->hca_cap_mr_pgsize)) + hwpage_size >>= 4; + +reg_user_mr_fallback: + num_hwpages = NUM_CHUNKS((virt % hwpage_size) + length, hwpage_size); + /* register MR on HCA */ + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_USER; + pginfo.hwpage_size = hwpage_size; + pginfo.num_kpages = num_kpages; + pginfo.num_hwpages = num_hwpages; + pginfo.u.usr.region = e_mr->umem; + pginfo.next_hwpage = ib_umem_offset(e_mr->umem) / hwpage_size; + pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl; + ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags, + e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); + if (ret == -EINVAL && pginfo.hwpage_size > PAGE_SIZE) { + ehca_warn(pd->device, "failed to register mr " + "with hwpage_size=%llx", hwpage_size); + ehca_info(pd->device, "try to register mr with " + "kpage_size=%lx", PAGE_SIZE); + /* + * this means kpages are not contiguous for a hw page + * try kernel page size as fallback solution + */ + hwpage_size = PAGE_SIZE; + goto reg_user_mr_fallback; + } + if (ret) { + ib_mr = ERR_PTR(ret); + goto reg_user_mr_exit2; + } + + /* successful registration of all pages */ + return &e_mr->ib.ib_mr; + +reg_user_mr_exit2: + ib_umem_release(e_mr->umem); +reg_user_mr_exit1: + ehca_mr_delete(e_mr); +reg_user_mr_exit0: + if (IS_ERR(ib_mr)) + ehca_err(pd->device, "rc=%li pd=%p mr_access_flags=%x udata=%p", + PTR_ERR(ib_mr), pd, mr_access_flags, udata); + return ib_mr; +} /* end ehca_reg_user_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_rereg_phys_mr(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start) +{ + int ret; + + struct ehca_shca *shca = + container_of(mr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); + u64 new_size; + u64 *new_start; + u32 
new_acl; + struct ehca_pd *new_pd; + u32 tmp_lkey, tmp_rkey; + unsigned long sl_flags; + u32 num_kpages = 0; + u32 num_hwpages = 0; + struct ehca_mr_pginfo pginfo; + + if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) { + /* TODO not supported, because PHYP rereg hCall needs pages */ + ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not " + "supported yet, mr_rereg_mask=%x", mr_rereg_mask); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + if (mr_rereg_mask & IB_MR_REREG_PD) { + if (!pd) { + ehca_err(mr->device, "rereg with bad pd, pd=%p " + "mr_rereg_mask=%x", pd, mr_rereg_mask); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + } + + if ((mr_rereg_mask & + ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) || + (mr_rereg_mask == 0)) { + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + /* check other parameters */ + if (e_mr == shca->maxmr) { + /* should be impossible, however reject to be sure */ + ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p " + "shca->maxmr=%p mr->lkey=%x", + mr, shca->maxmr, mr->lkey); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. 
addr/size */ + if (e_mr->flags & EHCA_MR_FLAG_FMR) { + ehca_err(mr->device, "not supported for FMR, mr=%p " + "flags=%x", mr, e_mr->flags); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + if (!phys_buf_array || num_phys_buf <= 0) { + ehca_err(mr->device, "bad input values mr_rereg_mask=%x" + " phys_buf_array=%p num_phys_buf=%x", + mr_rereg_mask, phys_buf_array, num_phys_buf); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + } + if ((mr_rereg_mask & IB_MR_REREG_ACCESS) && /* change ACL */ + (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + ehca_err(mr->device, "bad input values: mr_rereg_mask=%x " + "mr_access_flags=%x", mr_rereg_mask, mr_access_flags); + ret = -EINVAL; + goto rereg_phys_mr_exit0; + } + + /* set requested values dependent on rereg request */ + spin_lock_irqsave(&e_mr->mrlock, sl_flags); + new_start = e_mr->start; + new_size = e_mr->size; + new_acl = e_mr->acl; + new_pd = container_of(mr->pd, struct ehca_pd, ib_pd); + + if (mr_rereg_mask & IB_MR_REREG_TRANS) { + u64 hw_pgsize = ehca_get_max_hwpage_size(shca); + + new_start = iova_start; /* change address */ + /* check physical buffer list and calculate size */ + ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, + num_phys_buf, iova_start, + &new_size); + if (ret) + goto rereg_phys_mr_exit1; + if ((new_size == 0) || + (((u64)iova_start + new_size) < (u64)iova_start)) { + ehca_err(mr->device, "bad input values: new_size=%llx " + "iova_start=%p", new_size, iova_start); + ret = -EINVAL; + goto rereg_phys_mr_exit1; + } + num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) + + new_size, PAGE_SIZE); + num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) + + new_size, hw_pgsize); + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = 
EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.hwpage_size = hw_pgsize; + pginfo.num_hwpages = num_hwpages; + pginfo.u.phy.num_phys_buf = num_phys_buf; + pginfo.u.phy.phys_buf_array = phys_buf_array; + pginfo.next_hwpage = + ((u64)iova_start & ~PAGE_MASK) / hw_pgsize; + } + if (mr_rereg_mask & IB_MR_REREG_ACCESS) + new_acl = mr_access_flags; + if (mr_rereg_mask & IB_MR_REREG_PD) + new_pd = container_of(pd, struct ehca_pd, ib_pd); + + ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl, + new_pd, &pginfo, &tmp_lkey, &tmp_rkey); + if (ret) + goto rereg_phys_mr_exit1; + + /* successful reregistration */ + if (mr_rereg_mask & IB_MR_REREG_PD) + mr->pd = pd; + mr->lkey = tmp_lkey; + mr->rkey = tmp_rkey; + +rereg_phys_mr_exit1: + spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); +rereg_phys_mr_exit0: + if (ret) + ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p " + "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x " + "iova_start=%p", + ret, mr, mr_rereg_mask, pd, phys_buf_array, + num_phys_buf, mr_access_flags, iova_start); + return ret; +} /* end ehca_rereg_phys_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca = + container_of(mr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); + unsigned long sl_flags; + struct ehca_mr_hipzout_parms hipzout; + + if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p " + "e_mr->flags=%x", mr, e_mr, e_mr->flags); + ret = -EINVAL; + goto query_mr_exit0; + } + + memset(mr_attr, 0, sizeof(struct ib_mr_attr)); + spin_lock_irqsave(&e_mr->mrlock, sl_flags); + + h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p " + "hca_hndl=%llx mr_hndl=%llx 
lkey=%x", + h_ret, mr, shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, mr->lkey); + ret = ehca2ib_return_code(h_ret); + goto query_mr_exit1; + } + mr_attr->pd = mr->pd; + mr_attr->device_virt_addr = hipzout.vaddr; + mr_attr->size = hipzout.len; + mr_attr->lkey = hipzout.lkey; + mr_attr->rkey = hipzout.rkey; + ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags); + +query_mr_exit1: + spin_unlock_irqrestore(&e_mr->mrlock, sl_flags); +query_mr_exit0: + if (ret) + ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p", + ret, mr, mr_attr); + return ret; +} /* end ehca_query_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dereg_mr(struct ib_mr *mr) +{ + int ret = 0; + u64 h_ret; + struct ehca_shca *shca = + container_of(mr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr); + + if ((e_mr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p " + "e_mr->flags=%x", mr, e_mr, e_mr->flags); + ret = -EINVAL; + goto dereg_mr_exit0; + } else if (e_mr == shca->maxmr) { + /* should be impossible, however reject to be sure */ + ehca_err(mr->device, "dereg internal max-MR impossible, mr=%p " + "shca->maxmr=%p mr->lkey=%x", + mr, shca->maxmr, mr->lkey); + ret = -EINVAL; + goto dereg_mr_exit0; + } + + /* TODO: BUSY: MR still has bound window(s) */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); + if (h_ret != H_SUCCESS) { + ehca_err(mr->device, "hipz_free_mr failed, h_ret=%lli shca=%p " + "e_mr=%p hca_hndl=%llx mr_hndl=%llx mr->lkey=%x", + h_ret, shca, e_mr, shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, mr->lkey); + ret = ehca2ib_return_code(h_ret); + goto dereg_mr_exit0; + } + + if (e_mr->umem) + ib_umem_release(e_mr->umem); + + /* successful deregistration */ + ehca_mr_delete(e_mr); + +dereg_mr_exit0: + if (ret) + ehca_err(mr->device, "ret=%i mr=%p", ret, mr); + return ret; +} /* end 
ehca_dereg_mr() */ + +/*----------------------------------------------------------------------*/ + +struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) +{ + struct ib_mw *ib_mw; + u64 h_ret; + struct ehca_mw *e_mw; + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + struct ehca_mw_hipzout_parms hipzout; + + if (type != IB_MW_TYPE_1) + return ERR_PTR(-EINVAL); + + e_mw = ehca_mw_new(); + if (!e_mw) { + ib_mw = ERR_PTR(-ENOMEM); + goto alloc_mw_exit0; + } + + h_ret = hipz_h_alloc_resource_mw(shca->ipz_hca_handle, e_mw, + e_pd->fw_pd, &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(pd->device, "hipz_mw_allocate failed, h_ret=%lli " + "shca=%p hca_hndl=%llx mw=%p", + h_ret, shca, shca->ipz_hca_handle.handle, e_mw); + ib_mw = ERR_PTR(ehca2ib_return_code(h_ret)); + goto alloc_mw_exit1; + } + /* successful MW allocation */ + e_mw->ipz_mw_handle = hipzout.handle; + e_mw->ib_mw.rkey = hipzout.rkey; + return &e_mw->ib_mw; + +alloc_mw_exit1: + ehca_mw_delete(e_mw); +alloc_mw_exit0: + if (IS_ERR(ib_mw)) + ehca_err(pd->device, "h_ret=%li pd=%p", PTR_ERR(ib_mw), pd); + return ib_mw; +} /* end ehca_alloc_mw() */ + +/*----------------------------------------------------------------------*/ + +int ehca_bind_mw(struct ib_qp *qp, + struct ib_mw *mw, + struct ib_mw_bind *mw_bind) +{ + /* TODO: not supported up to now */ + ehca_gen_err("bind MW currently not supported by HCAD"); + + return -EPERM; +} /* end ehca_bind_mw() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dealloc_mw(struct ib_mw *mw) +{ + u64 h_ret; + struct ehca_shca *shca = + container_of(mw->device, struct ehca_shca, ib_device); + struct ehca_mw *e_mw = container_of(mw, struct ehca_mw, ib_mw); + + h_ret = hipz_h_free_resource_mw(shca->ipz_hca_handle, e_mw); + if (h_ret != H_SUCCESS) { + ehca_err(mw->device, "hipz_free_mw failed, h_ret=%lli shca=%p " + "mw=%p 
rkey=%x hca_hndl=%llx mw_hndl=%llx", + h_ret, shca, mw, mw->rkey, shca->ipz_hca_handle.handle, + e_mw->ipz_mw_handle.handle); + return ehca2ib_return_code(h_ret); + } + /* successful deallocation */ + ehca_mw_delete(e_mw); + return 0; +} /* end ehca_dealloc_mw() */ + +/*----------------------------------------------------------------------*/ + +struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd, + int mr_access_flags, + struct ib_fmr_attr *fmr_attr) +{ + struct ib_fmr *ib_fmr; + struct ehca_shca *shca = + container_of(pd->device, struct ehca_shca, ib_device); + struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_mr *e_fmr; + int ret; + u32 tmp_lkey, tmp_rkey; + struct ehca_mr_pginfo pginfo; + u64 hw_pgsize; + + /* check other parameters */ + if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || + ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && + !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) { + /* + * Remote Write Access requires Local Write Access + * Remote Atomic Access requires Local Write Access + */ + ehca_err(pd->device, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + if (mr_access_flags & IB_ACCESS_MW_BIND) { + ehca_err(pd->device, "bad input values: mr_access_flags=%x", + mr_access_flags); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + if ((fmr_attr->max_pages == 0) || (fmr_attr->max_maps == 0)) { + ehca_err(pd->device, "bad input values: fmr_attr->max_pages=%x " + "fmr_attr->max_maps=%x fmr_attr->page_shift=%x", + fmr_attr->max_pages, fmr_attr->max_maps, + fmr_attr->page_shift); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + + hw_pgsize = 1 << fmr_attr->page_shift; + if (!(hw_pgsize & shca->hca_cap_mr_pgsize)) { + ehca_err(pd->device, "unsupported fmr_attr->page_shift=%x", + fmr_attr->page_shift); + ib_fmr = ERR_PTR(-EINVAL); + goto alloc_fmr_exit0; + } + + e_fmr = ehca_mr_new(); + if (!e_fmr) { 
+ ib_fmr = ERR_PTR(-ENOMEM); + goto alloc_fmr_exit0; + } + e_fmr->flags |= EHCA_MR_FLAG_FMR; + + /* register MR on HCA */ + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.hwpage_size = hw_pgsize; + /* + * pginfo.num_hwpages==0, ie register_rpages() will not be called + * but deferred to map_phys_fmr() + */ + ret = ehca_reg_mr(shca, e_fmr, NULL, + fmr_attr->max_pages * (1 << fmr_attr->page_shift), + mr_access_flags, e_pd, &pginfo, + &tmp_lkey, &tmp_rkey, EHCA_REG_MR); + if (ret) { + ib_fmr = ERR_PTR(ret); + goto alloc_fmr_exit1; + } + + /* successful */ + e_fmr->hwpage_size = hw_pgsize; + e_fmr->fmr_page_size = 1 << fmr_attr->page_shift; + e_fmr->fmr_max_pages = fmr_attr->max_pages; + e_fmr->fmr_max_maps = fmr_attr->max_maps; + e_fmr->fmr_map_cnt = 0; + return &e_fmr->ib.ib_fmr; + +alloc_fmr_exit1: + ehca_mr_delete(e_fmr); +alloc_fmr_exit0: + return ib_fmr; +} /* end ehca_alloc_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_map_phys_fmr(struct ib_fmr *fmr, + u64 *page_list, + int list_len, + u64 iova) +{ + int ret; + struct ehca_shca *shca = + container_of(fmr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); + struct ehca_pd *e_pd = container_of(fmr->pd, struct ehca_pd, ib_pd); + struct ehca_mr_pginfo pginfo; + u32 tmp_lkey, tmp_rkey; + + if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x", + e_fmr, e_fmr->flags); + ret = -EINVAL; + goto map_phys_fmr_exit0; + } + ret = ehca_fmr_check_page_list(e_fmr, page_list, list_len); + if (ret) + goto map_phys_fmr_exit0; + if (iova % e_fmr->fmr_page_size) { + /* only whole-numbered pages */ + ehca_err(fmr->device, "bad iova, iova=%llx fmr_page_size=%x", + iova, e_fmr->fmr_page_size); + ret = -EINVAL; + goto map_phys_fmr_exit0; + } + if (e_fmr->fmr_map_cnt >= e_fmr->fmr_max_maps) { + /* HCAD does not limit the maps, however trace this anyway */ + 
ehca_info(fmr->device, "map limit exceeded, fmr=%p " + "e_fmr->fmr_map_cnt=%x e_fmr->fmr_max_maps=%x", + fmr, e_fmr->fmr_map_cnt, e_fmr->fmr_max_maps); + } + + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_FMR; + pginfo.num_kpages = list_len; + pginfo.hwpage_size = e_fmr->hwpage_size; + pginfo.num_hwpages = + list_len * e_fmr->fmr_page_size / pginfo.hwpage_size; + pginfo.u.fmr.page_list = page_list; + pginfo.next_hwpage = + (iova & (e_fmr->fmr_page_size-1)) / pginfo.hwpage_size; + pginfo.u.fmr.fmr_pgsize = e_fmr->fmr_page_size; + + ret = ehca_rereg_mr(shca, e_fmr, (u64 *)iova, + list_len * e_fmr->fmr_page_size, + e_fmr->acl, e_pd, &pginfo, &tmp_lkey, &tmp_rkey); + if (ret) + goto map_phys_fmr_exit0; + + /* successful reregistration */ + e_fmr->fmr_map_cnt++; + e_fmr->ib.ib_fmr.lkey = tmp_lkey; + e_fmr->ib.ib_fmr.rkey = tmp_rkey; + return 0; + +map_phys_fmr_exit0: + if (ret) + ehca_err(fmr->device, "ret=%i fmr=%p page_list=%p list_len=%x " + "iova=%llx", ret, fmr, page_list, list_len, iova); + return ret; +} /* end ehca_map_phys_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_unmap_fmr(struct list_head *fmr_list) +{ + int ret = 0; + struct ib_fmr *ib_fmr; + struct ehca_shca *shca = NULL; + struct ehca_shca *prev_shca; + struct ehca_mr *e_fmr; + u32 num_fmr = 0; + u32 unmap_fmr_cnt = 0; + + /* check all FMR belong to same SHCA, and check internal flag */ + list_for_each_entry(ib_fmr, fmr_list, list) { + prev_shca = shca; + shca = container_of(ib_fmr->device, struct ehca_shca, + ib_device); + e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); + if ((shca != prev_shca) && prev_shca) { + ehca_err(&shca->ib_device, "SHCA mismatch, shca=%p " + "prev_shca=%p e_fmr=%p", + shca, prev_shca, e_fmr); + ret = -EINVAL; + goto unmap_fmr_exit0; + } + if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(&shca->ib_device, "not a FMR, e_fmr=%p " + "e_fmr->flags=%x", e_fmr, e_fmr->flags); + ret = -EINVAL; + goto 
unmap_fmr_exit0; + } + num_fmr++; + } + + /* loop over all FMRs to unmap */ + list_for_each_entry(ib_fmr, fmr_list, list) { + unmap_fmr_cnt++; + e_fmr = container_of(ib_fmr, struct ehca_mr, ib.ib_fmr); + shca = container_of(ib_fmr->device, struct ehca_shca, + ib_device); + ret = ehca_unmap_one_fmr(shca, e_fmr); + if (ret) { + /* unmap failed, stop unmapping of rest of FMRs */ + ehca_err(&shca->ib_device, "unmap of one FMR failed, " + "stop rest, e_fmr=%p num_fmr=%x " + "unmap_fmr_cnt=%x lkey=%x", e_fmr, num_fmr, + unmap_fmr_cnt, e_fmr->ib.ib_fmr.lkey); + goto unmap_fmr_exit0; + } + } + +unmap_fmr_exit0: + if (ret) + ehca_gen_err("ret=%i fmr_list=%p num_fmr=%x unmap_fmr_cnt=%x", + ret, fmr_list, num_fmr, unmap_fmr_cnt); + return ret; +} /* end ehca_unmap_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dealloc_fmr(struct ib_fmr *fmr) +{ + int ret; + u64 h_ret; + struct ehca_shca *shca = + container_of(fmr->device, struct ehca_shca, ib_device); + struct ehca_mr *e_fmr = container_of(fmr, struct ehca_mr, ib.ib_fmr); + + if (!(e_fmr->flags & EHCA_MR_FLAG_FMR)) { + ehca_err(fmr->device, "not a FMR, e_fmr=%p e_fmr->flags=%x", + e_fmr, e_fmr->flags); + ret = -EINVAL; + goto free_fmr_exit0; + } + + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); + if (h_ret != H_SUCCESS) { + ehca_err(fmr->device, "hipz_free_mr failed, h_ret=%lli e_fmr=%p " + "hca_hndl=%llx fmr_hndl=%llx fmr->lkey=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, fmr->lkey); + ret = ehca2ib_return_code(h_ret); + goto free_fmr_exit0; + } + /* successful deregistration */ + ehca_mr_delete(e_fmr); + return 0; + +free_fmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i fmr=%p", ret, fmr); + return ret; +} /* end ehca_dealloc_fmr() */ + +/*----------------------------------------------------------------------*/ + +static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct 
ehca_mr_pginfo *pginfo); + +int ehca_reg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, /*OUT*/ + u32 *rkey, /*OUT*/ + enum ehca_reg_type reg_type) +{ + int ret; + u64 h_ret; + u32 hipz_acl; + struct ehca_mr_hipzout_parms hipzout; + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl); + if (ehca_use_hp_mr == 1) + hipz_acl |= 0x00000001; + + h_ret = hipz_h_alloc_resource_mr(shca->ipz_hca_handle, e_mr, + (u64)iova_start, size, hipz_acl, + e_pd->fw_pd, &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_alloc_mr failed, h_ret=%lli " + "hca_hndl=%llx", h_ret, shca->ipz_hca_handle.handle); + ret = ehca2ib_return_code(h_ret); + goto ehca_reg_mr_exit0; + } + + e_mr->ipz_mr_handle = hipzout.handle; + + if (reg_type == EHCA_REG_BUSMAP_MR) + ret = ehca_reg_bmap_mr_rpages(shca, e_mr, pginfo); + else if (reg_type == EHCA_REG_MR) + ret = ehca_reg_mr_rpages(shca, e_mr, pginfo); + else + ret = -EINVAL; + + if (ret) + goto ehca_reg_mr_exit1; + + /* successful registration */ + e_mr->num_kpages = pginfo->num_kpages; + e_mr->num_hwpages = pginfo->num_hwpages; + e_mr->hwpage_size = pginfo->hwpage_size; + e_mr->start = iova_start; + e_mr->size = size; + e_mr->acl = acl; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + return 0; + +ehca_reg_mr_exit1: + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "h_ret=%lli shca=%p e_mr=%p " + "iova_start=%p size=%llx acl=%x e_pd=%p lkey=%x " + "pginfo=%p num_kpages=%llx num_hwpages=%llx ret=%i", + h_ret, shca, e_mr, iova_start, size, acl, e_pd, + hipzout.lkey, pginfo, pginfo->num_kpages, + pginfo->num_hwpages, ret); + ehca_err(&shca->ib_device, "internal error in ehca_reg_mr, " + "not recoverable"); + } +ehca_reg_mr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p " + 
"iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p " + "num_kpages=%llx num_hwpages=%llx", + ret, shca, e_mr, iova_start, size, acl, e_pd, pginfo, + pginfo->num_kpages, pginfo->num_hwpages); + return ret; +} /* end ehca_reg_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_reg_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo) +{ + int ret = 0; + u64 h_ret; + u32 rnum; + u64 rpage; + u32 i; + u64 *kpage; + + if (!pginfo->num_hwpages) /* in case of fmr */ + return 0; + + kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!kpage) { + ehca_err(&shca->ib_device, "kpage alloc failed"); + ret = -ENOMEM; + goto ehca_reg_mr_rpages_exit0; + } + + /* max MAX_RPAGES ehca mr pages per register call */ + for (i = 0; i < NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES); i++) { + + if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { + rnum = pginfo->num_hwpages % MAX_RPAGES; /* last shot */ + if (rnum == 0) + rnum = MAX_RPAGES; /* last shot is full */ + } else + rnum = MAX_RPAGES; + + ret = ehca_set_pagebuf(pginfo, rnum, kpage); + if (ret) { + ehca_err(&shca->ib_device, "ehca_set_pagebuf " + "bad rc, ret=%i rnum=%x kpage=%p", + ret, rnum, kpage); + goto ehca_reg_mr_rpages_exit1; + } + + if (rnum > 1) { + rpage = __pa(kpage); + if (!rpage) { + ehca_err(&shca->ib_device, "kpage=%p i=%x", + kpage, i); + ret = -EFAULT; + goto ehca_reg_mr_rpages_exit1; + } + } else + rpage = *kpage; + + h_ret = hipz_h_register_rpage_mr( + shca->ipz_hca_handle, e_mr, + ehca_encode_hwpage_size(pginfo->hwpage_size), + 0, rpage, rnum); + + if (i == NUM_CHUNKS(pginfo->num_hwpages, MAX_RPAGES) - 1) { + /* + * check for 'registration complete'==H_SUCCESS + * and for 'page registered'==H_PAGE_REGISTERED + */ + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "last " + "hipz_reg_rpage_mr failed, h_ret=%lli " + "e_mr=%p i=%x hca_hndl=%llx mr_hndl=%llx" + " lkey=%x", h_ret, e_mr, i, + shca->ipz_hca_handle.handle, + 
e_mr->ipz_mr_handle.handle, + e_mr->ib.ib_mr.lkey); + ret = ehca2ib_return_code(h_ret); + break; + } else + ret = 0; + } else if (h_ret != H_PAGE_REGISTERED) { + ehca_err(&shca->ib_device, "hipz_reg_rpage_mr failed, " + "h_ret=%lli e_mr=%p i=%x lkey=%x hca_hndl=%llx " + "mr_hndl=%llx", h_ret, e_mr, i, + e_mr->ib.ib_mr.lkey, + shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle); + ret = ehca2ib_return_code(h_ret); + break; + } else + ret = 0; + } /* end for(i) */ + + +ehca_reg_mr_rpages_exit1: + ehca_free_fw_ctrlblock(kpage); +ehca_reg_mr_rpages_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p pginfo=%p " + "num_kpages=%llx num_hwpages=%llx", ret, shca, e_mr, + pginfo, pginfo->num_kpages, pginfo->num_hwpages); + return ret; +} /* end ehca_reg_mr_rpages() */ + +/*----------------------------------------------------------------------*/ + +inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + u32 acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, /*OUT*/ + u32 *rkey) /*OUT*/ +{ + int ret; + u64 h_ret; + u32 hipz_acl; + u64 *kpage; + u64 rpage; + struct ehca_mr_pginfo pginfo_save; + struct ehca_mr_hipzout_parms hipzout; + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(pginfo->hwpage_size, &hipz_acl); + + kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!kpage) { + ehca_err(&shca->ib_device, "kpage alloc failed"); + ret = -ENOMEM; + goto ehca_rereg_mr_rereg1_exit0; + } + + pginfo_save = *pginfo; + ret = ehca_set_pagebuf(pginfo, pginfo->num_hwpages, kpage); + if (ret) { + ehca_err(&shca->ib_device, "set pagebuf failed, e_mr=%p " + "pginfo=%p type=%x num_kpages=%llx num_hwpages=%llx " + "kpage=%p", e_mr, pginfo, pginfo->type, + pginfo->num_kpages, pginfo->num_hwpages, kpage); + goto ehca_rereg_mr_rereg1_exit1; + } + rpage = __pa(kpage); + if (!rpage) { + ehca_err(&shca->ib_device, "kpage=%p", kpage); + ret = -EFAULT; + goto 
ehca_rereg_mr_rereg1_exit1; + } + h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_mr, + (u64)iova_start, size, hipz_acl, + e_pd->fw_pd, rpage, &hipzout); + if (h_ret != H_SUCCESS) { + /* + * reregistration unsuccessful, try it again with the 3 hCalls, + * e.g. this is required in case H_MR_CONDITION + * (MW bound or MR is shared) + */ + ehca_warn(&shca->ib_device, "hipz_h_reregister_pmr failed " + "(Rereg1), h_ret=%lli e_mr=%p", h_ret, e_mr); + *pginfo = pginfo_save; + ret = -EAGAIN; + } else if ((u64 *)hipzout.vaddr != iova_start) { + ehca_err(&shca->ib_device, "PHYP changed iova_start in " + "rereg_pmr, iova_start=%p iova_start_out=%llx e_mr=%p " + "mr_handle=%llx lkey=%x lkey_out=%x", iova_start, + hipzout.vaddr, e_mr, e_mr->ipz_mr_handle.handle, + e_mr->ib.ib_mr.lkey, hipzout.lkey); + ret = -EFAULT; + } else { + /* + * successful reregistration + * note: start and start_out are identical for eServer HCAs + */ + e_mr->num_kpages = pginfo->num_kpages; + e_mr->num_hwpages = pginfo->num_hwpages; + e_mr->hwpage_size = pginfo->hwpage_size; + e_mr->start = iova_start; + e_mr->size = size; + e_mr->acl = acl; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + } + +ehca_rereg_mr_rereg1_exit1: + ehca_free_fw_ctrlblock(kpage); +ehca_rereg_mr_rereg1_exit0: + if ( ret && (ret != -EAGAIN) ) + ehca_err(&shca->ib_device, "ret=%i lkey=%x rkey=%x " + "pginfo=%p num_kpages=%llx num_hwpages=%llx", + ret, *lkey, *rkey, pginfo, pginfo->num_kpages, + pginfo->num_hwpages); + return ret; +} /* end ehca_rereg_mr_rereg1() */ + +/*----------------------------------------------------------------------*/ + +int ehca_rereg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, + u32 *rkey) +{ + int ret = 0; + u64 h_ret; + int rereg_1_hcall = 1; /* 1: use hipz_h_reregister_pmr directly */ + int rereg_3_hcall = 0; /* 1: use 3 hipz calls for reregistration */ + + /* first determine 
reregistration hCall(s) */ + if ((pginfo->num_hwpages > MAX_RPAGES) || + (e_mr->num_hwpages > MAX_RPAGES) || + (pginfo->num_hwpages > e_mr->num_hwpages)) { + ehca_dbg(&shca->ib_device, "Rereg3 case, " + "pginfo->num_hwpages=%llx e_mr->num_hwpages=%x", + pginfo->num_hwpages, e_mr->num_hwpages); + rereg_1_hcall = 0; + rereg_3_hcall = 1; + } + + if (e_mr->flags & EHCA_MR_FLAG_MAXMR) { /* check for max-MR */ + rereg_1_hcall = 0; + rereg_3_hcall = 1; + e_mr->flags &= ~EHCA_MR_FLAG_MAXMR; + ehca_err(&shca->ib_device, "Rereg MR for max-MR! e_mr=%p", + e_mr); + } + + if (rereg_1_hcall) { + ret = ehca_rereg_mr_rereg1(shca, e_mr, iova_start, size, + acl, e_pd, pginfo, lkey, rkey); + if (ret) { + if (ret == -EAGAIN) + rereg_3_hcall = 1; + else + goto ehca_rereg_mr_exit0; + } + } + + if (rereg_3_hcall) { + struct ehca_mr save_mr; + + /* first deregister old MR */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_mr); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_free_mr failed, " + "h_ret=%lli e_mr=%p hca_hndl=%llx mr_hndl=%llx " + "mr->lkey=%x", + h_ret, e_mr, shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle, + e_mr->ib.ib_mr.lkey); + ret = ehca2ib_return_code(h_ret); + goto ehca_rereg_mr_exit0; + } + /* clean ehca_mr_t, without changing struct ib_mr and lock */ + save_mr = *e_mr; + ehca_mr_deletenew(e_mr); + + /* set some MR values */ + e_mr->flags = save_mr.flags; + e_mr->hwpage_size = save_mr.hwpage_size; + e_mr->fmr_page_size = save_mr.fmr_page_size; + e_mr->fmr_max_pages = save_mr.fmr_max_pages; + e_mr->fmr_max_maps = save_mr.fmr_max_maps; + e_mr->fmr_map_cnt = save_mr.fmr_map_cnt; + + ret = ehca_reg_mr(shca, e_mr, iova_start, size, acl, + e_pd, pginfo, lkey, rkey, EHCA_REG_MR); + if (ret) { + u32 offset = (u64)(&e_mr->flags) - (u64)e_mr; + memcpy(&e_mr->flags, &(save_mr.flags), + sizeof(struct ehca_mr) - offset); + goto ehca_rereg_mr_exit0; + } + } + +ehca_rereg_mr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p e_mr=%p " 
+ "iova_start=%p size=%llx acl=%x e_pd=%p pginfo=%p " + "num_kpages=%llx lkey=%x rkey=%x rereg_1_hcall=%x " + "rereg_3_hcall=%x", ret, shca, e_mr, iova_start, size, + acl, e_pd, pginfo, pginfo->num_kpages, *lkey, *rkey, + rereg_1_hcall, rereg_3_hcall); + return ret; +} /* end ehca_rereg_mr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_unmap_one_fmr(struct ehca_shca *shca, + struct ehca_mr *e_fmr) +{ + int ret = 0; + u64 h_ret; + struct ehca_pd *e_pd = + container_of(e_fmr->ib.ib_fmr.pd, struct ehca_pd, ib_pd); + struct ehca_mr save_fmr; + u32 tmp_lkey, tmp_rkey; + struct ehca_mr_pginfo pginfo; + struct ehca_mr_hipzout_parms hipzout; + struct ehca_mr save_mr; + + if (e_fmr->fmr_max_pages <= MAX_RPAGES) { + /* + * note: after using rereg hcall with len=0, + * rereg hcall must be used again for registering pages + */ + h_ret = hipz_h_reregister_pmr(shca->ipz_hca_handle, e_fmr, 0, + 0, 0, e_pd->fw_pd, 0, &hipzout); + if (h_ret == H_SUCCESS) { + /* successful reregistration */ + e_fmr->start = NULL; + e_fmr->size = 0; + tmp_lkey = hipzout.lkey; + tmp_rkey = hipzout.rkey; + return 0; + } + /* + * should not happen, because length checked above, + * FMRs are not shared and no MW bound to FMRs + */ + ehca_err(&shca->ib_device, "hipz_reregister_pmr failed " + "(Rereg1), h_ret=%lli e_fmr=%p hca_hndl=%llx " + "mr_hndl=%llx lkey=%x lkey_out=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, + e_fmr->ib.ib_fmr.lkey, hipzout.lkey); + /* try free and rereg */ + } + + /* first free old FMR */ + h_ret = hipz_h_free_resource_mr(shca->ipz_hca_handle, e_fmr); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_free_mr failed, " + "h_ret=%lli e_fmr=%p hca_hndl=%llx mr_hndl=%llx " + "lkey=%x", + h_ret, e_fmr, shca->ipz_hca_handle.handle, + e_fmr->ipz_mr_handle.handle, + e_fmr->ib.ib_fmr.lkey); + ret = ehca2ib_return_code(h_ret); + goto ehca_unmap_one_fmr_exit0; + } + /* clean ehca_mr_t, without 
changing lock */ + save_fmr = *e_fmr; + ehca_mr_deletenew(e_fmr); + + /* set some MR values */ + e_fmr->flags = save_fmr.flags; + e_fmr->hwpage_size = save_fmr.hwpage_size; + e_fmr->fmr_page_size = save_fmr.fmr_page_size; + e_fmr->fmr_max_pages = save_fmr.fmr_max_pages; + e_fmr->fmr_max_maps = save_fmr.fmr_max_maps; + e_fmr->fmr_map_cnt = save_fmr.fmr_map_cnt; + e_fmr->acl = save_fmr.acl; + + memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_FMR; + ret = ehca_reg_mr(shca, e_fmr, NULL, + (e_fmr->fmr_max_pages * e_fmr->fmr_page_size), + e_fmr->acl, e_pd, &pginfo, &tmp_lkey, + &tmp_rkey, EHCA_REG_MR); + if (ret) { + u32 offset = (u64)(&e_fmr->flags) - (u64)e_fmr; + memcpy(&e_fmr->flags, &(save_mr.flags), + sizeof(struct ehca_mr) - offset); + } + +ehca_unmap_one_fmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i tmp_lkey=%x tmp_rkey=%x " + "fmr_max_pages=%x", + ret, tmp_lkey, tmp_rkey, e_fmr->fmr_max_pages); + return ret; +} /* end ehca_unmap_one_fmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_reg_smr(struct ehca_shca *shca, + struct ehca_mr *e_origmr, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, /*OUT*/ + u32 *rkey) /*OUT*/ +{ + int ret = 0; + u64 h_ret; + u32 hipz_acl; + struct ehca_mr_hipzout_parms hipzout; + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl); + + h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, + (u64)iova_start, hipz_acl, e_pd->fw_pd, + &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli " + "shca=%p e_origmr=%p e_newmr=%p iova_start=%p acl=%x " + "e_pd=%p hca_hndl=%llx mr_hndl=%llx lkey=%x", + h_ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd, + shca->ipz_hca_handle.handle, + e_origmr->ipz_mr_handle.handle, + e_origmr->ib.ib_mr.lkey); + ret = ehca2ib_return_code(h_ret); + goto ehca_reg_smr_exit0; + } + 
/* successful registration */ + e_newmr->num_kpages = e_origmr->num_kpages; + e_newmr->num_hwpages = e_origmr->num_hwpages; + e_newmr->hwpage_size = e_origmr->hwpage_size; + e_newmr->start = iova_start; + e_newmr->size = e_origmr->size; + e_newmr->acl = acl; + e_newmr->ipz_mr_handle = hipzout.handle; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + return 0; + +ehca_reg_smr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p e_origmr=%p " + "e_newmr=%p iova_start=%p acl=%x e_pd=%p", + ret, shca, e_origmr, e_newmr, iova_start, acl, e_pd); + return ret; +} /* end ehca_reg_smr() */ + +/*----------------------------------------------------------------------*/ +static inline void *ehca_calc_sectbase(int top, int dir, int idx) +{ + unsigned long ret = idx; + ret |= dir << EHCA_DIR_INDEX_SHIFT; + ret |= top << EHCA_TOP_INDEX_SHIFT; + return __va(ret << SECTION_SIZE_BITS); +} + +#define ehca_bmap_valid(entry) \ + ((u64)entry != (u64)EHCA_INVAL_ADDR) + +static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage, + struct ehca_shca *shca, struct ehca_mr *mr, + struct ehca_mr_pginfo *pginfo) +{ + u64 h_ret = 0; + unsigned long page = 0; + u64 rpage = __pa(kpage); + int page_count; + + void *sectbase = ehca_calc_sectbase(top, dir, idx); + if ((unsigned long)sectbase & (pginfo->hwpage_size - 1)) { + ehca_err(&shca->ib_device, "reg_mr_section will probably fail:" + "hwpage_size does not fit to " + "section start address"); + } + page_count = EHCA_SECTSIZE / pginfo->hwpage_size; + + while (page < page_count) { + u64 rnum; + for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count); + rnum++) { + void *pg = sectbase + ((page++) * pginfo->hwpage_size); + kpage[rnum] = __pa(pg); + } + + h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr, + ehca_encode_hwpage_size(pginfo->hwpage_size), + 0, rpage, rnum); + + if ((h_ret != H_SUCCESS) && (h_ret != H_PAGE_REGISTERED)) { + ehca_err(&shca->ib_device, "register_rpage_mr failed"); + return h_ret; + } + } + 
return h_ret; +} + +static u64 ehca_reg_mr_sections(int top, int dir, u64 *kpage, + struct ehca_shca *shca, struct ehca_mr *mr, + struct ehca_mr_pginfo *pginfo) +{ + u64 hret = H_SUCCESS; + int idx; + + for (idx = 0; idx < EHCA_MAP_ENTRIES; idx++) { + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir]->ent[idx])) + continue; + + hret = ehca_reg_mr_section(top, dir, idx, kpage, shca, mr, + pginfo); + if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) + return hret; + } + return hret; +} + +static u64 ehca_reg_mr_dir_sections(int top, u64 *kpage, struct ehca_shca *shca, + struct ehca_mr *mr, + struct ehca_mr_pginfo *pginfo) +{ + u64 hret = H_SUCCESS; + int dir; + + for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) { + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) + continue; + + hret = ehca_reg_mr_sections(top, dir, kpage, shca, mr, pginfo); + if ((hret != H_SUCCESS) && (hret != H_PAGE_REGISTERED)) + return hret; + } + return hret; +} + +/* register internal max-MR to internal SHCA */ +int ehca_reg_internal_maxmr( + struct ehca_shca *shca, + struct ehca_pd *e_pd, + struct ehca_mr **e_maxmr) /*OUT*/ +{ + int ret; + struct ehca_mr *e_mr; + u64 *iova_start; + u64 size_maxmr; + struct ehca_mr_pginfo pginfo; + struct ib_phys_buf ib_pbuf; + u32 num_kpages; + u32 num_hwpages; + u64 hw_pgsize; + + if (!ehca_bmap) { + ret = -EFAULT; + goto ehca_reg_internal_maxmr_exit0; + } + + e_mr = ehca_mr_new(); + if (!e_mr) { + ehca_err(&shca->ib_device, "out of memory"); + ret = -ENOMEM; + goto ehca_reg_internal_maxmr_exit0; + } + e_mr->flags |= EHCA_MR_FLAG_MAXMR; + + /* register internal max-MR on HCA */ + size_maxmr = ehca_mr_len; + iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)); + ib_pbuf.addr = 0; + ib_pbuf.size = size_maxmr; + num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr, + PAGE_SIZE); + hw_pgsize = ehca_get_max_hwpage_size(shca); + num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size_maxmr, + hw_pgsize); + + 
memset(&pginfo, 0, sizeof(pginfo)); + pginfo.type = EHCA_MR_PGI_PHYS; + pginfo.num_kpages = num_kpages; + pginfo.num_hwpages = num_hwpages; + pginfo.hwpage_size = hw_pgsize; + pginfo.u.phy.num_phys_buf = 1; + pginfo.u.phy.phys_buf_array = &ib_pbuf; + + ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd, + &pginfo, &e_mr->ib.ib_mr.lkey, + &e_mr->ib.ib_mr.rkey, EHCA_REG_BUSMAP_MR); + if (ret) { + ehca_err(&shca->ib_device, "reg of internal max MR failed, " + "e_mr=%p iova_start=%p size_maxmr=%llx num_kpages=%x " + "num_hwpages=%x", e_mr, iova_start, size_maxmr, + num_kpages, num_hwpages); + goto ehca_reg_internal_maxmr_exit1; + } + + /* successful registration of all pages */ + e_mr->ib.ib_mr.device = e_pd->ib_pd.device; + e_mr->ib.ib_mr.pd = &e_pd->ib_pd; + e_mr->ib.ib_mr.uobject = NULL; + atomic_inc(&(e_pd->ib_pd.usecnt)); + atomic_set(&(e_mr->ib.ib_mr.usecnt), 0); + *e_maxmr = e_mr; + return 0; + +ehca_reg_internal_maxmr_exit1: + ehca_mr_delete(e_mr); +ehca_reg_internal_maxmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p e_pd=%p e_maxmr=%p", + ret, shca, e_pd, e_maxmr); + return ret; +} /* end ehca_reg_internal_maxmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_reg_maxmr(struct ehca_shca *shca, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, + u32 *rkey) +{ + u64 h_ret; + struct ehca_mr *e_origmr = shca->maxmr; + u32 hipz_acl; + struct ehca_mr_hipzout_parms hipzout; + + ehca_mrmw_map_acl(acl, &hipz_acl); + ehca_mrmw_set_pgsize_hipz_acl(e_origmr->hwpage_size, &hipz_acl); + + h_ret = hipz_h_register_smr(shca->ipz_hca_handle, e_newmr, e_origmr, + (u64)iova_start, hipz_acl, e_pd->fw_pd, + &hipzout); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_reg_smr failed, h_ret=%lli " + "e_origmr=%p hca_hndl=%llx mr_hndl=%llx lkey=%x", + h_ret, e_origmr, shca->ipz_hca_handle.handle, + e_origmr->ipz_mr_handle.handle, + 
e_origmr->ib.ib_mr.lkey); + return ehca2ib_return_code(h_ret); + } + /* successful registration */ + e_newmr->num_kpages = e_origmr->num_kpages; + e_newmr->num_hwpages = e_origmr->num_hwpages; + e_newmr->hwpage_size = e_origmr->hwpage_size; + e_newmr->start = iova_start; + e_newmr->size = e_origmr->size; + e_newmr->acl = acl; + e_newmr->ipz_mr_handle = hipzout.handle; + *lkey = hipzout.lkey; + *rkey = hipzout.rkey; + return 0; +} /* end ehca_reg_maxmr() */ + +/*----------------------------------------------------------------------*/ + +int ehca_dereg_internal_maxmr(struct ehca_shca *shca) +{ + int ret; + struct ehca_mr *e_maxmr; + struct ib_pd *ib_pd; + + if (!shca->maxmr) { + ehca_err(&shca->ib_device, "bad call, shca=%p", shca); + ret = -EINVAL; + goto ehca_dereg_internal_maxmr_exit0; + } + + e_maxmr = shca->maxmr; + ib_pd = e_maxmr->ib.ib_mr.pd; + shca->maxmr = NULL; /* remove internal max-MR indication from SHCA */ + + ret = ehca_dereg_mr(&e_maxmr->ib.ib_mr); + if (ret) { + ehca_err(&shca->ib_device, "dereg internal max-MR failed, " + "ret=%i e_maxmr=%p shca=%p lkey=%x", + ret, e_maxmr, shca, e_maxmr->ib.ib_mr.lkey); + shca->maxmr = e_maxmr; + goto ehca_dereg_internal_maxmr_exit0; + } + + atomic_dec(&ib_pd->usecnt); + +ehca_dereg_internal_maxmr_exit0: + if (ret) + ehca_err(&shca->ib_device, "ret=%i shca=%p shca->maxmr=%p", + ret, shca, shca->maxmr); + return ret; +} /* end ehca_dereg_internal_maxmr() */ + +/*----------------------------------------------------------------------*/ + +/* + * check physical buffer array of MR verbs for validness and + * calculates MR size + */ +int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + u64 *iova_start, + u64 *size) +{ + struct ib_phys_buf *pbuf = phys_buf_array; + u64 size_count = 0; + u32 i; + + if (num_phys_buf == 0) { + ehca_gen_err("bad phys buf array len, num_phys_buf=0"); + return -EINVAL; + } + /* check first buffer */ + if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & 
~PAGE_MASK)) { + ehca_gen_err("iova_start/addr mismatch, iova_start=%p " + "pbuf->addr=%llx pbuf->size=%llx", + iova_start, pbuf->addr, pbuf->size); + return -EINVAL; + } + if (((pbuf->addr + pbuf->size) % PAGE_SIZE) && + (num_phys_buf > 1)) { + ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx " + "pbuf->size=%llx", pbuf->addr, pbuf->size); + return -EINVAL; + } + + for (i = 0; i < num_phys_buf; i++) { + if ((i > 0) && (pbuf->addr % PAGE_SIZE)) { + ehca_gen_err("bad address, i=%x pbuf->addr=%llx " + "pbuf->size=%llx", + i, pbuf->addr, pbuf->size); + return -EINVAL; + } + if (((i > 0) && /* not 1st */ + (i < (num_phys_buf - 1)) && /* not last */ + (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) { + ehca_gen_err("bad size, i=%x pbuf->size=%llx", + i, pbuf->size); + return -EINVAL; + } + size_count += pbuf->size; + pbuf++; + } + + *size = size_count; + return 0; +} /* end ehca_mr_chk_buf_and_calc_size() */ + +/*----------------------------------------------------------------------*/ + +/* check page list of map FMR verb for validness */ +int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, + u64 *page_list, + int list_len) +{ + u32 i; + u64 *page; + + if ((list_len == 0) || (list_len > e_fmr->fmr_max_pages)) { + ehca_gen_err("bad list_len, list_len=%x " + "e_fmr->fmr_max_pages=%x fmr=%p", + list_len, e_fmr->fmr_max_pages, e_fmr); + return -EINVAL; + } + + /* each page must be aligned */ + page = page_list; + for (i = 0; i < list_len; i++) { + if (*page % e_fmr->fmr_page_size) { + ehca_gen_err("bad page, i=%x *page=%llx page=%p fmr=%p " + "fmr_page_size=%x", i, *page, page, e_fmr, + e_fmr->fmr_page_size); + return -EINVAL; + } + page++; + } + + return 0; +} /* end ehca_fmr_check_page_list() */ + +/*----------------------------------------------------------------------*/ + +/* PAGE_SIZE >= pginfo->hwpage_size */ +static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret = 0; + u64 pgaddr; + u32 j = 0; + int 
hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size; + struct scatterlist **sg = &pginfo->u.usr.next_sg; + + while (*sg != NULL) { + pgaddr = page_to_pfn(sg_page(*sg)) + << PAGE_SHIFT; + *kpage = pgaddr + (pginfo->next_hwpage * + pginfo->hwpage_size); + if (!(*kpage)) { + ehca_gen_err("pgaddr=%llx " + "sg_dma_address=%llx " + "entry=%llx next_hwpage=%llx", + pgaddr, (u64)sg_dma_address(*sg), + pginfo->u.usr.next_nmap, + pginfo->next_hwpage); + return -EFAULT; + } + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; + kpage++; + if (pginfo->next_hwpage % hwpages_per_kpage == 0) { + (pginfo->kpage_cnt)++; + (pginfo->u.usr.next_nmap)++; + pginfo->next_hwpage = 0; + *sg = sg_next(*sg); + } + j++; + if (j >= number) + break; + } + + return ret; +} + +/* + * check given pages for contiguous layout + * last page addr is returned in prev_pgaddr for further check + */ +static int ehca_check_kpages_per_ate(struct scatterlist **sg, + int num_pages, + u64 *prev_pgaddr) +{ + for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) { + u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT; + if (ehca_debug_level >= 3) + ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr, + *(u64 *)__va(pgaddr)); + if (pgaddr - PAGE_SIZE != *prev_pgaddr) { + ehca_gen_err("uncontiguous page found pgaddr=%llx " + "prev_pgaddr=%llx entries_left_in_hwpage=%x", + pgaddr, *prev_pgaddr, num_pages); + return -EINVAL; + } + *prev_pgaddr = pgaddr; + } + return 0; +} + +/* PAGE_SIZE < pginfo->hwpage_size */ +static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret = 0; + u64 pgaddr, prev_pgaddr; + u32 j = 0; + int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE; + int nr_kpages = kpages_per_hwpage; + struct scatterlist **sg = &pginfo->u.usr.next_sg; + + while (*sg != NULL) { + + if (nr_kpages == kpages_per_hwpage) { + pgaddr = (page_to_pfn(sg_page(*sg)) + << PAGE_SHIFT); + *kpage = pgaddr; + if (!(*kpage)) { + ehca_gen_err("pgaddr=%llx entry=%llx", 
+ pgaddr, pginfo->u.usr.next_nmap); + ret = -EFAULT; + return ret; + } + /* + * The first page in a hwpage must be aligned; + * the first MR page is exempt from this rule. + */ + if (pgaddr & (pginfo->hwpage_size - 1)) { + if (pginfo->hwpage_cnt) { + ehca_gen_err( + "invalid alignment " + "pgaddr=%llx entry=%llx " + "mr_pgsize=%llx", + pgaddr, pginfo->u.usr.next_nmap, + pginfo->hwpage_size); + ret = -EFAULT; + return ret; + } + /* first MR page */ + pginfo->kpage_cnt = + (pgaddr & + (pginfo->hwpage_size - 1)) >> + PAGE_SHIFT; + nr_kpages -= pginfo->kpage_cnt; + *kpage = pgaddr & + ~(pginfo->hwpage_size - 1); + } + if (ehca_debug_level >= 3) { + u64 val = *(u64 *)__va(pgaddr); + ehca_gen_dbg("kpage=%llx page=%llx " + "value=%016llx", + *kpage, pgaddr, val); + } + prev_pgaddr = pgaddr; + *sg = sg_next(*sg); + pginfo->kpage_cnt++; + pginfo->u.usr.next_nmap++; + nr_kpages--; + if (!nr_kpages) + goto next_kpage; + continue; + } + + ret = ehca_check_kpages_per_ate(sg, nr_kpages, + &prev_pgaddr); + if (ret) + return ret; + pginfo->kpage_cnt += nr_kpages; + pginfo->u.usr.next_nmap += nr_kpages; + +next_kpage: + nr_kpages = kpages_per_hwpage; + (pginfo->hwpage_cnt)++; + kpage++; + j++; + if (j >= number) + break; + } + + return ret; +} + +static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo, + u32 number, u64 *kpage) +{ + int ret = 0; + struct ib_phys_buf *pbuf; + u64 num_hw, offs_hw; + u32 i = 0; + + /* loop over desired phys_buf_array entries */ + while (i < number) { + pbuf = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf; + num_hw = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) + + pbuf->size, pginfo->hwpage_size); + offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) / + pginfo->hwpage_size; + while (pginfo->next_hwpage < offs_hw + num_hw) { + /* sanity check */ + if ((pginfo->kpage_cnt >= pginfo->num_kpages) || + (pginfo->hwpage_cnt >= pginfo->num_hwpages)) { + ehca_gen_err("kpage_cnt >= num_kpages, " + "kpage_cnt=%llx num_kpages=%llx " + 
"hwpage_cnt=%llx " + "num_hwpages=%llx i=%x", + pginfo->kpage_cnt, + pginfo->num_kpages, + pginfo->hwpage_cnt, + pginfo->num_hwpages, i); + return -EFAULT; + } + *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) + + (pginfo->next_hwpage * pginfo->hwpage_size); + if ( !(*kpage) && pbuf->addr ) { + ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx " + "next_hwpage=%llx", pbuf->addr, + pbuf->size, pginfo->next_hwpage); + return -EFAULT; + } + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; + if (PAGE_SIZE >= pginfo->hwpage_size) { + if (pginfo->next_hwpage % + (PAGE_SIZE / pginfo->hwpage_size) == 0) + (pginfo->kpage_cnt)++; + } else + pginfo->kpage_cnt += pginfo->hwpage_size / + PAGE_SIZE; + kpage++; + i++; + if (i >= number) break; + } + if (pginfo->next_hwpage >= offs_hw + num_hw) { + (pginfo->u.phy.next_buf)++; + pginfo->next_hwpage = 0; + } + } + return ret; +} + +static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo, + u32 number, u64 *kpage) +{ + int ret = 0; + u64 *fmrlist; + u32 i; + + /* loop over desired page_list entries */ + fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem; + for (i = 0; i < number; i++) { + *kpage = (*fmrlist & ~(pginfo->hwpage_size - 1)) + + pginfo->next_hwpage * pginfo->hwpage_size; + if ( !(*kpage) ) { + ehca_gen_err("*fmrlist=%llx fmrlist=%p " + "next_listelem=%llx next_hwpage=%llx", + *fmrlist, fmrlist, + pginfo->u.fmr.next_listelem, + pginfo->next_hwpage); + return -EFAULT; + } + (pginfo->hwpage_cnt)++; + if (pginfo->u.fmr.fmr_pgsize >= pginfo->hwpage_size) { + if (pginfo->next_hwpage % + (pginfo->u.fmr.fmr_pgsize / + pginfo->hwpage_size) == 0) { + (pginfo->kpage_cnt)++; + (pginfo->u.fmr.next_listelem)++; + fmrlist++; + pginfo->next_hwpage = 0; + } else + (pginfo->next_hwpage)++; + } else { + unsigned int cnt_per_hwpage = pginfo->hwpage_size / + pginfo->u.fmr.fmr_pgsize; + unsigned int j; + u64 prev = *kpage; + /* check if adrs are contiguous */ + for (j = 1; j < cnt_per_hwpage; j++) { + u64 p = fmrlist[j] & 
~(pginfo->hwpage_size - 1); + if (prev + pginfo->u.fmr.fmr_pgsize != p) { + ehca_gen_err("uncontiguous fmr pages " + "found prev=%llx p=%llx " + "idx=%x", prev, p, i + j); + return -EINVAL; + } + prev = p; + } + pginfo->kpage_cnt += cnt_per_hwpage; + pginfo->u.fmr.next_listelem += cnt_per_hwpage; + fmrlist += cnt_per_hwpage; + } + kpage++; + } + return ret; +} + +/* setup page buffer from page info */ +int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage) +{ + int ret; + + switch (pginfo->type) { + case EHCA_MR_PGI_PHYS: + ret = ehca_set_pagebuf_phys(pginfo, number, kpage); + break; + case EHCA_MR_PGI_USER: + ret = PAGE_SIZE >= pginfo->hwpage_size ? + ehca_set_pagebuf_user1(pginfo, number, kpage) : + ehca_set_pagebuf_user2(pginfo, number, kpage); + break; + case EHCA_MR_PGI_FMR: + ret = ehca_set_pagebuf_fmr(pginfo, number, kpage); + break; + default: + ehca_gen_err("bad pginfo->type=%x", pginfo->type); + ret = -EFAULT; + break; + } + return ret; +} /* end ehca_set_pagebuf() */ + +/*----------------------------------------------------------------------*/ + +/* + * check MR if it is a max-MR, i.e. uses whole memory + * in case it's a max-MR 1 is returned, else 0 + */ +int ehca_mr_is_maxmr(u64 size, + u64 *iova_start) +{ + /* a MR is treated as max-MR only if it fits following: */ + if ((size == ehca_mr_len) && + (iova_start == (void *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START)))) { + ehca_gen_dbg("this is a max-MR"); + return 1; + } else + return 0; +} /* end ehca_mr_is_maxmr() */ + +/*----------------------------------------------------------------------*/ + +/* map access control for MR/MW. This routine is used for MR and MW. 
*/ +void ehca_mrmw_map_acl(int ib_acl, + u32 *hipz_acl) +{ + *hipz_acl = 0; + if (ib_acl & IB_ACCESS_REMOTE_READ) + *hipz_acl |= HIPZ_ACCESSCTRL_R_READ; + if (ib_acl & IB_ACCESS_REMOTE_WRITE) + *hipz_acl |= HIPZ_ACCESSCTRL_R_WRITE; + if (ib_acl & IB_ACCESS_REMOTE_ATOMIC) + *hipz_acl |= HIPZ_ACCESSCTRL_R_ATOMIC; + if (ib_acl & IB_ACCESS_LOCAL_WRITE) + *hipz_acl |= HIPZ_ACCESSCTRL_L_WRITE; + if (ib_acl & IB_ACCESS_MW_BIND) + *hipz_acl |= HIPZ_ACCESSCTRL_MW_BIND; +} /* end ehca_mrmw_map_acl() */ + +/*----------------------------------------------------------------------*/ + +/* sets page size in hipz access control for MR/MW. */ +void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl) /*INOUT*/ +{ + *hipz_acl |= (ehca_encode_hwpage_size(pgsize) << 24); +} /* end ehca_mrmw_set_pgsize_hipz_acl() */ + +/*----------------------------------------------------------------------*/ + +/* + * reverse map access control for MR/MW. + * This routine is used for MR and MW. + */ +void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, + int *ib_acl) /*OUT*/ +{ + *ib_acl = 0; + if (*hipz_acl & HIPZ_ACCESSCTRL_R_READ) + *ib_acl |= IB_ACCESS_REMOTE_READ; + if (*hipz_acl & HIPZ_ACCESSCTRL_R_WRITE) + *ib_acl |= IB_ACCESS_REMOTE_WRITE; + if (*hipz_acl & HIPZ_ACCESSCTRL_R_ATOMIC) + *ib_acl |= IB_ACCESS_REMOTE_ATOMIC; + if (*hipz_acl & HIPZ_ACCESSCTRL_L_WRITE) + *ib_acl |= IB_ACCESS_LOCAL_WRITE; + if (*hipz_acl & HIPZ_ACCESSCTRL_MW_BIND) + *ib_acl |= IB_ACCESS_MW_BIND; +} /* end ehca_mrmw_reverse_map_acl() */ + + +/*----------------------------------------------------------------------*/ + +/* + * MR destructor and constructor + * used in Reregister MR verb, sets all fields in ehca_mr_t to 0, + * except struct ib_mr and spinlock + */ +void ehca_mr_deletenew(struct ehca_mr *mr) +{ + mr->flags = 0; + mr->num_kpages = 0; + mr->num_hwpages = 0; + mr->acl = 0; + mr->start = NULL; + mr->fmr_page_size = 0; + mr->fmr_max_pages = 0; + mr->fmr_max_maps = 0; + mr->fmr_map_cnt = 0; + 
memset(&mr->ipz_mr_handle, 0, sizeof(mr->ipz_mr_handle)); + memset(&mr->galpas, 0, sizeof(mr->galpas)); +} /* end ehca_mr_deletenew() */ + +int ehca_init_mrmw_cache(void) +{ + mr_cache = kmem_cache_create("ehca_cache_mr", + sizeof(struct ehca_mr), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!mr_cache) + return -ENOMEM; + mw_cache = kmem_cache_create("ehca_cache_mw", + sizeof(struct ehca_mw), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!mw_cache) { + kmem_cache_destroy(mr_cache); + mr_cache = NULL; + return -ENOMEM; + } + return 0; +} + +void ehca_cleanup_mrmw_cache(void) +{ + if (mr_cache) + kmem_cache_destroy(mr_cache); + if (mw_cache) + kmem_cache_destroy(mw_cache); +} + +static inline int ehca_init_top_bmap(struct ehca_top_bmap *ehca_top_bmap, + int dir) +{ + if (!ehca_bmap_valid(ehca_top_bmap->dir[dir])) { + ehca_top_bmap->dir[dir] = + kmalloc(sizeof(struct ehca_dir_bmap), GFP_KERNEL); + if (!ehca_top_bmap->dir[dir]) + return -ENOMEM; + /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ + memset(ehca_top_bmap->dir[dir], 0xFF, EHCA_ENT_MAP_SIZE); + } + return 0; +} + +static inline int ehca_init_bmap(struct ehca_bmap *ehca_bmap, int top, int dir) +{ + if (!ehca_bmap_valid(ehca_bmap->top[top])) { + ehca_bmap->top[top] = + kmalloc(sizeof(struct ehca_top_bmap), GFP_KERNEL); + if (!ehca_bmap->top[top]) + return -ENOMEM; + /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ + memset(ehca_bmap->top[top], 0xFF, EHCA_DIR_MAP_SIZE); + } + return ehca_init_top_bmap(ehca_bmap->top[top], dir); +} + +static inline int ehca_calc_index(unsigned long i, unsigned long s) +{ + return (i >> s) & EHCA_INDEX_MASK; +} + +void ehca_destroy_busmap(void) +{ + int top, dir; + + if (!ehca_bmap) + return; + + for (top = 0; top < EHCA_MAP_ENTRIES; top++) { + if (!ehca_bmap_valid(ehca_bmap->top[top])) + continue; + for (dir = 0; dir < EHCA_MAP_ENTRIES; dir++) { + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) + continue; + + kfree(ehca_bmap->top[top]->dir[dir]); + } + + 
kfree(ehca_bmap->top[top]); + } + + kfree(ehca_bmap); + ehca_bmap = NULL; +} + +static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages) +{ + unsigned long i, start_section, end_section; + int top, dir, idx; + + if (!nr_pages) + return 0; + + if (!ehca_bmap) { + ehca_bmap = kmalloc(sizeof(struct ehca_bmap), GFP_KERNEL); + if (!ehca_bmap) + return -ENOMEM; + /* Set map block to 0xFF according to EHCA_INVAL_ADDR */ + memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE); + } + + start_section = (pfn * PAGE_SIZE) / EHCA_SECTSIZE; + end_section = ((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE; + for (i = start_section; i < end_section; i++) { + int ret; + top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT); + dir = ehca_calc_index(i, EHCA_DIR_INDEX_SHIFT); + idx = i & EHCA_INDEX_MASK; + + ret = ehca_init_bmap(ehca_bmap, top, dir); + if (ret) { + ehca_destroy_busmap(); + return ret; + } + ehca_bmap->top[top]->dir[dir]->ent[idx] = ehca_mr_len; + ehca_mr_len += EHCA_SECTSIZE; + } + return 0; +} + +static int ehca_is_hugepage(unsigned long pfn) +{ + int page_order; + + if (pfn & EHCA_HUGEPAGE_PFN_MASK) + return 0; + + page_order = compound_order(pfn_to_page(pfn)); + if (page_order + PAGE_SHIFT != EHCA_HUGEPAGESHIFT) + return 0; + + return 1; +} + +static int ehca_create_busmap_callback(unsigned long initial_pfn, + unsigned long total_nr_pages, void *arg) +{ + int ret; + unsigned long pfn, start_pfn, end_pfn, nr_pages; + + if ((total_nr_pages * PAGE_SIZE) < EHCA_HUGEPAGE_SIZE) + return ehca_update_busmap(initial_pfn, total_nr_pages); + + /* Given chunk is >= 16GB -> check for hugepages */ + start_pfn = initial_pfn; + end_pfn = initial_pfn + total_nr_pages; + pfn = start_pfn; + + while (pfn < end_pfn) { + if (ehca_is_hugepage(pfn)) { + /* Add mem found in front of the hugepage */ + nr_pages = pfn - start_pfn; + ret = ehca_update_busmap(start_pfn, nr_pages); + if (ret) + return ret; + /* Skip the hugepage */ + pfn += (EHCA_HUGEPAGE_SIZE / PAGE_SIZE); + start_pfn = pfn; + } 
else + pfn += (EHCA_SECTSIZE / PAGE_SIZE); + } + + /* Add mem found behind the hugepage(s) */ + nr_pages = pfn - start_pfn; + return ehca_update_busmap(start_pfn, nr_pages); +} + +int ehca_create_busmap(void) +{ + int ret; + + ehca_mr_len = 0; + ret = walk_system_ram_range(0, 1ULL << MAX_PHYSMEM_BITS, NULL, + ehca_create_busmap_callback); + return ret; +} + +static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo) +{ + int top; + u64 hret, *kpage; + + kpage = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!kpage) { + ehca_err(&shca->ib_device, "kpage alloc failed"); + return -ENOMEM; + } + for (top = 0; top < EHCA_MAP_ENTRIES; top++) { + if (!ehca_bmap_valid(ehca_bmap->top[top])) + continue; + hret = ehca_reg_mr_dir_sections(top, kpage, shca, e_mr, pginfo); + if ((hret != H_PAGE_REGISTERED) && (hret != H_SUCCESS)) + break; + } + + ehca_free_fw_ctrlblock(kpage); + + if (hret == H_SUCCESS) + return 0; /* Everything is fine */ + else { + ehca_err(&shca->ib_device, "ehca_reg_bmap_mr_rpages failed, " + "h_ret=%lli e_mr=%p top=%x lkey=%x " + "hca_hndl=%llx mr_hndl=%llx", hret, e_mr, top, + e_mr->ib.ib_mr.lkey, + shca->ipz_hca_handle.handle, + e_mr->ipz_mr_handle.handle); + return ehca2ib_return_code(hret); + } +} + +static u64 ehca_map_vaddr(void *caddr) +{ + int top, dir, idx; + unsigned long abs_addr, offset; + u64 entry; + + if (!ehca_bmap) + return EHCA_INVAL_ADDR; + + abs_addr = __pa(caddr); + top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT); + if (!ehca_bmap_valid(ehca_bmap->top[top])) + return EHCA_INVAL_ADDR; + + dir = ehca_calc_index(abs_addr, EHCA_DIR_INDEX_SHIFT + EHCA_SECTSHIFT); + if (!ehca_bmap_valid(ehca_bmap->top[top]->dir[dir])) + return EHCA_INVAL_ADDR; + + idx = ehca_calc_index(abs_addr, EHCA_SECTSHIFT); + + entry = ehca_bmap->top[top]->dir[dir]->ent[idx]; + if (ehca_bmap_valid(entry)) { + offset = (unsigned long)caddr & (EHCA_SECTSIZE - 1); + return entry | offset; + } else + 
return EHCA_INVAL_ADDR; +} + +static int ehca_dma_mapping_error(struct ib_device *dev, u64 dma_addr) +{ + return dma_addr == EHCA_INVAL_ADDR; +} + +static u64 ehca_dma_map_single(struct ib_device *dev, void *cpu_addr, + size_t size, enum dma_data_direction direction) +{ + if (cpu_addr) + return ehca_map_vaddr(cpu_addr); + else + return EHCA_INVAL_ADDR; +} + +static void ehca_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size, + enum dma_data_direction direction) +{ + /* This is only a stub; nothing to be done here */ +} + +static u64 ehca_dma_map_page(struct ib_device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + u64 addr; + + if (offset + size > PAGE_SIZE) + return EHCA_INVAL_ADDR; + + addr = ehca_map_vaddr(page_address(page)); + if (!ehca_dma_mapping_error(dev, addr)) + addr += offset; + + return addr; +} + +static void ehca_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size, + enum dma_data_direction direction) +{ + /* This is only a stub; nothing to be done here */ +} + +static int ehca_dma_map_sg(struct ib_device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) { + u64 addr; + addr = ehca_map_vaddr(sg_virt(sg)); + if (ehca_dma_mapping_error(dev, addr)) + return 0; + + sg->dma_address = addr; + sg->dma_length = sg->length; + } + return nents; +} + +static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction direction) +{ + /* This is only a stub; nothing to be done here */ +} + +static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr, + size_t size, + enum dma_data_direction dir) +{ + dma_sync_single_for_cpu(dev->dma_device, addr, size, dir); +} + +static void ehca_dma_sync_single_for_device(struct ib_device *dev, u64 addr, + size_t size, + enum dma_data_direction dir) +{ + 
dma_sync_single_for_device(dev->dma_device, addr, size, dir); +} + +static void *ehca_dma_alloc_coherent(struct ib_device *dev, size_t size, + u64 *dma_handle, gfp_t flag) +{ + struct page *p; + void *addr = NULL; + u64 dma_addr; + + p = alloc_pages(flag, get_order(size)); + if (p) { + addr = page_address(p); + dma_addr = ehca_map_vaddr(addr); + if (ehca_dma_mapping_error(dev, dma_addr)) { + free_pages((unsigned long)addr, get_order(size)); + return NULL; + } + if (dma_handle) + *dma_handle = dma_addr; + return addr; + } + return NULL; +} + +static void ehca_dma_free_coherent(struct ib_device *dev, size_t size, + void *cpu_addr, u64 dma_handle) +{ + if (cpu_addr && size) + free_pages((unsigned long)cpu_addr, get_order(size)); +} + + +struct ib_dma_mapping_ops ehca_dma_mapping_ops = { + .mapping_error = ehca_dma_mapping_error, + .map_single = ehca_dma_map_single, + .unmap_single = ehca_dma_unmap_single, + .map_page = ehca_dma_map_page, + .unmap_page = ehca_dma_unmap_page, + .map_sg = ehca_dma_map_sg, + .unmap_sg = ehca_dma_unmap_sg, + .sync_single_for_cpu = ehca_dma_sync_single_for_cpu, + .sync_single_for_device = ehca_dma_sync_single_for_device, + .alloc_coherent = ehca_dma_alloc_coherent, + .free_coherent = ehca_dma_free_coherent, +}; diff --git a/drivers/staging/rdma/ehca/ehca_mrmw.h b/drivers/staging/rdma/ehca/ehca_mrmw.h new file mode 100644 index 000000000000..50d8b51306dd --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_mrmw.h @@ -0,0 +1,132 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * MR/MW declarations and inline functions + * + * Authors: Dietmar Decker + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. 
+ * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _EHCA_MRMW_H_ +#define _EHCA_MRMW_H_ + +enum ehca_reg_type { + EHCA_REG_MR, + EHCA_REG_BUSMAP_MR +}; + +int ehca_reg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int acl, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, + u32 *rkey, + enum ehca_reg_type reg_type); + +int ehca_reg_mr_rpages(struct ehca_shca *shca, + struct ehca_mr *e_mr, + struct ehca_mr_pginfo *pginfo); + +int ehca_rereg_mr(struct ehca_shca *shca, + struct ehca_mr *e_mr, + u64 *iova_start, + u64 size, + int mr_access_flags, + struct ehca_pd *e_pd, + struct ehca_mr_pginfo *pginfo, + u32 *lkey, + u32 *rkey); + +int ehca_unmap_one_fmr(struct ehca_shca *shca, + struct ehca_mr *e_fmr); + +int ehca_reg_smr(struct ehca_shca *shca, + struct ehca_mr *e_origmr, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, + u32 *rkey); + +int ehca_reg_internal_maxmr(struct ehca_shca *shca, + struct ehca_pd *e_pd, + struct ehca_mr **maxmr); + +int ehca_reg_maxmr(struct ehca_shca *shca, + struct ehca_mr *e_newmr, + u64 *iova_start, + int acl, + struct ehca_pd *e_pd, + u32 *lkey, + u32 *rkey); + +int ehca_dereg_internal_maxmr(struct ehca_shca *shca); + +int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + u64 *iova_start, + u64 *size); + +int ehca_fmr_check_page_list(struct ehca_mr *e_fmr, + u64 *page_list, + int list_len); + +int ehca_set_pagebuf(struct ehca_mr_pginfo *pginfo, + u32 number, + u64 *kpage); + +int ehca_mr_is_maxmr(u64 size, + u64 *iova_start); + +void ehca_mrmw_map_acl(int ib_acl, + u32 *hipz_acl); + +void ehca_mrmw_set_pgsize_hipz_acl(u32 pgsize, u32 *hipz_acl); + +void ehca_mrmw_reverse_map_acl(const u32 *hipz_acl, + int *ib_acl); + +void ehca_mr_deletenew(struct ehca_mr *mr); + +int ehca_create_busmap(void); + +void ehca_destroy_busmap(void); + +extern struct ib_dma_mapping_ops ehca_dma_mapping_ops; +#endif /*_EHCA_MRMW_H_*/ diff --git 
a/drivers/staging/rdma/ehca/ehca_pd.c b/drivers/staging/rdma/ehca/ehca_pd.c new file mode 100644 index 000000000000..351577a6670a --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_pd.c @@ -0,0 +1,124 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * PD functions + * + * Authors: Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include "ehca_tools.h" +#include "ehca_iverbs.h" + +static struct kmem_cache *pd_cache; + +struct ib_pd *ehca_alloc_pd(struct ib_device *device, + struct ib_ucontext *context, struct ib_udata *udata) +{ + struct ehca_pd *pd; + int i; + + pd = kmem_cache_zalloc(pd_cache, GFP_KERNEL); + if (!pd) { + ehca_err(device, "device=%p context=%p out of memory", + device, context); + return ERR_PTR(-ENOMEM); + } + + for (i = 0; i < 2; i++) { + INIT_LIST_HEAD(&pd->free[i]); + INIT_LIST_HEAD(&pd->full[i]); + } + mutex_init(&pd->lock); + + /* + * Kernel PD: when device = -1, 0 + * User PD: when context != -1 + */ + if (!context) { + /* + * Kernel PDs after init reuses always + * the one created in ehca_shca_reopen() + */ + struct ehca_shca *shca = container_of(device, struct ehca_shca, + ib_device); + pd->fw_pd.value = shca->pd->fw_pd.value; + } else + pd->fw_pd.value = (u64)pd; + + return &pd->ib_pd; +} + +int ehca_dealloc_pd(struct ib_pd *pd) +{ + struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); + int i, leftovers = 0; + struct ipz_small_queue_page *page, *tmp; + + for (i = 0; i < 2; i++) { + list_splice(&my_pd->full[i], &my_pd->free[i]); + list_for_each_entry_safe(page, tmp, &my_pd->free[i], list) { + leftovers = 1; + free_page(page->page); + kmem_cache_free(small_qp_cache, page); + } + } + + if (leftovers) + ehca_warn(pd->device, + "Some small queue pages were not freed"); + + kmem_cache_free(pd_cache, my_pd); + + return 0; +} + +int ehca_init_pd_cache(void) +{ + pd_cache = kmem_cache_create("ehca_cache_pd", + sizeof(struct ehca_pd), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!pd_cache) + return -ENOMEM; + return 0; +} + +void ehca_cleanup_pd_cache(void) +{ + if (pd_cache) + kmem_cache_destroy(pd_cache); +} diff --git a/drivers/staging/rdma/ehca/ehca_qes.h b/drivers/staging/rdma/ehca/ehca_qes.h new file mode 100644 index 000000000000..90c4efa67586 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_qes.h @@ -0,0 +1,260 @@ +/* + * IBM eServer 
eHCA Infiniband device driver for Linux on POWER + * + * Hardware request structures + * + * Authors: Waleri Fomin + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#ifndef _EHCA_QES_H_ +#define _EHCA_QES_H_ + +#include "ehca_tools.h" + +/* virtual scatter gather entry to specify remote addresses with length */ +struct ehca_vsgentry { + u64 vaddr; + u32 lkey; + u32 length; +}; + +#define GRH_FLAG_MASK EHCA_BMASK_IBM( 7, 7) +#define GRH_IPVERSION_MASK EHCA_BMASK_IBM( 0, 3) +#define GRH_TCLASS_MASK EHCA_BMASK_IBM( 4, 12) +#define GRH_FLOWLABEL_MASK EHCA_BMASK_IBM(13, 31) +#define GRH_PAYLEN_MASK EHCA_BMASK_IBM(32, 47) +#define GRH_NEXTHEADER_MASK EHCA_BMASK_IBM(48, 55) +#define GRH_HOPLIMIT_MASK EHCA_BMASK_IBM(56, 63) + +/* + * Unreliable Datagram Address Vector Format + * see IBTA Vol1 chapter 8.3 Global Routing Header + */ +struct ehca_ud_av { + u8 sl; + u8 lnh; + u16 dlid; + u8 reserved1; + u8 reserved2; + u8 reserved3; + u8 slid_path_bits; + u8 reserved4; + u8 ipd; + u8 reserved5; + u8 pmtu; + u32 reserved6; + u64 reserved7; + union { + struct { + u64 word_0; /* always set to 6 */ + /*should be 0x1B for IB transport */ + u64 word_1; + u64 word_2; + u64 word_3; + u64 word_4; + } grh; + struct { + u32 wd_0; + u32 wd_1; + /* DWord_1 --> SGID */ + + u32 sgid_wd3; + u32 sgid_wd2; + + u32 sgid_wd1; + u32 sgid_wd0; + /* DWord_3 --> DGID */ + + u32 dgid_wd3; + u32 dgid_wd2; + + u32 dgid_wd1; + u32 dgid_wd0; + } grh_l; + }; +}; + +/* maximum number of sg entries allowed in a WQE */ +#define MAX_WQE_SG_ENTRIES 252 + +#define WQE_OPTYPE_SEND 0x80 +#define WQE_OPTYPE_RDMAREAD 0x40 +#define WQE_OPTYPE_RDMAWRITE 0x20 +#define WQE_OPTYPE_CMPSWAP 0x10 +#define WQE_OPTYPE_FETCHADD 0x08 +#define WQE_OPTYPE_BIND 0x04 + +#define WQE_WRFLAG_REQ_SIGNAL_COM 0x80 +#define WQE_WRFLAG_FENCE 0x40 +#define WQE_WRFLAG_IMM_DATA_PRESENT 0x20 +#define WQE_WRFLAG_SOLIC_EVENT 0x10 + +#define WQEF_CACHE_HINT 0x80 +#define WQEF_CACHE_HINT_RD_WR 0x40 +#define WQEF_TIMED_WQE 0x20 +#define WQEF_PURGE 0x08 +#define WQEF_HIGH_NIBBLE 0xF0 + +#define MW_BIND_ACCESSCTRL_R_WRITE 0x40 +#define MW_BIND_ACCESSCTRL_R_READ 0x20 +#define MW_BIND_ACCESSCTRL_R_ATOMIC 
0x10 + +struct ehca_wqe { + u64 work_request_id; + u8 optype; + u8 wr_flag; + u16 pkeyi; + u8 wqef; + u8 nr_of_data_seg; + u16 wqe_provided_slid; + u32 destination_qp_number; + u32 resync_psn_sqp; + u32 local_ee_context_qkey; + u32 immediate_data; + union { + struct { + u64 remote_virtual_address; + u32 rkey; + u32 reserved; + u64 atomic_1st_op_dma_len; + u64 atomic_2nd_op; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; + + } nud; + struct { + u64 ehca_ud_av_ptr; + u64 reserved1; + u64 reserved2; + u64 reserved3; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; + } ud_avp; + struct { + struct ehca_ud_av ud_av; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES - + 2]; + } ud_av; + struct { + u64 reserved0; + u64 reserved1; + u64 reserved2; + u64 reserved3; + struct ehca_vsgentry sg_list[MAX_WQE_SG_ENTRIES]; + } all_rcv; + + struct { + u64 reserved; + u32 rkey; + u32 old_rkey; + u64 reserved1; + u64 reserved2; + u64 virtual_address; + u32 reserved3; + u32 length; + u32 reserved4; + u16 reserved5; + u8 reserved6; + u8 lr_ctl; + u32 lkey; + u32 reserved7; + u64 reserved8; + u64 reserved9; + u64 reserved10; + u64 reserved11; + } bind; + struct { + u64 reserved12; + u64 reserved13; + u32 size; + u32 start; + } inline_data; + } u; + +}; + +#define WC_SEND_RECEIVE EHCA_BMASK_IBM(0, 0) +#define WC_IMM_DATA EHCA_BMASK_IBM(1, 1) +#define WC_GRH_PRESENT EHCA_BMASK_IBM(2, 2) +#define WC_SE_BIT EHCA_BMASK_IBM(3, 3) +#define WC_STATUS_ERROR_BIT 0x80000000 +#define WC_STATUS_REMOTE_ERROR_FLAGS 0x0000F800 +#define WC_STATUS_PURGE_BIT 0x10 +#define WC_SEND_RECEIVE_BIT 0x80 + +struct ehca_cqe { + u64 work_request_id; + u8 optype; + u8 w_completion_flags; + u16 reserved1; + u32 nr_bytes_transferred; + u32 immediate_data; + u32 local_qp_number; + u8 freed_resource_count; + u8 service_level; + u16 wqe_count; + u32 qp_token; + u32 qkey_ee_token; + u32 remote_qp_number; + u16 dlid; + u16 rlid; + u16 reserved2; + u16 pkey_index; + u32 cqe_timestamp; + u32 wqe_timestamp; + u8 
wqe_timestamp_valid; + u8 reserved3; + u8 reserved4; + u8 cqe_flags; + u32 status; +}; + +struct ehca_eqe { + u64 entry; +}; + +struct ehca_mrte { + u64 starting_va; + u64 length; /* length of memory region in bytes*/ + u32 pd; + u8 key_instance; + u8 pagesize; + u8 mr_control; + u8 local_remote_access_ctrl; + u8 reserved[0x20 - 0x18]; + u64 at_pointer[4]; +}; +#endif /*_EHCA_QES_H_*/ diff --git a/drivers/staging/rdma/ehca/ehca_qp.c b/drivers/staging/rdma/ehca/ehca_qp.c new file mode 100644 index 000000000000..2e89356c46fa --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_qp.c @@ -0,0 +1,2257 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * QP functions + * + * Authors: Joachim Fenkes + * Stefan Roscher + * Waleri Fomin + * Hoang-Nam Nguyen + * Reinhard Ernst + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_qes.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" +#include "hipz_fns.h" + +static struct kmem_cache *qp_cache; + +/* + * attributes not supported by query qp + */ +#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS | \ + IB_QP_EN_SQD_ASYNC_NOTIFY) + +/* + * ehca (internal) qp state values + */ +enum ehca_qp_state { + EHCA_QPS_RESET = 1, + EHCA_QPS_INIT = 2, + EHCA_QPS_RTR = 3, + EHCA_QPS_RTS = 5, + EHCA_QPS_SQD = 6, + EHCA_QPS_SQE = 8, + EHCA_QPS_ERR = 128 +}; + +/* + * qp state transitions as defined by IB Arch Rel 1.1 page 431 + */ +enum ib_qp_statetrans { + IB_QPST_ANY2RESET, + IB_QPST_ANY2ERR, + IB_QPST_RESET2INIT, + IB_QPST_INIT2RTR, + IB_QPST_INIT2INIT, + IB_QPST_RTR2RTS, + IB_QPST_RTS2SQD, + IB_QPST_RTS2RTS, + IB_QPST_SQD2RTS, + IB_QPST_SQE2RTS, + IB_QPST_SQD2SQD, + IB_QPST_MAX /* nr of transitions, this must be last!!! 
*/ +}; + +/* + * ib2ehca_qp_state maps IB to ehca qp_state + * returns ehca qp state corresponding to given ib qp state + */ +static inline enum ehca_qp_state ib2ehca_qp_state(enum ib_qp_state ib_qp_state) +{ + switch (ib_qp_state) { + case IB_QPS_RESET: + return EHCA_QPS_RESET; + case IB_QPS_INIT: + return EHCA_QPS_INIT; + case IB_QPS_RTR: + return EHCA_QPS_RTR; + case IB_QPS_RTS: + return EHCA_QPS_RTS; + case IB_QPS_SQD: + return EHCA_QPS_SQD; + case IB_QPS_SQE: + return EHCA_QPS_SQE; + case IB_QPS_ERR: + return EHCA_QPS_ERR; + default: + ehca_gen_err("invalid ib_qp_state=%x", ib_qp_state); + return -EINVAL; + } +} + +/* + * ehca2ib_qp_state maps ehca to IB qp_state + * returns ib qp state corresponding to given ehca qp state + */ +static inline enum ib_qp_state ehca2ib_qp_state(enum ehca_qp_state + ehca_qp_state) +{ + switch (ehca_qp_state) { + case EHCA_QPS_RESET: + return IB_QPS_RESET; + case EHCA_QPS_INIT: + return IB_QPS_INIT; + case EHCA_QPS_RTR: + return IB_QPS_RTR; + case EHCA_QPS_RTS: + return IB_QPS_RTS; + case EHCA_QPS_SQD: + return IB_QPS_SQD; + case EHCA_QPS_SQE: + return IB_QPS_SQE; + case EHCA_QPS_ERR: + return IB_QPS_ERR; + default: + ehca_gen_err("invalid ehca_qp_state=%x", ehca_qp_state); + return -EINVAL; + } +} + +/* + * ehca_qp_type used as index for req_attr and opt_attr of + * struct ehca_modqp_statetrans + */ +enum ehca_qp_type { + QPT_RC = 0, + QPT_UC = 1, + QPT_UD = 2, + QPT_SQP = 3, + QPT_MAX +}; + +/* + * ib2ehcaqptype maps Ib to ehca qp_type + * returns ehca qp type corresponding to ib qp type + */ +static inline enum ehca_qp_type ib2ehcaqptype(enum ib_qp_type ibqptype) +{ + switch (ibqptype) { + case IB_QPT_SMI: + case IB_QPT_GSI: + return QPT_SQP; + case IB_QPT_RC: + return QPT_RC; + case IB_QPT_UC: + return QPT_UC; + case IB_QPT_UD: + return QPT_UD; + default: + ehca_gen_err("Invalid ibqptype=%x", ibqptype); + return -EINVAL; + } +} + +static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate, + int ib_tostate) +{ + 
int index = -EINVAL; + switch (ib_tostate) { + case IB_QPS_RESET: + index = IB_QPST_ANY2RESET; + break; + case IB_QPS_INIT: + switch (ib_fromstate) { + case IB_QPS_RESET: + index = IB_QPST_RESET2INIT; + break; + case IB_QPS_INIT: + index = IB_QPST_INIT2INIT; + break; + } + break; + case IB_QPS_RTR: + if (ib_fromstate == IB_QPS_INIT) + index = IB_QPST_INIT2RTR; + break; + case IB_QPS_RTS: + switch (ib_fromstate) { + case IB_QPS_RTR: + index = IB_QPST_RTR2RTS; + break; + case IB_QPS_RTS: + index = IB_QPST_RTS2RTS; + break; + case IB_QPS_SQD: + index = IB_QPST_SQD2RTS; + break; + case IB_QPS_SQE: + index = IB_QPST_SQE2RTS; + break; + } + break; + case IB_QPS_SQD: + if (ib_fromstate == IB_QPS_RTS) + index = IB_QPST_RTS2SQD; + break; + case IB_QPS_SQE: + break; + case IB_QPS_ERR: + index = IB_QPST_ANY2ERR; + break; + default: + break; + } + return index; +} + +/* + * ibqptype2servicetype returns hcp service type corresponding to given + * ib qp type used by create_qp() + */ +static inline int ibqptype2servicetype(enum ib_qp_type ibqptype) +{ + switch (ibqptype) { + case IB_QPT_SMI: + case IB_QPT_GSI: + return ST_UD; + case IB_QPT_RC: + return ST_RC; + case IB_QPT_UC: + return ST_UC; + case IB_QPT_UD: + return ST_UD; + case IB_QPT_RAW_IPV6: + return -EINVAL; + case IB_QPT_RAW_ETHERTYPE: + return -EINVAL; + default: + ehca_gen_err("Invalid ibqptype=%x", ibqptype); + return -EINVAL; + } +} + +/* + * init userspace queue info from ipz_queue data + */ +static inline void queue2resp(struct ipzu_queue_resp *resp, + struct ipz_queue *queue) +{ + resp->qe_size = queue->qe_size; + resp->act_nr_of_sg = queue->act_nr_of_sg; + resp->queue_length = queue->queue_length; + resp->pagesize = queue->pagesize; + resp->toggle_state = queue->toggle_state; + resp->offset = queue->offset; +} + +/* + * init_qp_queue initializes/constructs r/squeue and registers queue pages. 
+ */ +static inline int init_qp_queue(struct ehca_shca *shca, + struct ehca_pd *pd, + struct ehca_qp *my_qp, + struct ipz_queue *queue, + int q_type, + u64 expected_hret, + struct ehca_alloc_queue_parms *parms, + int wqe_size) +{ + int ret, cnt, ipz_rc, nr_q_pages; + void *vpage; + u64 rpage, h_ret; + struct ib_device *ib_dev = &shca->ib_device; + struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle; + + if (!parms->queue_size) + return 0; + + if (parms->is_small) { + nr_q_pages = 1; + ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages, + 128 << parms->page_size, + wqe_size, parms->act_nr_sges, 1); + } else { + nr_q_pages = parms->queue_size; + ipz_rc = ipz_queue_ctor(pd, queue, nr_q_pages, + EHCA_PAGESIZE, wqe_size, + parms->act_nr_sges, 0); + } + + if (!ipz_rc) { + ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%i", + ipz_rc); + return -EBUSY; + } + + /* register queue pages */ + for (cnt = 0; cnt < nr_q_pages; cnt++) { + vpage = ipz_qpageit_get_inc(queue); + if (!vpage) { + ehca_err(ib_dev, "ipz_qpageit_get_inc() " + "failed p_vpage= %p", vpage); + ret = -EINVAL; + goto init_qp_queue1; + } + rpage = __pa(vpage); + + h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, + my_qp->ipz_qp_handle, + NULL, 0, q_type, + rpage, parms->is_small ? 0 : 1, + my_qp->galpas.kernel); + if (cnt == (nr_q_pages - 1)) { /* last page! 
*/ + if (h_ret != expected_hret) { + ehca_err(ib_dev, "hipz_qp_register_rpage() " + "h_ret=%lli", h_ret); + ret = ehca2ib_return_code(h_ret); + goto init_qp_queue1; + } + vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue); + if (vpage) { + ehca_err(ib_dev, "ipz_qpageit_get_inc() " + "should not succeed vpage=%p", vpage); + ret = -EINVAL; + goto init_qp_queue1; + } + } else { + if (h_ret != H_PAGE_REGISTERED) { + ehca_err(ib_dev, "hipz_qp_register_rpage() " + "h_ret=%lli", h_ret); + ret = ehca2ib_return_code(h_ret); + goto init_qp_queue1; + } + } + } + + ipz_qeit_reset(queue); + + return 0; + +init_qp_queue1: + ipz_queue_dtor(pd, queue); + return ret; +} + +static inline int ehca_calc_wqe_size(int act_nr_sge, int is_llqp) +{ + if (is_llqp) + return 128 << act_nr_sge; + else + return offsetof(struct ehca_wqe, + u.nud.sg_list[act_nr_sge]); +} + +static void ehca_determine_small_queue(struct ehca_alloc_queue_parms *queue, + int req_nr_sge, int is_llqp) +{ + u32 wqe_size, q_size; + int act_nr_sge = req_nr_sge; + + if (!is_llqp) + /* round up #SGEs so WQE size is a power of 2 */ + for (act_nr_sge = 4; act_nr_sge <= 252; + act_nr_sge = 4 + 2 * act_nr_sge) + if (act_nr_sge >= req_nr_sge) + break; + + wqe_size = ehca_calc_wqe_size(act_nr_sge, is_llqp); + q_size = wqe_size * (queue->max_wr + 1); + + if (q_size <= 512) + queue->page_size = 2; + else if (q_size <= 1024) + queue->page_size = 3; + else + queue->page_size = 0; + + queue->is_small = (queue->page_size != 0); +} + +/* needs to be called with cq->spinlock held */ +void ehca_add_to_err_list(struct ehca_qp *qp, int on_sq) +{ + struct list_head *list, *node; + + /* TODO: support low latency QPs */ + if (qp->ext_type == EQPT_LLQP) + return; + + if (on_sq) { + list = &qp->send_cq->sqp_err_list; + node = &qp->sq_err_node; + } else { + list = &qp->recv_cq->rqp_err_list; + node = &qp->rq_err_node; + } + + if (list_empty(node)) + list_add_tail(node, list); + + return; +} + +static void del_from_err_list(struct ehca_cq *cq, 
struct list_head *node) +{ + unsigned long flags; + + spin_lock_irqsave(&cq->spinlock, flags); + + if (!list_empty(node)) + list_del_init(node); + + spin_unlock_irqrestore(&cq->spinlock, flags); +} + +static void reset_queue_map(struct ehca_queue_map *qmap) +{ + int i; + + qmap->tail = qmap->entries - 1; + qmap->left_to_poll = 0; + qmap->next_wqe_idx = 0; + for (i = 0; i < qmap->entries; i++) { + qmap->map[i].reported = 1; + qmap->map[i].cqe_req = 0; + } +} + +/* + * Create an ib_qp struct that is either a QP or an SRQ, depending on + * the value of the is_srq parameter. If init_attr and srq_init_attr share + * fields, the field out of init_attr is used. + */ +static struct ehca_qp *internal_create_qp( + struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata, int is_srq) +{ + struct ehca_qp *my_qp, *my_srq = NULL; + struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd); + struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, + ib_device); + struct ib_ucontext *context = NULL; + u64 h_ret; + int is_llqp = 0, has_srq = 0, is_user = 0; + int qp_type, max_send_sge, max_recv_sge, ret; + + /* h_call's out parameters */ + struct ehca_alloc_qp_parms parms; + u32 swqe_size = 0, rwqe_size = 0, ib_qp_num; + unsigned long flags; + + if (!atomic_add_unless(&shca->num_qps, 1, shca->max_num_qps)) { + ehca_err(pd->device, "Unable to create QP, max number of %i " + "QPs reached.", shca->max_num_qps); + ehca_err(pd->device, "To increase the maximum number of QPs " + "use the number_of_qps module parameter.\n"); + return ERR_PTR(-ENOSPC); + } + + if (init_attr->create_flags) { + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + + memset(&parms, 0, sizeof(parms)); + qp_type = init_attr->qp_type; + + if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR && + init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) { + ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed", + init_attr->sq_sig_type); 
+ atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + + /* save LLQP info */ + if (qp_type & 0x80) { + is_llqp = 1; + parms.ext_type = EQPT_LLQP; + parms.ll_comp_flags = qp_type & LLQP_COMP_MASK; + } + qp_type &= 0x1F; + init_attr->qp_type &= 0x1F; + + /* handle SRQ base QPs */ + if (init_attr->srq) { + my_srq = container_of(init_attr->srq, struct ehca_qp, ib_srq); + + if (qp_type == IB_QPT_UC) { + ehca_err(pd->device, "UC with SRQ not supported"); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + + has_srq = 1; + parms.ext_type = EQPT_SRQBASE; + parms.srq_qpn = my_srq->real_qp_num; + } + + if (is_llqp && has_srq) { + ehca_err(pd->device, "LLQPs can't have an SRQ"); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + + /* handle SRQs */ + if (is_srq) { + parms.ext_type = EQPT_SRQ; + parms.srq_limit = srq_init_attr->attr.srq_limit; + if (init_attr->cap.max_recv_sge > 3) { + ehca_err(pd->device, "no more than three SGEs " + "supported for SRQ pd=%p max_sge=%x", + pd, init_attr->cap.max_recv_sge); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + } + + /* check QP type */ + if (qp_type != IB_QPT_UD && + qp_type != IB_QPT_UC && + qp_type != IB_QPT_RC && + qp_type != IB_QPT_SMI && + qp_type != IB_QPT_GSI) { + ehca_err(pd->device, "wrong QP Type=%x", qp_type); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + + if (is_llqp) { + switch (qp_type) { + case IB_QPT_RC: + if ((init_attr->cap.max_send_wr > 255) || + (init_attr->cap.max_recv_wr > 255)) { + ehca_err(pd->device, + "Invalid Number of max_sq_wr=%x " + "or max_rq_wr=%x for RC LLQP", + init_attr->cap.max_send_wr, + init_attr->cap.max_recv_wr); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + break; + case IB_QPT_UD: + if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) { + ehca_err(pd->device, "UD LLQP not supported " + "by this adapter"); + atomic_dec(&shca->num_qps); + return ERR_PTR(-ENOSYS); + } + if (!(init_attr->cap.max_send_sge <= 5 + && 
init_attr->cap.max_send_sge >= 1 + && init_attr->cap.max_recv_sge <= 5 + && init_attr->cap.max_recv_sge >= 1)) { + ehca_err(pd->device, + "Invalid Number of max_send_sge=%x " + "or max_recv_sge=%x for UD LLQP", + init_attr->cap.max_send_sge, + init_attr->cap.max_recv_sge); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } else if (init_attr->cap.max_send_wr > 255) { + ehca_err(pd->device, + "Invalid Number of " + "max_send_wr=%x for UD QP_TYPE=%x", + init_attr->cap.max_send_wr, qp_type); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + break; + default: + ehca_err(pd->device, "unsupported LL QP Type=%x", + qp_type); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + } else { + int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI + || qp_type == IB_QPT_GSI) ? 250 : 252; + + if (init_attr->cap.max_send_sge > max_sge + || init_attr->cap.max_recv_sge > max_sge) { + ehca_err(pd->device, "Invalid number of SGEs requested " + "send_sge=%x recv_sge=%x max_sge=%x", + init_attr->cap.max_send_sge, + init_attr->cap.max_recv_sge, max_sge); + atomic_dec(&shca->num_qps); + return ERR_PTR(-EINVAL); + } + } + + my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL); + if (!my_qp) { + ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd); + atomic_dec(&shca->num_qps); + return ERR_PTR(-ENOMEM); + } + + if (pd->uobject && udata) { + is_user = 1; + context = pd->uobject->context; + } + + atomic_set(&my_qp->nr_events, 0); + init_waitqueue_head(&my_qp->wait_completion); + spin_lock_init(&my_qp->spinlock_s); + spin_lock_init(&my_qp->spinlock_r); + my_qp->qp_type = qp_type; + my_qp->ext_type = parms.ext_type; + my_qp->state = IB_QPS_RESET; + + if (init_attr->recv_cq) + my_qp->recv_cq = + container_of(init_attr->recv_cq, struct ehca_cq, ib_cq); + if (init_attr->send_cq) + my_qp->send_cq = + container_of(init_attr->send_cq, struct ehca_cq, ib_cq); + + idr_preload(GFP_KERNEL); + write_lock_irqsave(&ehca_qp_idr_lock, flags); + + ret = 
idr_alloc(&ehca_qp_idr, my_qp, 0, 0x2000000, GFP_NOWAIT); + if (ret >= 0) + my_qp->token = ret; + + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + idr_preload_end(); + if (ret < 0) { + if (ret == -ENOSPC) { + ret = -EINVAL; + ehca_err(pd->device, "Invalid number of qp"); + } else { + ret = -ENOMEM; + ehca_err(pd->device, "Can't allocate new idr entry."); + } + goto create_qp_exit0; + } + + if (has_srq) + parms.srq_token = my_qp->token; + + parms.servicetype = ibqptype2servicetype(qp_type); + if (parms.servicetype < 0) { + ret = -EINVAL; + ehca_err(pd->device, "Invalid qp_type=%x", qp_type); + goto create_qp_exit1; + } + + /* Always signal by WQE so we can hide circ. WQEs */ + parms.sigtype = HCALL_SIGT_BY_WQE; + + /* UD_AV CIRCUMVENTION */ + max_send_sge = init_attr->cap.max_send_sge; + max_recv_sge = init_attr->cap.max_recv_sge; + if (parms.servicetype == ST_UD && !is_llqp) { + max_send_sge += 2; + max_recv_sge += 2; + } + + parms.token = my_qp->token; + parms.eq_handle = shca->eq.ipz_eq_handle; + parms.pd = my_pd->fw_pd; + if (my_qp->send_cq) + parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle; + if (my_qp->recv_cq) + parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle; + + parms.squeue.max_wr = init_attr->cap.max_send_wr; + parms.rqueue.max_wr = init_attr->cap.max_recv_wr; + parms.squeue.max_sge = max_send_sge; + parms.rqueue.max_sge = max_recv_sge; + + /* RC QPs need one more SWQE for unsolicited ack circumvention */ + if (qp_type == IB_QPT_RC) + parms.squeue.max_wr++; + + if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) { + if (HAS_SQ(my_qp)) + ehca_determine_small_queue( + &parms.squeue, max_send_sge, is_llqp); + if (HAS_RQ(my_qp)) + ehca_determine_small_queue( + &parms.rqueue, max_recv_sge, is_llqp); + parms.qp_storage = + (parms.squeue.is_small || parms.rqueue.is_small); + } + + h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms, is_user); + if (h_ret != H_SUCCESS) { + ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lli", + 
h_ret); + ret = ehca2ib_return_code(h_ret); + goto create_qp_exit1; + } + + ib_qp_num = my_qp->real_qp_num = parms.real_qp_num; + my_qp->ipz_qp_handle = parms.qp_handle; + my_qp->galpas = parms.galpas; + + swqe_size = ehca_calc_wqe_size(parms.squeue.act_nr_sges, is_llqp); + rwqe_size = ehca_calc_wqe_size(parms.rqueue.act_nr_sges, is_llqp); + + switch (qp_type) { + case IB_QPT_RC: + if (is_llqp) { + parms.squeue.act_nr_sges = 1; + parms.rqueue.act_nr_sges = 1; + } + /* hide the extra WQE */ + parms.squeue.act_nr_wqes--; + break; + case IB_QPT_UD: + case IB_QPT_GSI: + case IB_QPT_SMI: + /* UD circumvention */ + if (is_llqp) { + parms.squeue.act_nr_sges = 1; + parms.rqueue.act_nr_sges = 1; + } else { + parms.squeue.act_nr_sges -= 2; + parms.rqueue.act_nr_sges -= 2; + } + + if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) { + parms.squeue.act_nr_wqes = init_attr->cap.max_send_wr; + parms.rqueue.act_nr_wqes = init_attr->cap.max_recv_wr; + parms.squeue.act_nr_sges = init_attr->cap.max_send_sge; + parms.rqueue.act_nr_sges = init_attr->cap.max_recv_sge; + ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1; + } + + break; + + default: + break; + } + + /* initialize r/squeue and register queue pages */ + if (HAS_SQ(my_qp)) { + ret = init_qp_queue( + shca, my_pd, my_qp, &my_qp->ipz_squeue, 0, + HAS_RQ(my_qp) ? 
H_PAGE_REGISTERED : H_SUCCESS, + &parms.squeue, swqe_size); + if (ret) { + ehca_err(pd->device, "Couldn't initialize squeue " + "and pages ret=%i", ret); + goto create_qp_exit2; + } + + if (!is_user) { + my_qp->sq_map.entries = my_qp->ipz_squeue.queue_length / + my_qp->ipz_squeue.qe_size; + my_qp->sq_map.map = vmalloc(my_qp->sq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->sq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit3; + } + INIT_LIST_HEAD(&my_qp->sq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->sq_map); + } + } + + if (HAS_RQ(my_qp)) { + ret = init_qp_queue( + shca, my_pd, my_qp, &my_qp->ipz_rqueue, 1, + H_SUCCESS, &parms.rqueue, rwqe_size); + if (ret) { + ehca_err(pd->device, "Couldn't initialize rqueue " + "and pages ret=%i", ret); + goto create_qp_exit4; + } + if (!is_user) { + my_qp->rq_map.entries = my_qp->ipz_rqueue.queue_length / + my_qp->ipz_rqueue.qe_size; + my_qp->rq_map.map = vmalloc(my_qp->rq_map.entries * + sizeof(struct ehca_qmap_entry)); + if (!my_qp->rq_map.map) { + ehca_err(pd->device, "Couldn't allocate squeue " + "map ret=%i", ret); + goto create_qp_exit5; + } + INIT_LIST_HEAD(&my_qp->rq_err_node); + /* to avoid the generation of bogus flush CQEs */ + reset_queue_map(&my_qp->rq_map); + } + } else if (init_attr->srq && !is_user) { + /* this is a base QP, use the queue map of the SRQ */ + my_qp->rq_map = my_srq->rq_map; + INIT_LIST_HEAD(&my_qp->rq_err_node); + + my_qp->ipz_rqueue = my_srq->ipz_rqueue; + } + + if (is_srq) { + my_qp->ib_srq.pd = &my_pd->ib_pd; + my_qp->ib_srq.device = my_pd->ib_pd.device; + + my_qp->ib_srq.srq_context = init_attr->qp_context; + my_qp->ib_srq.event_handler = init_attr->event_handler; + } else { + my_qp->ib_qp.qp_num = ib_qp_num; + my_qp->ib_qp.pd = &my_pd->ib_pd; + my_qp->ib_qp.device = my_pd->ib_pd.device; + + my_qp->ib_qp.recv_cq = init_attr->recv_cq; + my_qp->ib_qp.send_cq = 
init_attr->send_cq; + + my_qp->ib_qp.qp_type = qp_type; + my_qp->ib_qp.srq = init_attr->srq; + + my_qp->ib_qp.qp_context = init_attr->qp_context; + my_qp->ib_qp.event_handler = init_attr->event_handler; + } + + init_attr->cap.max_inline_data = 0; /* not supported yet */ + init_attr->cap.max_recv_sge = parms.rqueue.act_nr_sges; + init_attr->cap.max_recv_wr = parms.rqueue.act_nr_wqes; + init_attr->cap.max_send_sge = parms.squeue.act_nr_sges; + init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes; + my_qp->init_attr = *init_attr; + + if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) { + shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] = + &my_qp->ib_qp; + if (ehca_nr_ports < 0) { + /* alloc array to cache subsequent modify qp parms + * for autodetect mode + */ + my_qp->mod_qp_parm = + kzalloc(EHCA_MOD_QP_PARM_MAX * + sizeof(*my_qp->mod_qp_parm), + GFP_KERNEL); + if (!my_qp->mod_qp_parm) { + ehca_err(pd->device, + "Could not alloc mod_qp_parm"); + goto create_qp_exit5; + } + } + } + + /* NOTE: define_apq0() not supported yet */ + if (qp_type == IB_QPT_GSI) { + h_ret = ehca_define_sqp(shca, my_qp, init_attr); + if (h_ret != H_SUCCESS) { + kfree(my_qp->mod_qp_parm); + my_qp->mod_qp_parm = NULL; + /* the QP pointer is no longer valid */ + shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] = + NULL; + ret = ehca2ib_return_code(h_ret); + goto create_qp_exit6; + } + } + + if (my_qp->send_cq) { + ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp); + if (ret) { + ehca_err(pd->device, + "Couldn't assign qp to send_cq ret=%i", ret); + goto create_qp_exit7; + } + } + + /* copy queues, galpa data to user space */ + if (context && udata) { + struct ehca_create_qp_resp resp; + memset(&resp, 0, sizeof(resp)); + + resp.qp_num = my_qp->real_qp_num; + resp.token = my_qp->token; + resp.qp_type = my_qp->qp_type; + resp.ext_type = my_qp->ext_type; + resp.qkey = my_qp->qkey; + resp.real_qp_num = my_qp->real_qp_num; + + if (HAS_SQ(my_qp)) + queue2resp(&resp.ipz_squeue, 
&my_qp->ipz_squeue); + if (HAS_RQ(my_qp)) + queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue); + resp.fw_handle_ofs = (u32) + (my_qp->galpas.user.fw_handle & (PAGE_SIZE - 1)); + + if (ib_copy_to_udata(udata, &resp, sizeof resp)) { + ehca_err(pd->device, "Copy to udata failed"); + ret = -EINVAL; + goto create_qp_exit8; + } + } + + return my_qp; + +create_qp_exit8: + ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num); + +create_qp_exit7: + kfree(my_qp->mod_qp_parm); + +create_qp_exit6: + if (HAS_RQ(my_qp) && !is_user) + vfree(my_qp->rq_map.map); + +create_qp_exit5: + if (HAS_RQ(my_qp)) + ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); + +create_qp_exit4: + if (HAS_SQ(my_qp) && !is_user) + vfree(my_qp->sq_map.map); + +create_qp_exit3: + if (HAS_SQ(my_qp)) + ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); + +create_qp_exit2: + hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); + +create_qp_exit1: + write_lock_irqsave(&ehca_qp_idr_lock, flags); + idr_remove(&ehca_qp_idr, my_qp->token); + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + +create_qp_exit0: + kmem_cache_free(qp_cache, my_qp); + atomic_dec(&shca->num_qps); + return ERR_PTR(ret); +} + +struct ib_qp *ehca_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata) +{ + struct ehca_qp *ret; + + ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0); + return IS_ERR(ret) ? 
(struct ib_qp *)ret : &ret->ib_qp; +} + +static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, + struct ib_uobject *uobject); + +struct ib_srq *ehca_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata) +{ + struct ib_qp_init_attr qp_init_attr; + struct ehca_qp *my_qp; + struct ib_srq *ret; + struct ehca_shca *shca = container_of(pd->device, struct ehca_shca, + ib_device); + struct hcp_modify_qp_control_block *mqpcb; + u64 hret, update_mask; + + if (srq_init_attr->srq_type != IB_SRQT_BASIC) + return ERR_PTR(-ENOSYS); + + /* For common attributes, internal_create_qp() takes its info + * out of qp_init_attr, so copy all common attrs there. + */ + memset(&qp_init_attr, 0, sizeof(qp_init_attr)); + qp_init_attr.event_handler = srq_init_attr->event_handler; + qp_init_attr.qp_context = srq_init_attr->srq_context; + qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + qp_init_attr.qp_type = IB_QPT_RC; + qp_init_attr.cap.max_recv_wr = srq_init_attr->attr.max_wr; + qp_init_attr.cap.max_recv_sge = srq_init_attr->attr.max_sge; + + my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1); + if (IS_ERR(my_qp)) + return (struct ib_srq *)my_qp; + + /* copy back return values */ + srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr; + srq_init_attr->attr.max_sge = 3; + + /* drive SRQ into RTR state */ + mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!mqpcb) { + ehca_err(pd->device, "Could not get zeroed page for mqpcb " + "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num); + ret = ERR_PTR(-ENOMEM); + goto create_srq1; + } + + mqpcb->qp_state = EHCA_QPS_INIT; + mqpcb->prim_phys_port = 1; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); + hret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + if (hret != H_SUCCESS) { + ehca_err(pd->device, "Could not modify SRQ to INIT " + "ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, 
my_qp->real_qp_num, hret); + goto create_srq2; + } + + mqpcb->qp_enable = 1; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1); + hret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + if (hret != H_SUCCESS) { + ehca_err(pd->device, "Could not enable SRQ " + "ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, my_qp->real_qp_num, hret); + goto create_srq2; + } + + mqpcb->qp_state = EHCA_QPS_RTR; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); + hret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + if (hret != H_SUCCESS) { + ehca_err(pd->device, "Could not modify SRQ to RTR " + "ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, my_qp->real_qp_num, hret); + goto create_srq2; + } + + ehca_free_fw_ctrlblock(mqpcb); + + return &my_qp->ib_srq; + +create_srq2: + ret = ERR_PTR(ehca2ib_return_code(hret)); + ehca_free_fw_ctrlblock(mqpcb); + +create_srq1: + internal_destroy_qp(pd->device, my_qp, my_qp->ib_srq.uobject); + + return ret; +} + +/* + * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts + * set purge bit of bad wqe and subsequent wqes to avoid reentering sqe + * returns total number of bad wqes in bad_wqe_cnt + */ +static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca, + int *bad_wqe_cnt) +{ + u64 h_ret; + struct ipz_queue *squeue; + void *bad_send_wqe_p, *bad_send_wqe_v; + u64 q_ofs; + struct ehca_wqe *wqe; + int qp_num = my_qp->ib_qp.qp_num; + + /* get send wqe pointer */ + h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, &my_qp->pf, + &bad_send_wqe_p, NULL, 2); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "hipz_h_disable_and_get_wqe() failed" + " ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, qp_num, h_ret); + return ehca2ib_return_code(h_ret); + } + bad_send_wqe_p = (void *)((u64)bad_send_wqe_p & (~(1L << 63))); + 
ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p", + qp_num, bad_send_wqe_p); + /* convert wqe pointer to vadr */ + bad_send_wqe_v = __va((u64)bad_send_wqe_p); + if (ehca_debug_level >= 2) + ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num); + squeue = &my_qp->ipz_squeue; + if (ipz_queue_abs_to_offset(squeue, (u64)bad_send_wqe_p, &q_ofs)) { + ehca_err(&shca->ib_device, "failed to get wqe offset qp_num=%x" + " bad_send_wqe_p=%p", qp_num, bad_send_wqe_p); + return -EFAULT; + } + + /* loop sets wqe's purge bit */ + wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs); + *bad_wqe_cnt = 0; + while (wqe->optype != 0xff && wqe->wqef != 0xff) { + if (ehca_debug_level >= 2) + ehca_dmp(wqe, 32, "qp_num=%x wqe", qp_num); + wqe->nr_of_data_seg = 0; /* suppress data access */ + wqe->wqef = WQEF_PURGE; /* WQE to be purged */ + q_ofs = ipz_queue_advance_offset(squeue, q_ofs); + wqe = (struct ehca_wqe *)ipz_qeit_calc(squeue, q_ofs); + *bad_wqe_cnt = (*bad_wqe_cnt)+1; + } + /* + * bad wqe will be reprocessed and ignored when pol_cq() is called, + * i.e. 
nr of wqes with flush error status is one less + */ + ehca_dbg(&shca->ib_device, "qp_num=%x flusherr_wqe_cnt=%x", + qp_num, (*bad_wqe_cnt)-1); + wqe->wqef = 0; + + return 0; +} + +static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue, + struct ehca_queue_map *qmap) +{ + void *wqe_v; + u64 q_ofs; + u32 wqe_idx; + unsigned int tail_idx; + + /* convert real to abs address */ + wqe_p = wqe_p & (~(1UL << 63)); + + wqe_v = __va(wqe_p); + + if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) { + ehca_gen_err("Invalid offset for calculating left cqes " + "wqe_p=%#llx wqe_v=%p\n", wqe_p, wqe_v); + return -EFAULT; + } + + tail_idx = next_index(qmap->tail, qmap->entries); + wqe_idx = q_ofs / ipz_queue->qe_size; + + /* check all processed wqes, whether a cqe is requested or not */ + while (tail_idx != wqe_idx) { + if (qmap->map[tail_idx].cqe_req) + qmap->left_to_poll++; + tail_idx = next_index(tail_idx, qmap->entries); + } + /* save index in queue, where we have to start flushing */ + qmap->next_wqe_idx = wqe_idx; + return 0; +} + +static int check_for_left_cqes(struct ehca_qp *my_qp, struct ehca_shca *shca) +{ + u64 h_ret; + void *send_wqe_p, *recv_wqe_p; + int ret; + unsigned long flags; + int qp_num = my_qp->ib_qp.qp_num; + + /* this hcall is not supported on base QPs */ + if (my_qp->ext_type != EQPT_SRQBASE) { + /* get send and receive wqe pointer */ + h_ret = hipz_h_disable_and_get_wqe(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, &my_qp->pf, + &send_wqe_p, &recv_wqe_p, 4); + if (h_ret != H_SUCCESS) { + ehca_err(&shca->ib_device, "disable_and_get_wqe() " + "failed ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, qp_num, h_ret); + return ehca2ib_return_code(h_ret); + } + + /* + * acquire lock to ensure that nobody is polling the cq which + * could mean that the qmap->tail pointer is in an + * inconsistent state. 
+ */ + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + ret = calc_left_cqes((u64)send_wqe_p, &my_qp->ipz_squeue, + &my_qp->sq_map); + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + if (ret) + return ret; + + + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + ret = calc_left_cqes((u64)recv_wqe_p, &my_qp->ipz_rqueue, + &my_qp->rq_map); + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); + if (ret) + return ret; + } else { + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + my_qp->sq_map.left_to_poll = 0; + my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail, + my_qp->sq_map.entries); + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + my_qp->rq_map.left_to_poll = 0; + my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail, + my_qp->rq_map.entries); + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags); + } + + /* this assures flush cqes being generated only for pending wqes */ + if ((my_qp->sq_map.left_to_poll == 0) && + (my_qp->rq_map.left_to_poll == 0)) { + spin_lock_irqsave(&my_qp->send_cq->spinlock, flags); + ehca_add_to_err_list(my_qp, 1); + spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags); + + if (HAS_RQ(my_qp)) { + spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags); + ehca_add_to_err_list(my_qp, 0); + spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, + flags); + } + } + + return 0; +} + +/* + * internal_modify_qp with circumvention to handle aqp0 properly + * smi_reset2init indicates if this is an internal reset-to-init-call for + * smi. This flag must always be zero if called from ehca_modify_qp()! + * This internal func was intorduced to avoid recursion of ehca_modify_qp()! 
+ */ +static int internal_modify_qp(struct ib_qp *ibqp, + struct ib_qp_attr *attr, + int attr_mask, int smi_reset2init) +{ + enum ib_qp_state qp_cur_state, qp_new_state; + int cnt, qp_attr_idx, ret = 0; + enum ib_qp_statetrans statetrans; + struct hcp_modify_qp_control_block *mqpcb; + struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + struct ehca_shca *shca = + container_of(ibqp->pd->device, struct ehca_shca, ib_device); + u64 update_mask; + u64 h_ret; + int bad_wqe_cnt = 0; + int is_user = 0; + int squeue_locked = 0; + unsigned long flags = 0; + + /* do query_qp to obtain current attr values */ + mqpcb = ehca_alloc_fw_ctrlblock(GFP_ATOMIC); + if (!mqpcb) { + ehca_err(ibqp->device, "Could not get zeroed page for mqpcb " + "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num); + return -ENOMEM; + } + + h_ret = hipz_h_query_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + mqpcb, my_qp->galpas.kernel); + if (h_ret != H_SUCCESS) { + ehca_err(ibqp->device, "hipz_h_query_qp() failed " + "ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, ibqp->qp_num, h_ret); + ret = ehca2ib_return_code(h_ret); + goto modify_qp_exit1; + } + if (ibqp->uobject) + is_user = 1; + + qp_cur_state = ehca2ib_qp_state(mqpcb->qp_state); + + if (qp_cur_state == -EINVAL) { /* invalid qp state */ + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid current ehca_qp_state=%x " + "ehca_qp=%p qp_num=%x", + mqpcb->qp_state, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + /* + * circumvention to set aqp0 initial state to init + * as expected by IB spec + */ + if (smi_reset2init == 0 && + ibqp->qp_type == IB_QPT_SMI && + qp_cur_state == IB_QPS_RESET && + (attr_mask & IB_QP_STATE) && + attr->qp_state == IB_QPS_INIT) { /* RESET -> INIT */ + struct ib_qp_attr smiqp_attr = { + .qp_state = IB_QPS_INIT, + .port_num = my_qp->init_attr.port_num, + .pkey_index = 0, + .qkey = 0 + }; + int smiqp_attr_mask = IB_QP_STATE | IB_QP_PORT | + IB_QP_PKEY_INDEX | IB_QP_QKEY; + int smirc = 
internal_modify_qp( + ibqp, &smiqp_attr, smiqp_attr_mask, 1); + if (smirc) { + ehca_err(ibqp->device, "SMI RESET -> INIT failed. " + "ehca_modify_qp() rc=%i", smirc); + ret = H_PARAMETER; + goto modify_qp_exit1; + } + qp_cur_state = IB_QPS_INIT; + ehca_dbg(ibqp->device, "SMI RESET -> INIT succeeded"); + } + /* is transmitted current state equal to "real" current state */ + if ((attr_mask & IB_QP_CUR_STATE) && + qp_cur_state != attr->cur_qp_state) { + ret = -EINVAL; + ehca_err(ibqp->device, + "Invalid IB_QP_CUR_STATE attr->curr_qp_state=%x <>" + " actual cur_qp_state=%x. ehca_qp=%p qp_num=%x", + attr->cur_qp_state, qp_cur_state, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + ehca_dbg(ibqp->device, "ehca_qp=%p qp_num=%x current qp_state=%x " + "new qp_state=%x attribute_mask=%x", + my_qp, ibqp->qp_num, qp_cur_state, attr->qp_state, attr_mask); + + qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state; + if (!smi_reset2init && + !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type, + attr_mask, IB_LINK_LAYER_UNSPECIFIED)) { + ret = -EINVAL; + ehca_err(ibqp->device, + "Invalid qp transition new_state=%x cur_state=%x " + "ehca_qp=%p qp_num=%x attr_mask=%x", qp_new_state, + qp_cur_state, my_qp, ibqp->qp_num, attr_mask); + goto modify_qp_exit1; + } + + mqpcb->qp_state = ib2ehca_qp_state(qp_new_state); + if (mqpcb->qp_state) + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1); + else { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid new qp state=%x " + "ehca_qp=%p qp_num=%x", + qp_new_state, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + /* retrieve state transition struct to get req and opt attrs */ + statetrans = get_modqp_statetrans(qp_cur_state, qp_new_state); + if (statetrans < 0) { + ret = -EINVAL; + ehca_err(ibqp->device, " qp_cur_state=%x " + "new_qp_state=%x State_xsition=%x ehca_qp=%p " + "qp_num=%x", qp_cur_state, qp_new_state, + statetrans, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + qp_attr_idx = 
ib2ehcaqptype(ibqp->qp_type); + + if (qp_attr_idx < 0) { + ret = qp_attr_idx; + ehca_err(ibqp->device, + "Invalid QP type=%x ehca_qp=%p qp_num=%x", + ibqp->qp_type, my_qp, ibqp->qp_num); + goto modify_qp_exit1; + } + + ehca_dbg(ibqp->device, + "ehca_qp=%p qp_num=%x qp_state_xsit=%x", + my_qp, ibqp->qp_num, statetrans); + + /* eHCA2 rev2 and higher require the SEND_GRH_FLAG to be set + * in non-LL UD QPs. + */ + if ((my_qp->qp_type == IB_QPT_UD) && + (my_qp->ext_type != EQPT_LLQP) && + (statetrans == IB_QPST_INIT2RTR) && + (shca->hw_level >= 0x22)) { + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); + mqpcb->send_grh_flag = 1; + } + + /* sqe -> rts: set purge bit of bad wqe before actual trans */ + if ((my_qp->qp_type == IB_QPT_UD || + my_qp->qp_type == IB_QPT_GSI || + my_qp->qp_type == IB_QPT_SMI) && + statetrans == IB_QPST_SQE2RTS) { + /* mark next free wqe if kernel */ + if (!ibqp->uobject) { + struct ehca_wqe *wqe; + /* lock send queue */ + spin_lock_irqsave(&my_qp->spinlock_s, flags); + squeue_locked = 1; + /* mark next free wqe */ + wqe = (struct ehca_wqe *) + ipz_qeit_get(&my_qp->ipz_squeue); + wqe->optype = wqe->wqef = 0xff; + ehca_dbg(ibqp->device, "qp_num=%x next_free_wqe=%p", + ibqp->qp_num, wqe); + } + ret = prepare_sqe_rts(my_qp, shca, &bad_wqe_cnt); + if (ret) { + ehca_err(ibqp->device, "prepare_sqe_rts() failed " + "ehca_qp=%p qp_num=%x ret=%i", + my_qp, ibqp->qp_num, ret); + goto modify_qp_exit2; + } + } + + /* + * enable RDMA_Atomic_Control if reset->init und reliable con + * this is necessary since gen2 does not provide that flag, + * but pHyp requires it + */ + if (statetrans == IB_QPST_RESET2INIT && + (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_UC)) { + mqpcb->rdma_atomic_ctrl = 3; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RDMA_ATOMIC_CTRL, 1); + } + /* circ. 
pHyp requires #RDMA/Atomic Resp Res for UC INIT -> RTR */ + if (statetrans == IB_QPST_INIT2RTR && + (ibqp->qp_type == IB_QPT_UC) && + !(attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)) { + mqpcb->rdma_nr_atomic_resp_res = 1; /* default to 1 */ + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1); + } + + if (attr_mask & IB_QP_PKEY_INDEX) { + if (attr->pkey_index >= 16) { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid pkey_index=%x. " + "ehca_qp=%p qp_num=%x max_pkey_index=f", + attr->pkey_index, my_qp, ibqp->qp_num); + goto modify_qp_exit2; + } + mqpcb->prim_p_key_idx = attr->pkey_index; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1); + } + if (attr_mask & IB_QP_PORT) { + struct ehca_sport *sport; + struct ehca_qp *aqp1; + if (attr->port_num < 1 || attr->port_num > shca->num_ports) { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid port=%x. " + "ehca_qp=%p qp_num=%x num_ports=%x", + attr->port_num, my_qp, ibqp->qp_num, + shca->num_ports); + goto modify_qp_exit2; + } + sport = &shca->sport[attr->port_num - 1]; + if (!sport->ibqp_sqp[IB_QPT_GSI]) { + /* should not occur */ + ret = -EFAULT; + ehca_err(ibqp->device, "AQP1 was not created for " + "port=%x", attr->port_num); + goto modify_qp_exit2; + } + aqp1 = container_of(sport->ibqp_sqp[IB_QPT_GSI], + struct ehca_qp, ib_qp); + if (ibqp->qp_type != IB_QPT_GSI && + ibqp->qp_type != IB_QPT_SMI && + aqp1->mod_qp_parm) { + /* + * firmware will reject this modify_qp() because + * port is not activated/initialized fully + */ + ret = -EFAULT; + ehca_warn(ibqp->device, "Couldn't modify qp port=%x: " + "either port is being activated (try again) " + "or cabling issue", attr->port_num); + goto modify_qp_exit2; + } + mqpcb->prim_phys_port = attr->port_num; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1); + } + if (attr_mask & IB_QP_QKEY) { + mqpcb->qkey = attr->qkey; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_QKEY, 1); + } + if (attr_mask & IB_QP_AV) { + mqpcb->dlid = 
attr->ah_attr.dlid; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DLID, 1); + mqpcb->source_path_bits = attr->ah_attr.src_path_bits; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS, 1); + mqpcb->service_level = attr->ah_attr.sl; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL, 1); + + if (ehca_calc_ipd(shca, mqpcb->prim_phys_port, + attr->ah_attr.static_rate, + &mqpcb->max_static_rate)) { + ret = -EINVAL; + goto modify_qp_exit2; + } + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE, 1); + + /* + * Always supply the GRH flag, even if it's zero, to give the + * hypervisor a clear "yes" or "no" instead of a "perhaps" + */ + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1); + + /* + * only if GRH is TRUE we might consider SOURCE_GID_IDX + * and DEST_GID otherwise phype will return H_ATTR_PARM!!! + */ + if (attr->ah_attr.ah_flags == IB_AH_GRH) { + mqpcb->send_grh_flag = 1; + + mqpcb->source_gid_idx = attr->ah_attr.grh.sgid_index; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX, 1); + + for (cnt = 0; cnt < 16; cnt++) + mqpcb->dest_gid.byte[cnt] = + attr->ah_attr.grh.dgid.raw[cnt]; + + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_GID, 1); + mqpcb->flow_label = attr->ah_attr.grh.flow_label; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL, 1); + mqpcb->hop_limit = attr->ah_attr.grh.hop_limit; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT, 1); + mqpcb->traffic_class = attr->ah_attr.grh.traffic_class; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS, 1); + } + } + + if (attr_mask & IB_QP_PATH_MTU) { + /* store ld(MTU) */ + my_qp->mtu_shift = attr->path_mtu + 7; + mqpcb->path_mtu = attr->path_mtu; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1); + } + if (attr_mask & IB_QP_TIMEOUT) { + mqpcb->timeout = attr->timeout; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT, 1); + } + if (attr_mask & IB_QP_RETRY_CNT) { + mqpcb->retry_count = attr->retry_cnt; + update_mask |= 
EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT, 1); + } + if (attr_mask & IB_QP_RNR_RETRY) { + mqpcb->rnr_retry_count = attr->rnr_retry; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT, 1); + } + if (attr_mask & IB_QP_RQ_PSN) { + mqpcb->receive_psn = attr->rq_psn; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_RECEIVE_PSN, 1); + } + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { + mqpcb->rdma_nr_atomic_resp_res = attr->max_dest_rd_atomic < 3 ? + attr->max_dest_rd_atomic : 2; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_RDMA_NR_ATOMIC_RESP_RES, 1); + } + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { + mqpcb->rdma_atomic_outst_dest_qp = attr->max_rd_atomic < 3 ? + attr->max_rd_atomic : 2; + update_mask |= + EHCA_BMASK_SET + (MQPCB_MASK_RDMA_ATOMIC_OUTST_DEST_QP, 1); + } + if (attr_mask & IB_QP_ALT_PATH) { + if (attr->alt_port_num < 1 + || attr->alt_port_num > shca->num_ports) { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid alt_port=%x. " + "ehca_qp=%p qp_num=%x num_ports=%x", + attr->alt_port_num, my_qp, ibqp->qp_num, + shca->num_ports); + goto modify_qp_exit2; + } + mqpcb->alt_phys_port = attr->alt_port_num; + + if (attr->alt_pkey_index >= 16) { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid alt_pkey_index=%x. 
" + "ehca_qp=%p qp_num=%x max_pkey_index=f", + attr->pkey_index, my_qp, ibqp->qp_num); + goto modify_qp_exit2; + } + mqpcb->alt_p_key_idx = attr->alt_pkey_index; + + mqpcb->timeout_al = attr->alt_timeout; + mqpcb->dlid_al = attr->alt_ah_attr.dlid; + mqpcb->source_path_bits_al = attr->alt_ah_attr.src_path_bits; + mqpcb->service_level_al = attr->alt_ah_attr.sl; + + if (ehca_calc_ipd(shca, mqpcb->alt_phys_port, + attr->alt_ah_attr.static_rate, + &mqpcb->max_static_rate_al)) { + ret = -EINVAL; + goto modify_qp_exit2; + } + + /* OpenIB doesn't support alternate retry counts - copy them */ + mqpcb->retry_count_al = mqpcb->retry_count; + mqpcb->rnr_retry_count_al = mqpcb->rnr_retry_count; + + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_ALT_PHYS_PORT, 1) + | EHCA_BMASK_SET(MQPCB_MASK_ALT_P_KEY_IDX, 1) + | EHCA_BMASK_SET(MQPCB_MASK_TIMEOUT_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_DLID_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_SOURCE_PATH_BITS_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_SERVICE_LEVEL_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_MAX_STATIC_RATE_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_RETRY_COUNT_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_RNR_RETRY_COUNT_AL, 1); + + /* + * Always supply the GRH flag, even if it's zero, to give the + * hypervisor a clear "yes" or "no" instead of a "perhaps" + */ + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG_AL, 1); + + /* + * only if GRH is TRUE we might consider SOURCE_GID_IDX + * and DEST_GID otherwise phype will return H_ATTR_PARM!!! 
+ */ + if (attr->alt_ah_attr.ah_flags == IB_AH_GRH) { + mqpcb->send_grh_flag_al = 1; + + for (cnt = 0; cnt < 16; cnt++) + mqpcb->dest_gid_al.byte[cnt] = + attr->alt_ah_attr.grh.dgid.raw[cnt]; + mqpcb->source_gid_idx_al = + attr->alt_ah_attr.grh.sgid_index; + mqpcb->flow_label_al = attr->alt_ah_attr.grh.flow_label; + mqpcb->hop_limit_al = attr->alt_ah_attr.grh.hop_limit; + mqpcb->traffic_class_al = + attr->alt_ah_attr.grh.traffic_class; + + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_SOURCE_GID_IDX_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_DEST_GID_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_FLOW_LABEL_AL, 1) + | EHCA_BMASK_SET(MQPCB_MASK_HOP_LIMIT_AL, 1) | + EHCA_BMASK_SET(MQPCB_MASK_TRAFFIC_CLASS_AL, 1); + } + } + + if (attr_mask & IB_QP_MIN_RNR_TIMER) { + mqpcb->min_rnr_nak_timer_field = attr->min_rnr_timer; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_MIN_RNR_NAK_TIMER_FIELD, 1); + } + + if (attr_mask & IB_QP_SQ_PSN) { + mqpcb->send_psn = attr->sq_psn; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_PSN, 1); + } + + if (attr_mask & IB_QP_DEST_QPN) { + mqpcb->dest_qp_nr = attr->dest_qp_num; + update_mask |= EHCA_BMASK_SET(MQPCB_MASK_DEST_QP_NR, 1); + } + + if (attr_mask & IB_QP_PATH_MIG_STATE) { + if (attr->path_mig_state != IB_MIG_REARM + && attr->path_mig_state != IB_MIG_MIGRATED) { + ret = -EINVAL; + ehca_err(ibqp->device, "Invalid mig_state=%x", + attr->path_mig_state); + goto modify_qp_exit2; + } + mqpcb->path_migration_state = attr->path_mig_state + 1; + if (attr->path_mig_state == IB_MIG_REARM) + my_qp->mig_armed = 1; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_PATH_MIGRATION_STATE, 1); + } + + if (attr_mask & IB_QP_CAP) { + mqpcb->max_nr_outst_send_wr = attr->cap.max_send_wr+1; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_SEND_WR, 1); + mqpcb->max_nr_outst_recv_wr = attr->cap.max_recv_wr+1; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_MAX_NR_OUTST_RECV_WR, 1); + /* no support for max_send/recv_sge yet */ + } + + if (ehca_debug_level >= 2) + 
ehca_dmp(mqpcb, 4*70, "qp_num=%x", ibqp->qp_num); + + h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(ibqp->device, "hipz_h_modify_qp() failed h_ret=%lli " + "ehca_qp=%p qp_num=%x", h_ret, my_qp, ibqp->qp_num); + goto modify_qp_exit2; + } + + if ((my_qp->qp_type == IB_QPT_UD || + my_qp->qp_type == IB_QPT_GSI || + my_qp->qp_type == IB_QPT_SMI) && + statetrans == IB_QPST_SQE2RTS) { + /* doorbell to reprocessing wqes */ + iosync(); /* serialize GAL register access */ + hipz_update_sqa(my_qp, bad_wqe_cnt-1); + ehca_gen_dbg("doorbell for %x wqes", bad_wqe_cnt); + } + + if (statetrans == IB_QPST_RESET2INIT || + statetrans == IB_QPST_INIT2INIT) { + mqpcb->qp_enable = 1; + mqpcb->qp_state = EHCA_QPS_INIT; + update_mask = 0; + update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1); + + h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + update_mask, + mqpcb, + my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(ibqp->device, "ENABLE in context of " + "RESET_2_INIT failed! 
Maybe you didn't get " + "a LID h_ret=%lli ehca_qp=%p qp_num=%x", + h_ret, my_qp, ibqp->qp_num); + goto modify_qp_exit2; + } + } + if ((qp_new_state == IB_QPS_ERR) && (qp_cur_state != IB_QPS_ERR) + && !is_user) { + ret = check_for_left_cqes(my_qp, shca); + if (ret) + goto modify_qp_exit2; + } + + if (statetrans == IB_QPST_ANY2RESET) { + ipz_qeit_reset(&my_qp->ipz_rqueue); + ipz_qeit_reset(&my_qp->ipz_squeue); + + if (qp_cur_state == IB_QPS_ERR && !is_user) { + del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); + + if (HAS_RQ(my_qp)) + del_from_err_list(my_qp->recv_cq, + &my_qp->rq_err_node); + } + if (!is_user) + reset_queue_map(&my_qp->sq_map); + + if (HAS_RQ(my_qp) && !is_user) + reset_queue_map(&my_qp->rq_map); + } + + if (attr_mask & IB_QP_QKEY) + my_qp->qkey = attr->qkey; + +modify_qp_exit2: + if (squeue_locked) { /* this means: sqe -> rts */ + spin_unlock_irqrestore(&my_qp->spinlock_s, flags); + my_qp->sqerr_purgeflag = 1; + } + +modify_qp_exit1: + ehca_free_fw_ctrlblock(mqpcb); + + return ret; +} + +int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, + struct ib_udata *udata) +{ + int ret = 0; + + struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca, + ib_device); + struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp); + + /* The if-block below caches qp_attr to be modified for GSI and SMI + * qps during the initialization by ib_mad. When the respective port + * is activated, ie we got an event PORT_ACTIVE, we'll replay the + * cached modify calls sequence, see ehca_recover_sqs() below. + * Why that is required: + * 1) If one port is connected, older code requires that port one + * to be connected and module option nr_ports=1 to be given by + * user, which is very inconvenient for end user. + * 2) Firmware accepts modify_qp() only if respective port has become + * active. Older code had a wait loop of 30sec create_qp()/ + * define_aqp1(), which is not appropriate in practice. 
This + * code now removes that wait loop, see define_aqp1(), and always + * reports all ports to ib_mad resp. users. Only activated ports + * will then usable for the users. + */ + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) { + int port = my_qp->init_attr.port_num; + struct ehca_sport *sport = &shca->sport[port - 1]; + unsigned long flags; + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + /* cache qp_attr only during init */ + if (my_qp->mod_qp_parm) { + struct ehca_mod_qp_parm *p; + if (my_qp->mod_qp_parm_idx >= EHCA_MOD_QP_PARM_MAX) { + ehca_err(&shca->ib_device, + "mod_qp_parm overflow state=%x port=%x" + " type=%x", attr->qp_state, + my_qp->init_attr.port_num, + ibqp->qp_type); + spin_unlock_irqrestore(&sport->mod_sqp_lock, + flags); + return -EINVAL; + } + p = &my_qp->mod_qp_parm[my_qp->mod_qp_parm_idx]; + p->mask = attr_mask; + p->attr = *attr; + my_qp->mod_qp_parm_idx++; + ehca_dbg(&shca->ib_device, + "Saved qp_attr for state=%x port=%x type=%x", + attr->qp_state, my_qp->init_attr.port_num, + ibqp->qp_type); + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + goto out; + } + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + } + + ret = internal_modify_qp(ibqp, attr, attr_mask, 0); + +out: + if ((ret == 0) && (attr_mask & IB_QP_STATE)) + my_qp->state = attr->qp_state; + + return ret; +} + +void ehca_recover_sqp(struct ib_qp *sqp) +{ + struct ehca_qp *my_sqp = container_of(sqp, struct ehca_qp, ib_qp); + int port = my_sqp->init_attr.port_num; + struct ib_qp_attr attr; + struct ehca_mod_qp_parm *qp_parm; + int i, qp_parm_idx, ret; + unsigned long flags, wr_cnt; + + if (!my_sqp->mod_qp_parm) + return; + ehca_dbg(sqp->device, "SQP port=%x qp_num=%x", port, sqp->qp_num); + + qp_parm = my_sqp->mod_qp_parm; + qp_parm_idx = my_sqp->mod_qp_parm_idx; + for (i = 0; i < qp_parm_idx; i++) { + attr = qp_parm[i].attr; + ret = internal_modify_qp(sqp, &attr, qp_parm[i].mask, 0); + if (ret) { + ehca_err(sqp->device, "Could not modify SQP port=%x 
" + "qp_num=%x ret=%x", port, sqp->qp_num, ret); + goto free_qp_parm; + } + ehca_dbg(sqp->device, "SQP port=%x qp_num=%x in state=%x", + port, sqp->qp_num, attr.qp_state); + } + + /* re-trigger posted recv wrs */ + wr_cnt = my_sqp->ipz_rqueue.current_q_offset / + my_sqp->ipz_rqueue.qe_size; + if (wr_cnt) { + spin_lock_irqsave(&my_sqp->spinlock_r, flags); + hipz_update_rqa(my_sqp, wr_cnt); + spin_unlock_irqrestore(&my_sqp->spinlock_r, flags); + ehca_dbg(sqp->device, "doorbell port=%x qp_num=%x wr_cnt=%lx", + port, sqp->qp_num, wr_cnt); + } + +free_qp_parm: + kfree(qp_parm); + /* this prevents subsequent calls to modify_qp() to cache qp_attr */ + my_sqp->mod_qp_parm = NULL; +} + +int ehca_query_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) +{ + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + struct ehca_shca *shca = container_of(qp->device, struct ehca_shca, + ib_device); + struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; + struct hcp_modify_qp_control_block *qpcb; + int cnt, ret = 0; + u64 h_ret; + + if (qp_attr_mask & QP_ATTR_QUERY_NOT_SUPPORTED) { + ehca_err(qp->device, "Invalid attribute mask " + "ehca_qp=%p qp_num=%x qp_attr_mask=%x ", + my_qp, qp->qp_num, qp_attr_mask); + return -EINVAL; + } + + qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!qpcb) { + ehca_err(qp->device, "Out of memory for qpcb " + "ehca_qp=%p qp_num=%x", my_qp, qp->qp_num); + return -ENOMEM; + } + + h_ret = hipz_h_query_qp(adapter_handle, + my_qp->ipz_qp_handle, + &my_qp->pf, + qpcb, my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(qp->device, "hipz_h_query_qp() failed " + "ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, qp->qp_num, h_ret); + goto query_qp_exit1; + } + + qp_attr->cur_qp_state = ehca2ib_qp_state(qpcb->qp_state); + qp_attr->qp_state = qp_attr->cur_qp_state; + + if (qp_attr->cur_qp_state == -EINVAL) { + ret = -EINVAL; + 
ehca_err(qp->device, "Got invalid ehca_qp_state=%x " + "ehca_qp=%p qp_num=%x", + qpcb->qp_state, my_qp, qp->qp_num); + goto query_qp_exit1; + } + + if (qp_attr->qp_state == IB_QPS_SQD) + qp_attr->sq_draining = 1; + + qp_attr->qkey = qpcb->qkey; + qp_attr->path_mtu = qpcb->path_mtu; + qp_attr->path_mig_state = qpcb->path_migration_state - 1; + qp_attr->rq_psn = qpcb->receive_psn; + qp_attr->sq_psn = qpcb->send_psn; + qp_attr->min_rnr_timer = qpcb->min_rnr_nak_timer_field; + qp_attr->cap.max_send_wr = qpcb->max_nr_outst_send_wr-1; + qp_attr->cap.max_recv_wr = qpcb->max_nr_outst_recv_wr-1; + /* UD_AV CIRCUMVENTION */ + if (my_qp->qp_type == IB_QPT_UD) { + qp_attr->cap.max_send_sge = + qpcb->actual_nr_sges_in_sq_wqe - 2; + qp_attr->cap.max_recv_sge = + qpcb->actual_nr_sges_in_rq_wqe - 2; + } else { + qp_attr->cap.max_send_sge = + qpcb->actual_nr_sges_in_sq_wqe; + qp_attr->cap.max_recv_sge = + qpcb->actual_nr_sges_in_rq_wqe; + } + + qp_attr->cap.max_inline_data = my_qp->sq_max_inline_data_size; + qp_attr->dest_qp_num = qpcb->dest_qp_nr; + + qp_attr->pkey_index = qpcb->prim_p_key_idx; + qp_attr->port_num = qpcb->prim_phys_port; + qp_attr->timeout = qpcb->timeout; + qp_attr->retry_cnt = qpcb->retry_count; + qp_attr->rnr_retry = qpcb->rnr_retry_count; + + qp_attr->alt_pkey_index = qpcb->alt_p_key_idx; + qp_attr->alt_port_num = qpcb->alt_phys_port; + qp_attr->alt_timeout = qpcb->timeout_al; + + qp_attr->max_dest_rd_atomic = qpcb->rdma_nr_atomic_resp_res; + qp_attr->max_rd_atomic = qpcb->rdma_atomic_outst_dest_qp; + + /* primary av */ + qp_attr->ah_attr.sl = qpcb->service_level; + + if (qpcb->send_grh_flag) { + qp_attr->ah_attr.ah_flags = IB_AH_GRH; + } + + qp_attr->ah_attr.static_rate = qpcb->max_static_rate; + qp_attr->ah_attr.dlid = qpcb->dlid; + qp_attr->ah_attr.src_path_bits = qpcb->source_path_bits; + qp_attr->ah_attr.port_num = qp_attr->port_num; + + /* primary GRH */ + qp_attr->ah_attr.grh.traffic_class = qpcb->traffic_class; + qp_attr->ah_attr.grh.hop_limit = 
qpcb->hop_limit; + qp_attr->ah_attr.grh.sgid_index = qpcb->source_gid_idx; + qp_attr->ah_attr.grh.flow_label = qpcb->flow_label; + + for (cnt = 0; cnt < 16; cnt++) + qp_attr->ah_attr.grh.dgid.raw[cnt] = + qpcb->dest_gid.byte[cnt]; + + /* alternate AV */ + qp_attr->alt_ah_attr.sl = qpcb->service_level_al; + if (qpcb->send_grh_flag_al) { + qp_attr->alt_ah_attr.ah_flags = IB_AH_GRH; + } + + qp_attr->alt_ah_attr.static_rate = qpcb->max_static_rate_al; + qp_attr->alt_ah_attr.dlid = qpcb->dlid_al; + qp_attr->alt_ah_attr.src_path_bits = qpcb->source_path_bits_al; + + /* alternate GRH */ + qp_attr->alt_ah_attr.grh.traffic_class = qpcb->traffic_class_al; + qp_attr->alt_ah_attr.grh.hop_limit = qpcb->hop_limit_al; + qp_attr->alt_ah_attr.grh.sgid_index = qpcb->source_gid_idx_al; + qp_attr->alt_ah_attr.grh.flow_label = qpcb->flow_label_al; + + for (cnt = 0; cnt < 16; cnt++) + qp_attr->alt_ah_attr.grh.dgid.raw[cnt] = + qpcb->dest_gid_al.byte[cnt]; + + /* return init attributes given in ehca_create_qp */ + if (qp_init_attr) + *qp_init_attr = my_qp->init_attr; + + if (ehca_debug_level >= 2) + ehca_dmp(qpcb, 4*70, "qp_num=%x", qp->qp_num); + +query_qp_exit1: + ehca_free_fw_ctrlblock(qpcb); + + return ret; +} + +int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) +{ + struct ehca_qp *my_qp = + container_of(ibsrq, struct ehca_qp, ib_srq); + struct ehca_shca *shca = + container_of(ibsrq->pd->device, struct ehca_shca, ib_device); + struct hcp_modify_qp_control_block *mqpcb; + u64 update_mask; + u64 h_ret; + int ret = 0; + + mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!mqpcb) { + ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb " + "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num); + return -ENOMEM; + } + + update_mask = 0; + if (attr_mask & IB_SRQ_LIMIT) { + attr_mask &= ~IB_SRQ_LIMIT; + update_mask |= + EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1) + | 
EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1); + mqpcb->curr_srq_limit = attr->srq_limit; + mqpcb->qp_aff_asyn_ev_log_reg = + EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1); + } + + /* by now, all bits in attr_mask should have been cleared */ + if (attr_mask) { + ehca_err(ibsrq->device, "invalid attribute mask bits set " + "attr_mask=%x", attr_mask); + ret = -EINVAL; + goto modify_srq_exit0; + } + + if (ehca_debug_level >= 2) + ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); + + h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle, + NULL, update_mask, mqpcb, + my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(ibsrq->device, "hipz_h_modify_qp() failed h_ret=%lli " + "ehca_qp=%p qp_num=%x", + h_ret, my_qp, my_qp->real_qp_num); + } + +modify_srq_exit0: + ehca_free_fw_ctrlblock(mqpcb); + + return ret; +} + +int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr) +{ + struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq); + struct ehca_shca *shca = container_of(srq->device, struct ehca_shca, + ib_device); + struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle; + struct hcp_modify_qp_control_block *qpcb; + int ret = 0; + u64 h_ret; + + qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL); + if (!qpcb) { + ehca_err(srq->device, "Out of memory for qpcb " + "ehca_qp=%p qp_num=%x", my_qp, my_qp->real_qp_num); + return -ENOMEM; + } + + h_ret = hipz_h_query_qp(adapter_handle, my_qp->ipz_qp_handle, + NULL, qpcb, my_qp->galpas.kernel); + + if (h_ret != H_SUCCESS) { + ret = ehca2ib_return_code(h_ret); + ehca_err(srq->device, "hipz_h_query_qp() failed " + "ehca_qp=%p qp_num=%x h_ret=%lli", + my_qp, my_qp->real_qp_num, h_ret); + goto query_srq_exit1; + } + + srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1; + srq_attr->max_sge = 3; + srq_attr->srq_limit = qpcb->curr_srq_limit; + + if (ehca_debug_level >= 2) + ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num); + 
+query_srq_exit1: + ehca_free_fw_ctrlblock(qpcb); + + return ret; +} + +static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp, + struct ib_uobject *uobject) +{ + struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device); + struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd, + ib_pd); + struct ehca_sport *sport = &shca->sport[my_qp->init_attr.port_num - 1]; + u32 qp_num = my_qp->real_qp_num; + int ret; + u64 h_ret; + u8 port_num; + int is_user = 0; + enum ib_qp_type qp_type; + unsigned long flags; + + if (uobject) { + is_user = 1; + if (my_qp->mm_count_galpa || + my_qp->mm_count_rqueue || my_qp->mm_count_squeue) { + ehca_err(dev, "Resources still referenced in " + "user space qp_num=%x", qp_num); + return -EINVAL; + } + } + + if (my_qp->send_cq) { + ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num); + if (ret) { + ehca_err(dev, "Couldn't unassign qp from " + "send_cq ret=%i qp_num=%x cq_num=%x", ret, + qp_num, my_qp->send_cq->cq_number); + return ret; + } + } + + write_lock_irqsave(&ehca_qp_idr_lock, flags); + idr_remove(&ehca_qp_idr, my_qp->token); + write_unlock_irqrestore(&ehca_qp_idr_lock, flags); + + /* + * SRQs will never get into an error list and do not have a recv_cq, + * so we need to skip them here. 
+ */ + if (HAS_RQ(my_qp) && !IS_SRQ(my_qp) && !is_user) + del_from_err_list(my_qp->recv_cq, &my_qp->rq_err_node); + + if (HAS_SQ(my_qp) && !is_user) + del_from_err_list(my_qp->send_cq, &my_qp->sq_err_node); + + /* now wait until all pending events have completed */ + wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events)); + + h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp); + if (h_ret != H_SUCCESS) { + ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%lli " + "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num); + return ehca2ib_return_code(h_ret); + } + + port_num = my_qp->init_attr.port_num; + qp_type = my_qp->init_attr.qp_type; + + if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) { + spin_lock_irqsave(&sport->mod_sqp_lock, flags); + kfree(my_qp->mod_qp_parm); + my_qp->mod_qp_parm = NULL; + shca->sport[port_num - 1].ibqp_sqp[qp_type] = NULL; + spin_unlock_irqrestore(&sport->mod_sqp_lock, flags); + } + + /* no support for IB_QPT_SMI yet */ + if (qp_type == IB_QPT_GSI) { + struct ib_event event; + ehca_info(dev, "device %s: port %x is inactive.", + shca->ib_device.name, port_num); + event.device = &shca->ib_device; + event.event = IB_EVENT_PORT_ERR; + event.element.port_num = port_num; + shca->sport[port_num - 1].port_state = IB_PORT_DOWN; + ib_dispatch_event(&event); + } + + if (HAS_RQ(my_qp)) { + ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue); + if (!is_user) + vfree(my_qp->rq_map.map); + } + if (HAS_SQ(my_qp)) { + ipz_queue_dtor(my_pd, &my_qp->ipz_squeue); + if (!is_user) + vfree(my_qp->sq_map.map); + } + kmem_cache_free(qp_cache, my_qp); + atomic_dec(&shca->num_qps); + return 0; +} + +int ehca_destroy_qp(struct ib_qp *qp) +{ + return internal_destroy_qp(qp->device, + container_of(qp, struct ehca_qp, ib_qp), + qp->uobject); +} + +int ehca_destroy_srq(struct ib_srq *srq) +{ + return internal_destroy_qp(srq->device, + container_of(srq, struct ehca_qp, ib_srq), + srq->uobject); +} + +int ehca_init_qp_cache(void) +{ + qp_cache = 
kmem_cache_create("ehca_cache_qp", + sizeof(struct ehca_qp), 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!qp_cache) + return -ENOMEM; + return 0; +} + +void ehca_cleanup_qp_cache(void) +{ + if (qp_cache) + kmem_cache_destroy(qp_cache); +} diff --git a/drivers/staging/rdma/ehca/ehca_reqs.c b/drivers/staging/rdma/ehca/ehca_reqs.c new file mode 100644 index 000000000000..47f94984353d --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_reqs.c @@ -0,0 +1,953 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * post_send/recv, poll_cq, req_notify + * + * Authors: Hoang-Nam Nguyen + * Waleri Fomin + * Joachim Fenkes + * Reinhard Ernst + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_qes.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" +#include "hipz_fns.h" + +/* in RC traffic, insert an empty RDMA READ every this many packets */ +#define ACK_CIRC_THRESHOLD 2000000 + +static u64 replace_wr_id(u64 wr_id, u16 idx) +{ + u64 ret; + + ret = wr_id & ~QMAP_IDX_MASK; + ret |= idx & QMAP_IDX_MASK; + + return ret; +} + +static u16 get_app_wr_id(u64 wr_id) +{ + return wr_id & QMAP_IDX_MASK; +} + +static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue, + struct ehca_wqe *wqe_p, + struct ib_recv_wr *recv_wr, + u32 rq_map_idx) +{ + u8 cnt_ds; + if (unlikely((recv_wr->num_sge < 0) || + (recv_wr->num_sge > ipz_rqueue->act_nr_of_sg))) { + ehca_gen_err("Invalid number of WQE SGE. 
" + "num_sqe=%x max_nr_of_sg=%x", + recv_wr->num_sge, ipz_rqueue->act_nr_of_sg); + return -EINVAL; /* invalid SG list length */ + } + + /* clear wqe header until sglist */ + memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); + + wqe_p->work_request_id = replace_wr_id(recv_wr->wr_id, rq_map_idx); + wqe_p->nr_of_data_seg = recv_wr->num_sge; + + for (cnt_ds = 0; cnt_ds < recv_wr->num_sge; cnt_ds++) { + wqe_p->u.all_rcv.sg_list[cnt_ds].vaddr = + recv_wr->sg_list[cnt_ds].addr; + wqe_p->u.all_rcv.sg_list[cnt_ds].lkey = + recv_wr->sg_list[cnt_ds].lkey; + wqe_p->u.all_rcv.sg_list[cnt_ds].length = + recv_wr->sg_list[cnt_ds].length; + } + + if (ehca_debug_level >= 3) { + ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p", + ipz_rqueue); + ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe"); + } + + return 0; +} + +#if defined(DEBUG_GSI_SEND_WR) + +/* need ib_mad struct */ +#include + +static void trace_send_wr_ud(const struct ib_send_wr *send_wr) +{ + int idx; + int j; + while (send_wr) { + struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr; + struct ib_sge *sge = send_wr->sg_list; + ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x " + "send_flags=%x opcode=%x", idx, send_wr->wr_id, + send_wr->num_sge, send_wr->send_flags, + send_wr->opcode); + if (mad_hdr) { + ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x " + "mgmt_class=%x class_version=%x method=%x " + "status=%x class_specific=%x tid=%lx " + "attr_id=%x resv=%x attr_mod=%x", + idx, mad_hdr->base_version, + mad_hdr->mgmt_class, + mad_hdr->class_version, mad_hdr->method, + mad_hdr->status, mad_hdr->class_specific, + mad_hdr->tid, mad_hdr->attr_id, + mad_hdr->resv, + mad_hdr->attr_mod); + } + for (j = 0; j < send_wr->num_sge; j++) { + u8 *data = __va(sge->addr); + ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " + "lkey=%x", + idx, j, data, sge->length, sge->lkey); + /* assume length is n*16 */ + ehca_dmp(data, sge->length, "send_wr#%x sge#%x", + idx, j); + sge++; + } /* eof for j */ + idx++; + 
send_wr = send_wr->next; + } /* eof while send_wr */ +} + +#endif /* DEBUG_GSI_SEND_WR */ + +static inline int ehca_write_swqe(struct ehca_qp *qp, + struct ehca_wqe *wqe_p, + const struct ib_send_wr *send_wr, + u32 sq_map_idx, + int hidden) +{ + u32 idx; + u64 dma_length; + struct ehca_av *my_av; + u32 remote_qkey = send_wr->wr.ud.remote_qkey; + struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx]; + + if (unlikely((send_wr->num_sge < 0) || + (send_wr->num_sge > qp->ipz_squeue.act_nr_of_sg))) { + ehca_gen_err("Invalid number of WQE SGE. " + "num_sqe=%x max_nr_of_sg=%x", + send_wr->num_sge, qp->ipz_squeue.act_nr_of_sg); + return -EINVAL; /* invalid SG list length */ + } + + /* clear wqe header until sglist */ + memset(wqe_p, 0, offsetof(struct ehca_wqe, u.ud_av.sg_list)); + + wqe_p->work_request_id = replace_wr_id(send_wr->wr_id, sq_map_idx); + + qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id); + qmap_entry->reported = 0; + qmap_entry->cqe_req = 0; + + switch (send_wr->opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + wqe_p->optype = WQE_OPTYPE_SEND; + break; + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + wqe_p->optype = WQE_OPTYPE_RDMAWRITE; + break; + case IB_WR_RDMA_READ: + wqe_p->optype = WQE_OPTYPE_RDMAREAD; + break; + default: + ehca_gen_err("Invalid opcode=%x", send_wr->opcode); + return -EINVAL; /* invalid opcode */ + } + + wqe_p->wqef = (send_wr->opcode) & WQEF_HIGH_NIBBLE; + + wqe_p->wr_flag = 0; + + if ((send_wr->send_flags & IB_SEND_SIGNALED || + qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR) + && !hidden) { + wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM; + qmap_entry->cqe_req = 1; + } + + if (send_wr->opcode == IB_WR_SEND_WITH_IMM || + send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { + /* this might not work as long as HW does not support it */ + wqe_p->immediate_data = be32_to_cpu(send_wr->ex.imm_data); + wqe_p->wr_flag |= WQE_WRFLAG_IMM_DATA_PRESENT; + } + + wqe_p->nr_of_data_seg = send_wr->num_sge; + + switch 
(qp->qp_type) { + case IB_QPT_SMI: + case IB_QPT_GSI: + /* no break is intential here */ + case IB_QPT_UD: + /* IB 1.2 spec C10-15 compliance */ + if (send_wr->wr.ud.remote_qkey & 0x80000000) + remote_qkey = qp->qkey; + + wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8; + wqe_p->local_ee_context_qkey = remote_qkey; + if (unlikely(!send_wr->wr.ud.ah)) { + ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp); + return -EINVAL; + } + if (unlikely(send_wr->wr.ud.remote_qpn == 0)) { + ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num); + return -EINVAL; + } + my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah); + wqe_p->u.ud_av.ud_av = my_av->av; + + /* + * omitted check of IB_SEND_INLINE + * since HW does not support it + */ + for (idx = 0; idx < send_wr->num_sge; idx++) { + wqe_p->u.ud_av.sg_list[idx].vaddr = + send_wr->sg_list[idx].addr; + wqe_p->u.ud_av.sg_list[idx].lkey = + send_wr->sg_list[idx].lkey; + wqe_p->u.ud_av.sg_list[idx].length = + send_wr->sg_list[idx].length; + } /* eof for idx */ + if (qp->qp_type == IB_QPT_SMI || + qp->qp_type == IB_QPT_GSI) + wqe_p->u.ud_av.ud_av.pmtu = 1; + if (qp->qp_type == IB_QPT_GSI) { + wqe_p->pkeyi = send_wr->wr.ud.pkey_index; +#ifdef DEBUG_GSI_SEND_WR + trace_send_wr_ud(send_wr); +#endif /* DEBUG_GSI_SEND_WR */ + } + break; + + case IB_QPT_UC: + if (send_wr->send_flags & IB_SEND_FENCE) + wqe_p->wr_flag |= WQE_WRFLAG_FENCE; + /* no break is intentional here */ + case IB_QPT_RC: + /* TODO: atomic not implemented */ + wqe_p->u.nud.remote_virtual_address = + send_wr->wr.rdma.remote_addr; + wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey; + + /* + * omitted checking of IB_SEND_INLINE + * since HW does not support it + */ + dma_length = 0; + for (idx = 0; idx < send_wr->num_sge; idx++) { + wqe_p->u.nud.sg_list[idx].vaddr = + send_wr->sg_list[idx].addr; + wqe_p->u.nud.sg_list[idx].lkey = + send_wr->sg_list[idx].lkey; + wqe_p->u.nud.sg_list[idx].length = + send_wr->sg_list[idx].length; + dma_length += 
send_wr->sg_list[idx].length; + } /* eof idx */ + wqe_p->u.nud.atomic_1st_op_dma_len = dma_length; + + /* unsolicited ack circumvention */ + if (send_wr->opcode == IB_WR_RDMA_READ) { + /* on RDMA read, switch on and reset counters */ + qp->message_count = qp->packet_count = 0; + qp->unsol_ack_circ = 1; + } else + /* else estimate #packets */ + qp->packet_count += (dma_length >> qp->mtu_shift) + 1; + + break; + + default: + ehca_gen_err("Invalid qptype=%x", qp->qp_type); + return -EINVAL; + } + + if (ehca_debug_level >= 3) { + ehca_gen_dbg("SEND WQE written into queue qp=%p ", qp); + ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "send wqe"); + } + return 0; +} + +/* map_ib_wc_status converts raw cqe_status to ib_wc_status */ +static inline void map_ib_wc_status(u32 cqe_status, + enum ib_wc_status *wc_status) +{ + if (unlikely(cqe_status & WC_STATUS_ERROR_BIT)) { + switch (cqe_status & 0x3F) { + case 0x01: + case 0x21: + *wc_status = IB_WC_LOC_LEN_ERR; + break; + case 0x02: + case 0x22: + *wc_status = IB_WC_LOC_QP_OP_ERR; + break; + case 0x03: + case 0x23: + *wc_status = IB_WC_LOC_EEC_OP_ERR; + break; + case 0x04: + case 0x24: + *wc_status = IB_WC_LOC_PROT_ERR; + break; + case 0x05: + case 0x25: + *wc_status = IB_WC_WR_FLUSH_ERR; + break; + case 0x06: + *wc_status = IB_WC_MW_BIND_ERR; + break; + case 0x07: /* remote error - look into bits 20:24 */ + switch ((cqe_status + & WC_STATUS_REMOTE_ERROR_FLAGS) >> 11) { + case 0x0: + /* + * PSN Sequence Error! + * couldn't find a matching status! 
+ */ + *wc_status = IB_WC_GENERAL_ERR; + break; + case 0x1: + *wc_status = IB_WC_REM_INV_REQ_ERR; + break; + case 0x2: + *wc_status = IB_WC_REM_ACCESS_ERR; + break; + case 0x3: + *wc_status = IB_WC_REM_OP_ERR; + break; + case 0x4: + *wc_status = IB_WC_REM_INV_RD_REQ_ERR; + break; + } + break; + case 0x08: + *wc_status = IB_WC_RETRY_EXC_ERR; + break; + case 0x09: + *wc_status = IB_WC_RNR_RETRY_EXC_ERR; + break; + case 0x0A: + case 0x2D: + *wc_status = IB_WC_REM_ABORT_ERR; + break; + case 0x0B: + case 0x2E: + *wc_status = IB_WC_INV_EECN_ERR; + break; + case 0x0C: + case 0x2F: + *wc_status = IB_WC_INV_EEC_STATE_ERR; + break; + case 0x0D: + *wc_status = IB_WC_BAD_RESP_ERR; + break; + case 0x10: + /* WQE purged */ + *wc_status = IB_WC_WR_FLUSH_ERR; + break; + default: + *wc_status = IB_WC_FATAL_ERR; + + } + } else + *wc_status = IB_WC_SUCCESS; +} + +static inline int post_one_send(struct ehca_qp *my_qp, + struct ib_send_wr *cur_send_wr, + int hidden) +{ + struct ehca_wqe *wqe_p; + int ret; + u32 sq_map_idx; + u64 start_offset = my_qp->ipz_squeue.current_q_offset; + + /* get pointer next to free WQE */ + wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue); + if (unlikely(!wqe_p)) { + /* too many posted work requests: queue overflow */ + ehca_err(my_qp->ib_qp.device, "Too many posted WQEs " + "qp_num=%x", my_qp->ib_qp.qp_num); + return -ENOMEM; + } + + /* + * Get the index of the WQE in the send queue. The same index is used + * for writing into the sq_map. 
+ */ + sq_map_idx = start_offset / my_qp->ipz_squeue.qe_size; + + /* write a SEND WQE into the QUEUE */ + ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, sq_map_idx, hidden); + /* + * if something failed, + * reset the free entry pointer to the start value + */ + if (unlikely(ret)) { + my_qp->ipz_squeue.current_q_offset = start_offset; + ehca_err(my_qp->ib_qp.device, "Could not write WQE " + "qp_num=%x", my_qp->ib_qp.qp_num); + return -EINVAL; + } + + return 0; +} + +int ehca_post_send(struct ib_qp *qp, + struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr) +{ + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + int wqe_cnt = 0; + int ret = 0; + unsigned long flags; + + /* Reject WR if QP is in RESET, INIT or RTR state */ + if (unlikely(my_qp->state < IB_QPS_RTS)) { + ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", + my_qp->state, qp->qp_num); + ret = -EINVAL; + goto out; + } + + /* LOCK the QUEUE */ + spin_lock_irqsave(&my_qp->spinlock_s, flags); + + /* Send an empty extra RDMA read if: + * 1) there has been an RDMA read on this connection before + * 2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets + * 3) we can be sure that any previous extra RDMA read has been + * processed so we don't overflow the SQ + */ + if (unlikely(my_qp->unsol_ack_circ && + my_qp->packet_count > ACK_CIRC_THRESHOLD && + my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) { + /* insert an empty RDMA READ to fix up the remote QP state */ + struct ib_send_wr circ_wr; + memset(&circ_wr, 0, sizeof(circ_wr)); + circ_wr.opcode = IB_WR_RDMA_READ; + post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */ + wqe_cnt++; + ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num); + my_qp->message_count = my_qp->packet_count = 0; + } + + /* loop processes list of send reqs */ + while (send_wr) { + ret = post_one_send(my_qp, send_wr, 0); + if (unlikely(ret)) { + goto post_send_exit0; + } + wqe_cnt++; + send_wr = send_wr->next; + } + 
+post_send_exit0: + iosync(); /* serialize GAL register access */ + hipz_update_sqa(my_qp, wqe_cnt); + if (unlikely(ret || ehca_debug_level >= 2)) + ehca_dbg(qp->device, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", + my_qp, qp->qp_num, wqe_cnt, ret); + my_qp->message_count += wqe_cnt; + spin_unlock_irqrestore(&my_qp->spinlock_s, flags); + +out: + if (ret) + *bad_send_wr = send_wr; + return ret; +} + +static int internal_post_recv(struct ehca_qp *my_qp, + struct ib_device *dev, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + struct ehca_wqe *wqe_p; + int wqe_cnt = 0; + int ret = 0; + u32 rq_map_idx; + unsigned long flags; + struct ehca_qmap_entry *qmap_entry; + + if (unlikely(!HAS_RQ(my_qp))) { + ehca_err(dev, "QP has no RQ ehca_qp=%p qp_num=%x ext_type=%d", + my_qp, my_qp->real_qp_num, my_qp->ext_type); + ret = -ENODEV; + goto out; + } + + /* LOCK the QUEUE */ + spin_lock_irqsave(&my_qp->spinlock_r, flags); + + /* loop processes list of recv reqs */ + while (recv_wr) { + u64 start_offset = my_qp->ipz_rqueue.current_q_offset; + /* get pointer next to free WQE */ + wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue); + if (unlikely(!wqe_p)) { + /* too many posted work requests: queue overflow */ + ret = -ENOMEM; + ehca_err(dev, "Too many posted WQEs " + "qp_num=%x", my_qp->real_qp_num); + goto post_recv_exit0; + } + /* + * Get the index of the WQE in the recv queue. The same index + * is used for writing into the rq_map. 
+ */ + rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size; + + /* write a RECV WQE into the QUEUE */ + ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr, + rq_map_idx); + /* + * if something failed, + * reset the free entry pointer to the start value + */ + if (unlikely(ret)) { + my_qp->ipz_rqueue.current_q_offset = start_offset; + ret = -EINVAL; + ehca_err(dev, "Could not write WQE " + "qp_num=%x", my_qp->real_qp_num); + goto post_recv_exit0; + } + + qmap_entry = &my_qp->rq_map.map[rq_map_idx]; + qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id); + qmap_entry->reported = 0; + qmap_entry->cqe_req = 1; + + wqe_cnt++; + recv_wr = recv_wr->next; + } /* eof for recv_wr */ + +post_recv_exit0: + iosync(); /* serialize GAL register access */ + hipz_update_rqa(my_qp, wqe_cnt); + if (unlikely(ret || ehca_debug_level >= 2)) + ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i", + my_qp, my_qp->real_qp_num, wqe_cnt, ret); + spin_unlock_irqrestore(&my_qp->spinlock_r, flags); + +out: + if (ret) + *bad_recv_wr = recv_wr; + + return ret; +} + +int ehca_post_recv(struct ib_qp *qp, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp); + + /* Reject WR if QP is in RESET state */ + if (unlikely(my_qp->state == IB_QPS_RESET)) { + ehca_err(qp->device, "Invalid QP state qp_state=%d qpn=%x", + my_qp->state, qp->qp_num); + *bad_recv_wr = recv_wr; + return -EINVAL; + } + + return internal_post_recv(my_qp, qp->device, recv_wr, bad_recv_wr); +} + +int ehca_post_srq_recv(struct ib_srq *srq, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq), + srq->device, recv_wr, bad_recv_wr); +} + +/* + * ib_wc_opcode table converts ehca wc opcode to ib + * Since we use zero to indicate invalid opcode, the actual ib opcode must + * be decremented!!! 
+ */ +/* + * 256 entries so that every value of an 8-bit optype is a valid index + * (NOTE(review): presumably cqe->optype is a u8 - confirm in struct ehca_cqe). + * Entries not listed below are zero, i.e. "invalid opcode". + */ +static const u8 ib_wc_opcode[256] = { + [0x01] = IB_WC_RECV+1, + [0x02] = IB_WC_RECV_RDMA_WITH_IMM+1, + [0x04] = IB_WC_BIND_MW+1, + [0x08] = IB_WC_FETCH_ADD+1, + [0x10] = IB_WC_COMP_SWAP+1, + [0x20] = IB_WC_RDMA_WRITE+1, + [0x40] = IB_WC_RDMA_READ+1, + [0x80] = IB_WC_SEND+1 +}; + +/* internal function to poll one entry of cq */ +static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc) +{ + int ret = 0, qmap_tail_idx; + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + struct ehca_cqe *cqe; + struct ehca_qp *my_qp; + struct ehca_qmap_entry *qmap_entry; + struct ehca_queue_map *qmap; + int cqe_count = 0, is_error; + +repoll: + cqe = (struct ehca_cqe *) + ipz_qeit_get_inc_valid(&my_cq->ipz_queue); + if (!cqe) { + ret = -EAGAIN; + if (ehca_debug_level >= 3) + ehca_dbg(cq->device, "Completion queue is empty " + "my_cq=%p cq_num=%x", my_cq, my_cq->cq_number); + goto poll_cq_one_exit0; + } + + /* prevents loads being reordered across this point */ + rmb(); + + cqe_count++; + if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) { + struct ehca_qp *qp; + int purgeflag; + unsigned long flags; + + qp = ehca_cq_get_qp(my_cq, cqe->local_qp_number); + if (!qp) { + ehca_err(cq->device, "cq_num=%x qp_num=%x " + "could not find qp -> ignore cqe", + my_cq->cq_number, cqe->local_qp_number); + ehca_dmp(cqe, 64, "cq_num=%x qp_num=%x", + my_cq->cq_number, cqe->local_qp_number); + /* ignore this purged cqe */ + goto repoll; + } + spin_lock_irqsave(&qp->spinlock_s, flags); + purgeflag = qp->sqerr_purgeflag; + spin_unlock_irqrestore(&qp->spinlock_s, flags); + + if (purgeflag) { + ehca_dbg(cq->device, + "Got CQE with purged bit qp_num=%x src_qp=%x", + cqe->local_qp_number, cqe->remote_qp_number); + if (ehca_debug_level >= 2) + ehca_dmp(cqe, 64, "qp_num=%x src_qp=%x", + cqe->local_qp_number, + cqe->remote_qp_number); + /* + * ignore this to avoid double cqes of bad wqe + * that caused sqe and turn off purge flag + */ + qp->sqerr_purgeflag = 0; + goto repoll; 
+ } + } + + is_error = cqe->status & WC_STATUS_ERROR_BIT; + + /* trace error CQEs if debug_level >= 1, trace all CQEs if >= 3 */ + if (unlikely(ehca_debug_level >= 3 || (ehca_debug_level && is_error))) { + ehca_dbg(cq->device, + "Received %sCOMPLETION ehca_cq=%p cq_num=%x -----", + is_error ? "ERROR " : "", my_cq, my_cq->cq_number); + ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", + my_cq, my_cq->cq_number); + ehca_dbg(cq->device, + "ehca_cq=%p cq_num=%x -------------------------", + my_cq, my_cq->cq_number); + } + + read_lock(&ehca_qp_idr_lock); + my_qp = idr_find(&ehca_qp_idr, cqe->qp_token); + read_unlock(&ehca_qp_idr_lock); + if (!my_qp) + goto repoll; + wc->qp = &my_qp->ib_qp; + + qmap_tail_idx = get_app_wr_id(cqe->work_request_id); + if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT)) + /* We got a send completion. */ + qmap = &my_qp->sq_map; + else + /* We got a receive completion. */ + qmap = &my_qp->rq_map; + + /* advance the tail pointer */ + qmap->tail = qmap_tail_idx; + + if (is_error) { + /* + * set left_to_poll to 0 because in error state, we will not + * get any additional CQEs + */ + my_qp->sq_map.next_wqe_idx = next_index(my_qp->sq_map.tail, + my_qp->sq_map.entries); + my_qp->sq_map.left_to_poll = 0; + ehca_add_to_err_list(my_qp, 1); + + my_qp->rq_map.next_wqe_idx = next_index(my_qp->rq_map.tail, + my_qp->rq_map.entries); + my_qp->rq_map.left_to_poll = 0; + if (HAS_RQ(my_qp)) + ehca_add_to_err_list(my_qp, 0); + } + + qmap_entry = &qmap->map[qmap_tail_idx]; + if (qmap_entry->reported) { + ehca_warn(cq->device, "Double cqe on qp_num=%#x", + my_qp->real_qp_num); + /* found a double cqe, discard it and read next one */ + goto repoll; + } + + wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id); + qmap_entry->reported = 1; + + /* if left_to_poll is decremented to 0, add the QP to the error list */ + if (qmap->left_to_poll > 0) { + qmap->left_to_poll--; + if ((my_qp->sq_map.left_to_poll == 0) && + (my_qp->rq_map.left_to_poll == 0)) { + 
ehca_add_to_err_list(my_qp, 1); + if (HAS_RQ(my_qp)) + ehca_add_to_err_list(my_qp, 0); + } + } + + /* eval ib_wc_opcode */ + wc->opcode = ib_wc_opcode[cqe->optype]-1; + if (unlikely(wc->opcode == -1)) { + ehca_err(cq->device, "Invalid cqe->OPType=%x cqe->status=%x " + "ehca_cq=%p cq_num=%x", + cqe->optype, cqe->status, my_cq, my_cq->cq_number); + /* dump cqe for other infos */ + ehca_dmp(cqe, 64, "ehca_cq=%p cq_num=%x", + my_cq, my_cq->cq_number); + /* update also queue adder to throw away this entry!!! */ + goto repoll; + } + + /* eval ib_wc_status */ + if (unlikely(is_error)) { + /* complete with errors */ + map_ib_wc_status(cqe->status, &wc->status); + wc->vendor_err = wc->status; + } else + wc->status = IB_WC_SUCCESS; + + wc->byte_len = cqe->nr_bytes_transferred; + wc->pkey_index = cqe->pkey_index; + wc->slid = cqe->rlid; + wc->dlid_path_bits = cqe->dlid; + wc->src_qp = cqe->remote_qp_number; + /* + * HW has "Immed data present" and "GRH present" in bits 6 and 5. + * SW defines those in bits 1 and 0, so we can just shift and mask. 
+ */ + wc->wc_flags = (cqe->w_completion_flags >> 5) & 3; + wc->ex.imm_data = cpu_to_be32(cqe->immediate_data); + wc->sl = cqe->service_level; + +poll_cq_one_exit0: + if (cqe_count > 0) + hipz_update_feca(my_cq, cqe_count); + + return ret; +} + +static int generate_flush_cqes(struct ehca_qp *my_qp, struct ib_cq *cq, + struct ib_wc *wc, int num_entries, + struct ipz_queue *ipz_queue, int on_sq) +{ + int nr = 0; + struct ehca_wqe *wqe; + u64 offset; + struct ehca_queue_map *qmap; + struct ehca_qmap_entry *qmap_entry; + + if (on_sq) + qmap = &my_qp->sq_map; + else + qmap = &my_qp->rq_map; + + qmap_entry = &qmap->map[qmap->next_wqe_idx]; + + while ((nr < num_entries) && (qmap_entry->reported == 0)) { + /* generate flush CQE */ + + memset(wc, 0, sizeof(*wc)); + + offset = qmap->next_wqe_idx * ipz_queue->qe_size; + wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset); + if (!wqe) { + ehca_err(cq->device, "Invalid wqe offset=%#llx on " + "qp_num=%#x", offset, my_qp->real_qp_num); + return nr; + } + + wc->wr_id = replace_wr_id(wqe->work_request_id, + qmap_entry->app_wr_id); + + if (on_sq) { + switch (wqe->optype) { + case WQE_OPTYPE_SEND: + wc->opcode = IB_WC_SEND; + break; + case WQE_OPTYPE_RDMAWRITE: + wc->opcode = IB_WC_RDMA_WRITE; + break; + case WQE_OPTYPE_RDMAREAD: + wc->opcode = IB_WC_RDMA_READ; + break; + default: + ehca_err(cq->device, "Invalid optype=%x", + wqe->optype); + return nr; + } + } else + wc->opcode = IB_WC_RECV; + + if (wqe->wr_flag & WQE_WRFLAG_IMM_DATA_PRESENT) { + wc->ex.imm_data = wqe->immediate_data; + wc->wc_flags |= IB_WC_WITH_IMM; + } + + wc->status = IB_WC_WR_FLUSH_ERR; + + wc->qp = &my_qp->ib_qp; + + /* mark as reported and advance next_wqe pointer */ + qmap_entry->reported = 1; + qmap->next_wqe_idx = next_index(qmap->next_wqe_idx, + qmap->entries); + qmap_entry = &qmap->map[qmap->next_wqe_idx]; + + wc++; nr++; + } + + return nr; + +} + +int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc) +{ + struct ehca_cq *my_cq = 
container_of(cq, struct ehca_cq, ib_cq); + int nr; + struct ehca_qp *err_qp; + struct ib_wc *current_wc = wc; + int ret = 0; + unsigned long flags; + int entries_left = num_entries; + + if (num_entries < 1) { + ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p " + "cq_num=%x", num_entries, my_cq, my_cq->cq_number); + ret = -EINVAL; + goto poll_cq_exit0; + } + + spin_lock_irqsave(&my_cq->spinlock, flags); + + /* generate flush cqes for send queues */ + list_for_each_entry(err_qp, &my_cq->sqp_err_list, sq_err_node) { + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, + &err_qp->ipz_squeue, 1); + entries_left -= nr; + current_wc += nr; + + if (entries_left == 0) + break; + } + + /* generate flush cqes for receive queues */ + list_for_each_entry(err_qp, &my_cq->rqp_err_list, rq_err_node) { + nr = generate_flush_cqes(err_qp, cq, current_wc, entries_left, + &err_qp->ipz_rqueue, 0); + entries_left -= nr; + current_wc += nr; + + if (entries_left == 0) + break; + } + + for (nr = 0; nr < entries_left; nr++) { + ret = ehca_poll_cq_one(cq, current_wc); + if (ret) + break; + current_wc++; + } /* eof for nr */ + entries_left -= nr; + + spin_unlock_irqrestore(&my_cq->spinlock, flags); + if (ret == -EAGAIN || !ret) + ret = num_entries - entries_left; + +poll_cq_exit0: + return ret; +} + +int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags) +{ + struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq); + int ret = 0; + + switch (notify_flags & IB_CQ_SOLICITED_MASK) { + case IB_CQ_SOLICITED: + hipz_set_cqx_n0(my_cq, 1); + break; + case IB_CQ_NEXT_COMP: + hipz_set_cqx_n1(my_cq, 1); + break; + default: + return -EINVAL; + } + + if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { + unsigned long spl_flags; + spin_lock_irqsave(&my_cq->spinlock, spl_flags); + ret = ipz_qeit_is_valid(&my_cq->ipz_queue); + spin_unlock_irqrestore(&my_cq->spinlock, spl_flags); + } + + return ret; +} diff --git a/drivers/staging/rdma/ehca/ehca_sqp.c 
b/drivers/staging/rdma/ehca/ehca_sqp.c new file mode 100644 index 000000000000..376b031c2c7f --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_sqp.c @@ -0,0 +1,245 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * SQP functions + * + * Authors: Khadija Souissi + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "ehca_iverbs.h" +#include "hcp_if.h" + +#define IB_MAD_STATUS_REDIRECT cpu_to_be16(0x0002) +#define IB_MAD_STATUS_UNSUP_VERSION cpu_to_be16(0x0004) +#define IB_MAD_STATUS_UNSUP_METHOD cpu_to_be16(0x0008) + +#define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001) + +/** + * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue + * pair is created successfully, the corresponding port gets active. + * + * Define Special Queue pair 0 (SMI QP) is still not supported. + * + * @qp_init_attr: Queue pair init attributes with port and queue pair type + */ + +u64 ehca_define_sqp(struct ehca_shca *shca, + struct ehca_qp *ehca_qp, + struct ib_qp_init_attr *qp_init_attr) +{ + u32 pma_qp_nr, bma_qp_nr; + u64 ret; + u8 port = qp_init_attr->port_num; + int counter; + + shca->sport[port - 1].port_state = IB_PORT_DOWN; + + switch (qp_init_attr->qp_type) { + case IB_QPT_SMI: + /* function not supported yet */ + break; + case IB_QPT_GSI: + ret = hipz_h_define_aqp1(shca->ipz_hca_handle, + ehca_qp->ipz_qp_handle, + ehca_qp->galpas.kernel, + (u32) qp_init_attr->port_num, + &pma_qp_nr, &bma_qp_nr); + + if (ret != H_SUCCESS) { + ehca_err(&shca->ib_device, + "Can't define AQP1 for port %x. h_ret=%lli", + port, ret); + return ret; + } + shca->sport[port - 1].pma_qp_nr = pma_qp_nr; + ehca_dbg(&shca->ib_device, "port=%x pma_qp_nr=%x", + port, pma_qp_nr); + break; + default: + ehca_err(&shca->ib_device, "invalid qp_type=%x", + qp_init_attr->qp_type); + return H_PARAMETER; + } + + if (ehca_nr_ports < 0) /* autodetect mode */ + return H_SUCCESS; + + for (counter = 0; + shca->sport[port - 1].port_state != IB_PORT_ACTIVE && + counter < ehca_port_act_time; + counter++) { + ehca_dbg(&shca->ib_device, "... 
wait until port %x is active", + port); + msleep_interruptible(1000); + } + + /* + * Decide on the port state itself: the counter also reaches + * ehca_port_act_time when the port turns active during the last wait. + */ + if (shca->sport[port - 1].port_state != IB_PORT_ACTIVE) { + ehca_err(&shca->ib_device, "Port %x is not active.", port); + return H_HARDWARE; + } + + return H_SUCCESS; +} + +struct ib_perf { + struct ib_mad_hdr mad_hdr; + u8 reserved[40]; + u8 data[192]; +} __attribute__ ((packed)); + +/* TC/SL/FL packed into 32 bits, as in ClassPortInfo */ +struct tcslfl { + u32 tc:8; + u32 sl:4; + u32 fl:20; +} __attribute__ ((packed)); + +/* IP Version/TC/FL packed into 32 bits, as in GRH */ +struct vertcfl { + u32 ver:4; + u32 tc:8; + u32 fl:20; +} __attribute__ ((packed)); + +static int ehca_process_perf(struct ib_device *ibdev, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + const struct ib_perf *in_perf = (const struct ib_perf *)in_mad; + struct ib_perf *out_perf = (struct ib_perf *)out_mad; + struct ib_class_port_info *poi = + (struct ib_class_port_info *)out_perf->data; + struct tcslfl *tcslfl = + (struct tcslfl *)&poi->redirect_tcslfl; + struct ehca_shca *shca = + container_of(ibdev, struct ehca_shca, ib_device); + struct ehca_sport *sport = &shca->sport[port_num - 1]; + + ehca_dbg(ibdev, "method=%x", in_perf->mad_hdr.method); + + *out_mad = *in_mad; + + if (in_perf->mad_hdr.class_version != 1) { + ehca_warn(ibdev, "Unsupported class_version=%x", + in_perf->mad_hdr.class_version); + out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_VERSION; + goto perf_reply; + } + + switch (in_perf->mad_hdr.method) { + case IB_MGMT_METHOD_GET: + case IB_MGMT_METHOD_SET: + /* set class port info for redirection */ + out_perf->mad_hdr.attr_id = IB_PMA_CLASS_PORT_INFO; + out_perf->mad_hdr.status = IB_MAD_STATUS_REDIRECT; + memset(poi, 0, sizeof(*poi)); + poi->base_version = 1; + poi->class_version = 1; + poi->resp_time_value = 18; + + /* copy local routing information from WC where applicable */ + tcslfl->sl = in_wc->sl; + poi->redirect_lid = + 
sport->saved_attr.lid | in_wc->dlid_path_bits; + poi->redirect_qp = sport->pma_qp_nr; + poi->redirect_qkey = IB_QP1_QKEY; + + ehca_query_pkey(ibdev, port_num, in_wc->pkey_index, + &poi->redirect_pkey); + + /* if request was globally routed, copy route info */ + if (in_grh) { + const struct vertcfl *vertcfl = + (const struct vertcfl *)&in_grh->version_tclass_flow; + memcpy(poi->redirect_gid, in_grh->dgid.raw, + sizeof(poi->redirect_gid)); + tcslfl->tc = vertcfl->tc; + tcslfl->fl = vertcfl->fl; + } else + /* else only fill in default GID */ + ehca_query_gid(ibdev, port_num, 0, + (union ib_gid *)&poi->redirect_gid); + + ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x", + sport->saved_attr.lid, sport->pma_qp_nr); + break; + + case IB_MGMT_METHOD_GET_RESP: + return IB_MAD_RESULT_FAILURE; + + default: + out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_METHOD; + break; + } + +perf_reply: + out_perf->mad_hdr.method = IB_MGMT_METHOD_GET_RESP; + + return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; +} + +int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, + const struct ib_wc *in_wc, const struct ib_grh *in_grh, + const struct ib_mad_hdr *in, size_t in_mad_size, + struct ib_mad_hdr *out, size_t *out_mad_size, + u16 *out_mad_pkey_index) +{ + int ret; + const struct ib_mad *in_mad = (const struct ib_mad *)in; + struct ib_mad *out_mad = (struct ib_mad *)out; + + if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) || + *out_mad_size != sizeof(*out_mad))) + return IB_MAD_RESULT_FAILURE; + + if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc) + return IB_MAD_RESULT_FAILURE; + + /* accept only pma request */ + if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT) + return IB_MAD_RESULT_SUCCESS; + + ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp); + ret = ehca_process_perf(ibdev, port_num, in_wc, in_grh, + in_mad, out_mad); + + return ret; +} diff --git a/drivers/staging/rdma/ehca/ehca_tools.h b/drivers/staging/rdma/ehca/ehca_tools.h 
new file mode 100644 index 000000000000..d280b12aae64 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_tools.h @@ -0,0 +1,155 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * auxiliary functions + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Khadija Souissi + * Waleri Fomin + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#ifndef EHCA_TOOLS_H +#define EHCA_TOOLS_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +extern int ehca_debug_level; + +#define ehca_dbg(ib_dev, format, arg...) \ + do { \ + if (unlikely(ehca_debug_level)) \ + dev_printk(KERN_DEBUG, (ib_dev)->dma_device, \ + "PU%04x EHCA_DBG:%s " format "\n", \ + raw_smp_processor_id(), __func__, \ + ## arg); \ + } while (0) + +#define ehca_info(ib_dev, format, arg...) \ + dev_info((ib_dev)->dma_device, "PU%04x EHCA_INFO:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg) + +#define ehca_warn(ib_dev, format, arg...) \ + dev_warn((ib_dev)->dma_device, "PU%04x EHCA_WARN:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg) + +#define ehca_err(ib_dev, format, arg...) \ + dev_err((ib_dev)->dma_device, "PU%04x EHCA_ERR:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg) + +/* use this one only if no ib_dev available */ +#define ehca_gen_dbg(format, arg...) \ + do { \ + if (unlikely(ehca_debug_level)) \ + printk(KERN_DEBUG "PU%04x EHCA_DBG:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg); \ + } while (0) + +#define ehca_gen_warn(format, arg...) \ + printk(KERN_INFO "PU%04x EHCA_WARN:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg) + +#define ehca_gen_err(format, arg...) \ + printk(KERN_ERR "PU%04x EHCA_ERR:%s " format "\n", \ + raw_smp_processor_id(), __func__, ## arg) + +/** + * ehca_dmp - printk a memory block, whose length is n*8 bytes. + * Each line has the following layout: + * adr=X ofs=Y <8 bytes hex> <8 bytes hex> + */ +#define ehca_dmp(adr, len, format, args...) 
\ + do { \ + unsigned int x; \ + unsigned int l = (unsigned int)(len); \ + unsigned char *deb = (unsigned char *)(adr); \ + for (x = 0; x < l; x += 16) { \ + printk(KERN_INFO "EHCA_DMP:%s " format \ + " adr=%p ofs=%04x %016llx %016llx\n", \ + __func__, ##args, deb, x, \ + *((u64 *)&deb[0]), *((u64 *)&deb[8])); \ + deb += 16; \ + } \ + } while (0) + +/* define a bitmask, little endian version */ +#define EHCA_BMASK(pos, length) (((pos) << 16) + (length)) + +/* + * define a bitmask, the ibm way: encodes shift position 63 - to in the + * upper half and field length to - from + 1 in the lower half + */ +#define EHCA_BMASK_IBM(from, to) (((63 - (to)) << 16) + ((to) - (from) + 1)) + +/* internal function, don't use */ +#define EHCA_BMASK_SHIFTPOS(mask) (((mask) >> 16) & 0xffff) + +/* internal function, don't use */ +#define EHCA_BMASK_MASK(mask) (~0ULL >> ((64 - (mask)) & 0xffff)) + +/** + * EHCA_BMASK_SET - return value shifted and masked by mask + * variable|=EHCA_BMASK_SET(MY_MASK,0x4711) ORs the bits in variable + * variable&=~EHCA_BMASK_SET(MY_MASK,-1) clears the bits from the mask + * in variable + */ +#define EHCA_BMASK_SET(mask, value) \ + ((EHCA_BMASK_MASK(mask) & ((u64)(value))) << EHCA_BMASK_SHIFTPOS(mask)) + +/** + * EHCA_BMASK_GET - extract a parameter from value by mask + */ +#define EHCA_BMASK_GET(mask, value) \ + (EHCA_BMASK_MASK(mask) & (((u64)(value)) >> EHCA_BMASK_SHIFTPOS(mask))) + +/* Converts ehca to ib return code */ +int ehca2ib_return_code(u64 ehca_rc); + +#endif /* EHCA_TOOLS_H */ diff --git a/drivers/staging/rdma/ehca/ehca_uverbs.c b/drivers/staging/rdma/ehca/ehca_uverbs.c new file mode 100644 index 000000000000..1a1d5d99fcf9 --- /dev/null +++ b/drivers/staging/rdma/ehca/ehca_uverbs.c @@ -0,0 +1,309 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * userspace support verbs + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Heiko J Schick + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. 
+ * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include "ehca_classes.h" +#include "ehca_iverbs.h" +#include "ehca_mrmw.h" +#include "ehca_tools.h" +#include "hcp_if.h" + +struct ib_ucontext *ehca_alloc_ucontext(struct ib_device *device, + struct ib_udata *udata) +{ + struct ehca_ucontext *my_context; + + my_context = kzalloc(sizeof *my_context, GFP_KERNEL); + if (!my_context) { + ehca_err(device, "Out of memory device=%p", device); + return ERR_PTR(-ENOMEM); + } + + return &my_context->ib_ucontext; +} + +int ehca_dealloc_ucontext(struct ib_ucontext *context) +{ + kfree(container_of(context, struct ehca_ucontext, ib_ucontext)); + return 0; +} + +static void ehca_mm_open(struct vm_area_struct *vma) +{ + u32 *count = (u32 *)vma->vm_private_data; + if (!count) { + ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + return; + } + (*count)++; + if (!(*count)) + ehca_gen_err("Use count overflow vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", + vma->vm_start, vma->vm_end, *count); +} + +static void ehca_mm_close(struct vm_area_struct *vma) +{ + u32 *count = (u32 *)vma->vm_private_data; + if (!count) { + ehca_gen_err("Invalid vma struct vm_start=%lx vm_end=%lx", + vma->vm_start, vma->vm_end); + return; + } + (*count)--; + ehca_gen_dbg("vm_start=%lx vm_end=%lx count=%x", + vma->vm_start, vma->vm_end, *count); +} + +static const struct vm_operations_struct vm_ops = { + .open = ehca_mm_open, + .close = ehca_mm_close, +}; + +static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas, + u32 *mm_count) +{ + int ret; + u64 vsize, physical; + + vsize = vma->vm_end - vma->vm_start; + if (vsize < EHCA_PAGESIZE) { + ehca_gen_err("invalid vsize=%lx", vma->vm_end - vma->vm_start); + return -EINVAL; + } + + physical = galpas->user.fw_handle; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical); + /* VM_IO | VM_DONTEXPAND | 
VM_DONTDUMP are set by remap_pfn_range() */ + ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT, + vma->vm_page_prot); + if (unlikely(ret)) { + ehca_gen_err("remap_pfn_range() failed ret=%i", ret); + return -ENOMEM; + } + + vma->vm_private_data = mm_count; + (*mm_count)++; + vma->vm_ops = &vm_ops; + + return 0; +} + +static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue, + u32 *mm_count) +{ + int ret; + u64 start, ofs; + struct page *page; + + vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; + start = vma->vm_start; + for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) { + u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs); + page = virt_to_page(virt_addr); + ret = vm_insert_page(vma, start, page); + if (unlikely(ret)) { + ehca_gen_err("vm_insert_page() failed rc=%i", ret); + return ret; + } + start += PAGE_SIZE; + } + vma->vm_private_data = mm_count; + (*mm_count)++; + vma->vm_ops = &vm_ops; + + return 0; +} + +static int ehca_mmap_cq(struct vm_area_struct *vma, struct ehca_cq *cq, + u32 rsrc_type) +{ + int ret; + + switch (rsrc_type) { + case 0: /* galpa fw handle */ + ehca_dbg(cq->ib_cq.device, "cq_num=%x fw", cq->cq_number); + ret = ehca_mmap_fw(vma, &cq->galpas, &cq->mm_count_galpa); + if (unlikely(ret)) { + ehca_err(cq->ib_cq.device, + "ehca_mmap_fw() failed rc=%i cq_num=%x", + ret, cq->cq_number); + return ret; + } + break; + + case 1: /* cq queue_addr */ + ehca_dbg(cq->ib_cq.device, "cq_num=%x queue", cq->cq_number); + ret = ehca_mmap_queue(vma, &cq->ipz_queue, &cq->mm_count_queue); + if (unlikely(ret)) { + ehca_err(cq->ib_cq.device, + "ehca_mmap_queue() failed rc=%i cq_num=%x", + ret, cq->cq_number); + return ret; + } + break; + + default: + ehca_err(cq->ib_cq.device, "bad resource type=%x cq_num=%x", + rsrc_type, cq->cq_number); + return -EINVAL; + } + + return 0; +} + +static int ehca_mmap_qp(struct vm_area_struct *vma, struct ehca_qp *qp, + u32 rsrc_type) +{ + int ret; + + switch (rsrc_type) { + case 0: /* galpa 
fw handle */ + ehca_dbg(qp->ib_qp.device, "qp_num=%x fw", qp->ib_qp.qp_num); + ret = ehca_mmap_fw(vma, &qp->galpas, &qp->mm_count_galpa); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "remap_pfn_range() failed ret=%i qp_num=%x", + ret, qp->ib_qp.qp_num); + return -ENOMEM; + } + break; + + case 1: /* qp rqueue_addr */ + ehca_dbg(qp->ib_qp.device, "qp_num=%x rq", qp->ib_qp.qp_num); + ret = ehca_mmap_queue(vma, &qp->ipz_rqueue, + &qp->mm_count_rqueue); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "ehca_mmap_queue(rq) failed rc=%i qp_num=%x", + ret, qp->ib_qp.qp_num); + return ret; + } + break; + + case 2: /* qp squeue_addr */ + ehca_dbg(qp->ib_qp.device, "qp_num=%x sq", qp->ib_qp.qp_num); + ret = ehca_mmap_queue(vma, &qp->ipz_squeue, + &qp->mm_count_squeue); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "ehca_mmap_queue(sq) failed rc=%i qp_num=%x", + ret, qp->ib_qp.qp_num); + return ret; + } + break; + + default: + ehca_err(qp->ib_qp.device, "bad resource type=%x qp=num=%x", + rsrc_type, qp->ib_qp.qp_num); + return -EINVAL; + } + + return 0; +} + +int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + u64 fileoffset = vma->vm_pgoff; + u32 idr_handle = fileoffset & 0x1FFFFFF; + u32 q_type = (fileoffset >> 27) & 0x1; /* CQ, QP,... 
*/ + u32 rsrc_type = (fileoffset >> 25) & 0x3; /* sq,rq,cmnd_window */ + u32 ret; + struct ehca_cq *cq; + struct ehca_qp *qp; + struct ib_uobject *uobject; + + switch (q_type) { + case 0: /* CQ */ + read_lock(&ehca_cq_idr_lock); + cq = idr_find(&ehca_cq_idr, idr_handle); + read_unlock(&ehca_cq_idr_lock); + + /* make sure this mmap really belongs to the authorized user */ + if (!cq) + return -EINVAL; + + if (!cq->ib_cq.uobject || cq->ib_cq.uobject->context != context) + return -EINVAL; + + ret = ehca_mmap_cq(vma, cq, rsrc_type); + if (unlikely(ret)) { + ehca_err(cq->ib_cq.device, + "ehca_mmap_cq() failed rc=%i cq_num=%x", + ret, cq->cq_number); + return ret; + } + break; + + case 1: /* QP */ + read_lock(&ehca_qp_idr_lock); + qp = idr_find(&ehca_qp_idr, idr_handle); + read_unlock(&ehca_qp_idr_lock); + + /* make sure this mmap really belongs to the authorized user */ + if (!qp) + return -EINVAL; + + uobject = IS_SRQ(qp) ? qp->ib_srq.uobject : qp->ib_qp.uobject; + if (!uobject || uobject->context != context) + return -EINVAL; + + ret = ehca_mmap_qp(vma, qp, rsrc_type); + if (unlikely(ret)) { + ehca_err(qp->ib_qp.device, + "ehca_mmap_qp() failed rc=%i qp_num=%x", + ret, qp->ib_qp.qp_num); + return ret; + } + break; + + default: + ehca_gen_err("bad queue type %x", q_type); + return -EINVAL; + } + + return 0; +} diff --git a/drivers/staging/rdma/ehca/hcp_if.c b/drivers/staging/rdma/ehca/hcp_if.c new file mode 100644 index 000000000000..89517ffb4389 --- /dev/null +++ b/drivers/staging/rdma/ehca/hcp_if.c @@ -0,0 +1,949 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Firmware Infiniband Interface code for POWER + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Joachim Fenkes + * Gerd Bayer + * Waleri Fomin + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. 
+ * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include "ehca_tools.h" +#include "hcp_if.h" +#include "hcp_phyp.h" +#include "hipz_fns.h" +#include "ipz_pt_fn.h" + +#define H_ALL_RES_QP_ENHANCED_OPS EHCA_BMASK_IBM(9, 11) +#define H_ALL_RES_QP_PTE_PIN EHCA_BMASK_IBM(12, 12) +#define H_ALL_RES_QP_SERVICE_TYPE EHCA_BMASK_IBM(13, 15) +#define H_ALL_RES_QP_STORAGE EHCA_BMASK_IBM(16, 17) +#define H_ALL_RES_QP_LL_RQ_CQE_POSTING EHCA_BMASK_IBM(18, 18) +#define H_ALL_RES_QP_LL_SQ_CQE_POSTING EHCA_BMASK_IBM(19, 21) +#define H_ALL_RES_QP_SIGNALING_TYPE EHCA_BMASK_IBM(22, 23) +#define H_ALL_RES_QP_UD_AV_LKEY_CTRL EHCA_BMASK_IBM(31, 31) +#define H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE EHCA_BMASK_IBM(32, 35) +#define H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE EHCA_BMASK_IBM(36, 39) +#define H_ALL_RES_QP_RESOURCE_TYPE EHCA_BMASK_IBM(56, 63) + +#define H_ALL_RES_QP_MAX_OUTST_SEND_WR EHCA_BMASK_IBM(0, 15) +#define H_ALL_RES_QP_MAX_OUTST_RECV_WR EHCA_BMASK_IBM(16, 31) +#define H_ALL_RES_QP_MAX_SEND_SGE EHCA_BMASK_IBM(32, 39) +#define H_ALL_RES_QP_MAX_RECV_SGE EHCA_BMASK_IBM(40, 47) + +#define H_ALL_RES_QP_UD_AV_LKEY EHCA_BMASK_IBM(32, 63) +#define H_ALL_RES_QP_SRQ_QP_TOKEN EHCA_BMASK_IBM(0, 31) +#define H_ALL_RES_QP_SRQ_QP_HANDLE EHCA_BMASK_IBM(0, 64) +#define H_ALL_RES_QP_SRQ_LIMIT EHCA_BMASK_IBM(48, 63) +#define H_ALL_RES_QP_SRQ_QPN EHCA_BMASK_IBM(40, 63) + +#define H_ALL_RES_QP_ACT_OUTST_SEND_WR EHCA_BMASK_IBM(16, 31) +#define H_ALL_RES_QP_ACT_OUTST_RECV_WR EHCA_BMASK_IBM(48, 63) +#define H_ALL_RES_QP_ACT_SEND_SGE EHCA_BMASK_IBM(8, 15) +#define H_ALL_RES_QP_ACT_RECV_SGE EHCA_BMASK_IBM(24, 31) + +#define H_ALL_RES_QP_SQUEUE_SIZE_PAGES EHCA_BMASK_IBM(0, 31) +#define H_ALL_RES_QP_RQUEUE_SIZE_PAGES EHCA_BMASK_IBM(32, 63) + +#define H_MP_INIT_TYPE EHCA_BMASK_IBM(44, 47) +#define H_MP_SHUTDOWN EHCA_BMASK_IBM(48, 48) +#define H_MP_RESET_QKEY_CTR EHCA_BMASK_IBM(49, 49) + +#define HCALL4_REGS_FORMAT "r4=%lx r5=%lx r6=%lx r7=%lx" +#define HCALL7_REGS_FORMAT HCALL4_REGS_FORMAT " r8=%lx r9=%lx r10=%lx" +#define HCALL9_REGS_FORMAT 
HCALL7_REGS_FORMAT " r11=%lx r12=%lx" + +static DEFINE_SPINLOCK(hcall_lock); + +static long ehca_plpar_hcall_norets(unsigned long opcode, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6, + unsigned long arg7) +{ + long ret; + int i, sleep_msecs; + unsigned long flags = 0; + + if (unlikely(ehca_debug_level >= 2)) + ehca_gen_dbg("opcode=%lx " HCALL7_REGS_FORMAT, + opcode, arg1, arg2, arg3, arg4, arg5, arg6, arg7); + + for (i = 0; i < 5; i++) { + /* serialize hCalls to work around firmware issue */ + if (ehca_lock_hcalls) + spin_lock_irqsave(&hcall_lock, flags); + + ret = plpar_hcall_norets(opcode, arg1, arg2, arg3, arg4, + arg5, arg6, arg7); + + if (ehca_lock_hcalls) + spin_unlock_irqrestore(&hcall_lock, flags); + + if (H_IS_LONG_BUSY(ret)) { + sleep_msecs = get_longbusy_msecs(ret); + msleep_interruptible(sleep_msecs); + continue; + } + + if (ret < H_SUCCESS) + ehca_gen_err("opcode=%lx ret=%li " HCALL7_REGS_FORMAT, + opcode, ret, arg1, arg2, arg3, + arg4, arg5, arg6, arg7); + else + if (unlikely(ehca_debug_level >= 2)) + ehca_gen_dbg("opcode=%lx ret=%li", opcode, ret); + + return ret; + } + + return H_BUSY; +} + +static long ehca_plpar_hcall9(unsigned long opcode, + unsigned long *outs, /* array of 9 outputs */ + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6, + unsigned long arg7, + unsigned long arg8, + unsigned long arg9) +{ + long ret; + int i, sleep_msecs; + unsigned long flags = 0; + + if (unlikely(ehca_debug_level >= 2)) + ehca_gen_dbg("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, opcode, + arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9); + + for (i = 0; i < 5; i++) { + /* serialize hCalls to work around firmware issue */ + if (ehca_lock_hcalls) + spin_lock_irqsave(&hcall_lock, flags); + + ret = plpar_hcall9(opcode, outs, + arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9); + + if 
(ehca_lock_hcalls) + spin_unlock_irqrestore(&hcall_lock, flags); + + if (H_IS_LONG_BUSY(ret)) { + sleep_msecs = get_longbusy_msecs(ret); + msleep_interruptible(sleep_msecs); + continue; + } + + if (ret < H_SUCCESS) { + ehca_gen_err("INPUT -- opcode=%lx " HCALL9_REGS_FORMAT, + opcode, arg1, arg2, arg3, arg4, arg5, + arg6, arg7, arg8, arg9); + ehca_gen_err("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT, + ret, outs[0], outs[1], outs[2], outs[3], + outs[4], outs[5], outs[6], outs[7], + outs[8]); + } else if (unlikely(ehca_debug_level >= 2)) + ehca_gen_dbg("OUTPUT -- ret=%li " HCALL9_REGS_FORMAT, + ret, outs[0], outs[1], outs[2], outs[3], + outs[4], outs[5], outs[6], outs[7], + outs[8]); + return ret; + } + + return H_BUSY; +} + +u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle, + struct ehca_pfeq *pfeq, + const u32 neq_control, + const u32 number_of_entries, + struct ipz_eq_handle *eq_handle, + u32 *act_nr_of_entries, + u32 *act_pages, + u32 *eq_ist) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + u64 allocate_controls; + + /* resource type */ + allocate_controls = 3ULL; + + /* ISN is associated */ + if (neq_control != 1) + allocate_controls = (1ULL << (63 - 7)) | allocate_controls; + else /* notification event queue */ + allocate_controls = (1ULL << 63) | allocate_controls; + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + allocate_controls, /* r5 */ + number_of_entries, /* r6 */ + 0, 0, 0, 0, 0, 0); + eq_handle->handle = outs[0]; + *act_nr_of_entries = (u32)outs[3]; + *act_pages = (u32)outs[4]; + *eq_ist = (u32)outs[5]; + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Not enough resource - ret=%lli ", ret); + + return ret; +} + +u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle, + struct ipz_eq_handle eq_handle, + const u64 event_mask) +{ + return ehca_plpar_hcall_norets(H_RESET_EVENTS, + adapter_handle.handle, /* r4 */ + eq_handle.handle, /* r5 */ + event_mask, /* r6 */ 
+ 0, 0, 0, 0); +} + +u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle, + struct ehca_cq *cq, + struct ehca_alloc_cq_parms *param) +{ + int rc; + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + 2, /* r5 */ + param->eq_handle.handle, /* r6 */ + cq->token, /* r7 */ + param->nr_cqe, /* r8 */ + 0, 0, 0, 0); + cq->ipz_cq_handle.handle = outs[0]; + param->act_nr_of_entries = (u32)outs[3]; + param->act_pages = (u32)outs[4]; + + if (ret == H_SUCCESS) { + rc = hcp_galpas_ctor(&cq->galpas, 0, outs[5], outs[6]); + if (rc) { + ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx", + rc, outs[5]); + + ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + cq->ipz_cq_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + ret = H_NO_MEM; + } + } + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Not enough resources. ret=%lli", ret); + + return ret; +} + +u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle, + struct ehca_alloc_qp_parms *parms, int is_user) +{ + int rc; + u64 ret; + u64 allocate_controls, max_r10_reg, r11, r12; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + allocate_controls = + EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type) + | EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0) + | EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype) + | EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype) + | EHCA_BMASK_SET(H_ALL_RES_QP_STORAGE, parms->qp_storage) + | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_SQ_PAGE_SIZE, + parms->squeue.page_size) + | EHCA_BMASK_SET(H_ALL_RES_QP_SMALL_RQ_PAGE_SIZE, + parms->rqueue.page_size) + | EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING, + !!(parms->ll_comp_flags & LLQP_RECV_COMP)) + | EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING, + !!(parms->ll_comp_flags & LLQP_SEND_COMP)) + | EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL, + parms->ud_av_l_key_ctl) + | 
EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1); + + max_r10_reg = + EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR, + parms->squeue.max_wr + 1) + | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR, + parms->rqueue.max_wr + 1) + | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE, + parms->squeue.max_sge) + | EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE, + parms->rqueue.max_sge); + + r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token); + + if (parms->ext_type == EQPT_SRQ) + r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_LIMIT, parms->srq_limit); + else + r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QPN, parms->srq_qpn); + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + allocate_controls, /* r5 */ + parms->send_cq_handle.handle, + parms->recv_cq_handle.handle, + parms->eq_handle.handle, + ((u64)parms->token << 32) | parms->pd.value, + max_r10_reg, r11, r12); + + parms->qp_handle.handle = outs[0]; + parms->real_qp_num = (u32)outs[1]; + parms->squeue.act_nr_wqes = + (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]); + parms->rqueue.act_nr_wqes = + (u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_RECV_WR, outs[2]); + parms->squeue.act_nr_sges = + (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_SEND_SGE, outs[3]); + parms->rqueue.act_nr_sges = + (u8)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_RECV_SGE, outs[3]); + parms->squeue.queue_size = + (u32)EHCA_BMASK_GET(H_ALL_RES_QP_SQUEUE_SIZE_PAGES, outs[4]); + parms->rqueue.queue_size = + (u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]); + + if (ret == H_SUCCESS) { + rc = hcp_galpas_ctor(&parms->galpas, is_user, outs[6], outs[6]); + if (rc) { + ehca_gen_err("Could not establish HW access. rc=%d paddr=%#lx", + rc, outs[6]); + + ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + parms->qp_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + ret = H_NO_MEM; + } + } + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Not enough resources. 
ret=%lli", ret); + + return ret; +} + +u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle, + const u8 port_id, + struct hipz_query_port *query_port_response_block) +{ + u64 ret; + u64 r_cb = __pa(query_port_response_block); + + if (r_cb & (EHCA_PAGESIZE-1)) { + ehca_gen_err("response block not page aligned"); + return H_PARAMETER; + } + + ret = ehca_plpar_hcall_norets(H_QUERY_PORT, + adapter_handle.handle, /* r4 */ + port_id, /* r5 */ + r_cb, /* r6 */ + 0, 0, 0, 0); + + if (ehca_debug_level >= 2) + ehca_dmp(query_port_response_block, 64, "response_block"); + + return ret; +} + +u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle, + const u8 port_id, const u32 port_cap, + const u8 init_type, const int modify_mask) +{ + u64 port_attributes = port_cap; + + if (modify_mask & IB_PORT_SHUTDOWN) + port_attributes |= EHCA_BMASK_SET(H_MP_SHUTDOWN, 1); + if (modify_mask & IB_PORT_INIT_TYPE) + port_attributes |= EHCA_BMASK_SET(H_MP_INIT_TYPE, init_type); + if (modify_mask & IB_PORT_RESET_QKEY_CNTR) + port_attributes |= EHCA_BMASK_SET(H_MP_RESET_QKEY_CTR, 1); + + return ehca_plpar_hcall_norets(H_MODIFY_PORT, + adapter_handle.handle, /* r4 */ + port_id, /* r5 */ + port_attributes, /* r6 */ + 0, 0, 0, 0); +} + +u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, + struct hipz_query_hca *query_hca_rblock) +{ + u64 r_cb = __pa(query_hca_rblock); + + if (r_cb & (EHCA_PAGESIZE-1)) { + ehca_gen_err("response_block=%p not page aligned", + query_hca_rblock); + return H_PARAMETER; + } + + return ehca_plpar_hcall_norets(H_QUERY_HCA, + adapter_handle.handle, /* r4 */ + r_cb, /* r5 */ + 0, 0, 0, 0, 0); +} + +u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle, + const u8 pagesize, + const u8 queue_type, + const u64 resource_handle, + const u64 logical_address_of_page, + u64 count) +{ + return ehca_plpar_hcall_norets(H_REGISTER_RPAGES, + adapter_handle.handle, /* r4 */ + (u64)queue_type | ((u64)pagesize) << 8, + /* r5 
*/ + resource_handle, /* r6 */ + logical_address_of_page, /* r7 */ + count, /* r8 */ + 0, 0); +} + +u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle, + const struct ipz_eq_handle eq_handle, + struct ehca_pfeq *pfeq, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count) +{ + if (count != 1) { + ehca_gen_err("Ppage counter=%llx", count); + return H_PARAMETER; + } + return hipz_h_register_rpage(adapter_handle, + pagesize, + queue_type, + eq_handle.handle, + logical_address_of_page, count); +} + +u64 hipz_h_query_int_state(const struct ipz_adapter_handle adapter_handle, + u32 ist) +{ + u64 ret; + ret = ehca_plpar_hcall_norets(H_QUERY_INT_STATE, + adapter_handle.handle, /* r4 */ + ist, /* r5 */ + 0, 0, 0, 0, 0); + + if (ret != H_SUCCESS && ret != H_BUSY) + ehca_gen_err("Could not query interrupt state."); + + return ret; +} + +u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle, + const struct ipz_cq_handle cq_handle, + struct ehca_pfcq *pfcq, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count, + const struct h_galpa gal) +{ + if (count != 1) { + ehca_gen_err("Page counter=%llx", count); + return H_PARAMETER; + } + + return hipz_h_register_rpage(adapter_handle, pagesize, queue_type, + cq_handle.handle, logical_address_of_page, + count); +} + +u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count, + const struct h_galpa galpa) +{ + if (count > 1) { + ehca_gen_err("Page counter=%llx", count); + return H_PARAMETER; + } + + return hipz_h_register_rpage(adapter_handle, pagesize, queue_type, + qp_handle.handle, logical_address_of_page, + count); +} + +u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle, + const struct 
ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + void **log_addr_next_sq_wqe2processed, + void **log_addr_next_rq_wqe2processed, + int dis_and_get_function_code) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs, + adapter_handle.handle, /* r4 */ + dis_and_get_function_code, /* r5 */ + qp_handle.handle, /* r6 */ + 0, 0, 0, 0, 0, 0); + if (log_addr_next_sq_wqe2processed) + *log_addr_next_sq_wqe2processed = (void *)outs[0]; + if (log_addr_next_rq_wqe2processed) + *log_addr_next_rq_wqe2processed = (void *)outs[1]; + + return ret; +} + +u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + const u64 update_mask, + struct hcp_modify_qp_control_block *mqpcb, + struct h_galpa gal) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + ret = ehca_plpar_hcall9(H_MODIFY_QP, outs, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + update_mask, /* r6 */ + __pa(mqpcb), /* r7 */ + 0, 0, 0, 0, 0); + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Insufficient resources ret=%lli", ret); + + return ret; +} + +u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct ehca_pfqp *pfqp, + struct hcp_modify_qp_control_block *qqpcb, + struct h_galpa gal) +{ + return ehca_plpar_hcall_norets(H_QUERY_QP, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + __pa(qqpcb), /* r6 */ + 0, 0, 0, 0); +} + +u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle, + struct ehca_qp *qp) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = hcp_galpas_dtor(&qp->galpas); + if (ret) { + ehca_gen_err("Could not destruct qp->galpas"); + return H_RESOURCE; + } + ret = ehca_plpar_hcall9(H_DISABLE_AND_GETC, outs, + adapter_handle.handle, /* r4 */ + /* function code */ + 1, /* r5 */ + qp->ipz_qp_handle.handle, /* r6 */ + 0, 0, 0, 0, 0, 0); + if 
(ret == H_HARDWARE) + ehca_gen_err("HCA not operational. ret=%lli", ret); + + ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + qp->ipz_qp_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + + if (ret == H_RESOURCE) + ehca_gen_err("Resource still in use. ret=%lli", ret); + + return ret; +} + +u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port) +{ + return ehca_plpar_hcall_norets(H_DEFINE_AQP0, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + port, /* r6 */ + 0, 0, 0, 0); +} + +u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u32 port, u32 * pma_qp_nr, + u32 * bma_qp_nr) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_DEFINE_AQP1, outs, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + port, /* r6 */ + 0, 0, 0, 0, 0, 0); + *pma_qp_nr = (u32)outs[0]; + *bma_qp_nr = (u32)outs[1]; + + if (ret == H_ALIAS_EXIST) + ehca_gen_err("AQP1 already exists. ret=%lli", ret); + + return ret; +} + +u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id) +{ + u64 ret; + + ret = ehca_plpar_hcall_norets(H_ATTACH_MCQP, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + mcg_dlid, /* r6 */ + interface_id, /* r7 */ + subnet_prefix, /* r8 */ + 0, 0); + + if (ret == H_NOT_ENOUGH_RESOURCES) + ehca_gen_err("Not enough resources. 
ret=%lli", ret); + + return ret; +} + +u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle, + const struct ipz_qp_handle qp_handle, + struct h_galpa gal, + u16 mcg_dlid, + u64 subnet_prefix, u64 interface_id) +{ + return ehca_plpar_hcall_norets(H_DETACH_MCQP, + adapter_handle.handle, /* r4 */ + qp_handle.handle, /* r5 */ + mcg_dlid, /* r6 */ + interface_id, /* r7 */ + subnet_prefix, /* r8 */ + 0, 0); +} + +u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle, + struct ehca_cq *cq, + u8 force_flag) +{ + u64 ret; + + ret = hcp_galpas_dtor(&cq->galpas); + if (ret) { + ehca_gen_err("Could not destruct cp->galpas"); + return H_RESOURCE; + } + + ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + cq->ipz_cq_handle.handle, /* r5 */ + force_flag != 0 ? 1L : 0L, /* r6 */ + 0, 0, 0, 0); + + if (ret == H_RESOURCE) + ehca_gen_err("H_FREE_RESOURCE failed ret=%lli ", ret); + + return ret; +} + +u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle, + struct ehca_eq *eq) +{ + u64 ret; + + ret = hcp_galpas_dtor(&eq->galpas); + if (ret) { + ehca_gen_err("Could not destruct eq->galpas"); + return H_RESOURCE; + } + + ret = ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + eq->ipz_eq_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); + + if (ret == H_RESOURCE) + ehca_gen_err("Resource in use. 
ret=%lli ", ret); + + return ret; +} + +u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + 5, /* r5 */ + vaddr, /* r6 */ + length, /* r7 */ + (((u64)access_ctrl) << 32ULL), /* r8 */ + pd.value, /* r9 */ + 0, 0, 0); + outparms->handle.handle = outs[0]; + outparms->lkey = (u32)outs[2]; + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u8 pagesize, + const u8 queue_type, + const u64 logical_address_of_page, + const u64 count) +{ + u64 ret; + + if (unlikely(ehca_debug_level >= 3)) { + if (count > 1) { + u64 *kpage; + int i; + kpage = __va(logical_address_of_page); + for (i = 0; i < count; i++) + ehca_gen_dbg("kpage[%d]=%p", + i, (void *)kpage[i]); + } else + ehca_gen_dbg("kpage=%p", + (void *)logical_address_of_page); + } + + if ((count > 1) && (logical_address_of_page & (EHCA_PAGESIZE-1))) { + ehca_gen_err("logical_address_of_page not on a 4k boundary " + "adapter_handle=%llx mr=%p mr_handle=%llx " + "pagesize=%x queue_type=%x " + "logical_address_of_page=%llx count=%llx", + adapter_handle.handle, mr, + mr->ipz_mr_handle.handle, pagesize, queue_type, + logical_address_of_page, count); + ret = H_PARAMETER; + } else + ret = hipz_h_register_rpage(adapter_handle, pagesize, + queue_type, + mr->ipz_mr_handle.handle, + logical_address_of_page, count); + return ret; +} + +u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_QUERY_MR, outs, + adapter_handle.handle, 
/* r4 */ + mr->ipz_mr_handle.handle, /* r5 */ + 0, 0, 0, 0, 0, 0, 0); + outparms->len = outs[0]; + outparms->vaddr = outs[1]; + outparms->acl = outs[4] >> 32; + outparms->lkey = (u32)(outs[5] >> 32); + outparms->rkey = (u32)(outs[5] & (0xffffffff)); + + return ret; +} + +u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr) +{ + return ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + mr->ipz_mr_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); +} + +u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const u64 vaddr_in, + const u64 length, + const u32 access_ctrl, + const struct ipz_pd pd, + const u64 mr_addr_cb, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_REREGISTER_PMR, outs, + adapter_handle.handle, /* r4 */ + mr->ipz_mr_handle.handle, /* r5 */ + vaddr_in, /* r6 */ + length, /* r7 */ + /* r8 */ + ((((u64)access_ctrl) << 32ULL) | pd.value), + mr_addr_cb, /* r9 */ + 0, 0, 0); + outparms->vaddr = outs[1]; + outparms->lkey = (u32)outs[2]; + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mr *mr, + const struct ehca_mr *orig_mr, + const u64 vaddr_in, + const u32 access_ctrl, + const struct ipz_pd pd, + struct ehca_mr_hipzout_parms *outparms) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_REGISTER_SMR, outs, + adapter_handle.handle, /* r4 */ + orig_mr->ipz_mr_handle.handle, /* r5 */ + vaddr_in, /* r6 */ + (((u64)access_ctrl) << 32ULL), /* r7 */ + pd.value, /* r8 */ + 0, 0, 0, 0); + outparms->handle.handle = outs[0]; + outparms->lkey = (u32)outs[2]; + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + const struct ipz_pd 
pd, + struct ehca_mw_hipzout_parms *outparms) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs, + adapter_handle.handle, /* r4 */ + 6, /* r5 */ + pd.value, /* r6 */ + 0, 0, 0, 0, 0, 0); + outparms->handle.handle = outs[0]; + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw, + struct ehca_mw_hipzout_parms *outparms) +{ + u64 ret; + unsigned long outs[PLPAR_HCALL9_BUFSIZE]; + + ret = ehca_plpar_hcall9(H_QUERY_MW, outs, + adapter_handle.handle, /* r4 */ + mw->ipz_mw_handle.handle, /* r5 */ + 0, 0, 0, 0, 0, 0, 0); + outparms->rkey = (u32)outs[3]; + + return ret; +} + +u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle, + const struct ehca_mw *mw) +{ + return ehca_plpar_hcall_norets(H_FREE_RESOURCE, + adapter_handle.handle, /* r4 */ + mw->ipz_mw_handle.handle, /* r5 */ + 0, 0, 0, 0, 0); +} + +u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle, + const u64 ressource_handle, + void *rblock, + unsigned long *byte_count) +{ + u64 r_cb = __pa(rblock); + + if (r_cb & (EHCA_PAGESIZE-1)) { + ehca_gen_err("rblock not page aligned."); + return H_PARAMETER; + } + + return ehca_plpar_hcall_norets(H_ERROR_DATA, + adapter_handle.handle, + ressource_handle, + r_cb, + 0, 0, 0, 0); +} + +u64 hipz_h_eoi(int irq) +{ + unsigned long xirr; + + iosync(); + xirr = (0xffULL << 24) | irq; + + return plpar_hcall_norets(H_EOI, xirr); +} diff --git a/drivers/staging/rdma/ehca/hcp_if.h b/drivers/staging/rdma/ehca/hcp_if.h new file mode 100644 index 000000000000..a46e514c367b --- /dev/null +++ b/drivers/staging/rdma/ehca/hcp_if.h @@ -0,0 +1,265 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Firmware Infiniband Interface code for POWER + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Gerd Bayer + * Waleri Fomin + * + * Copyright (c) 2005 IBM Corporation + * + * 
All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HCP_IF_H__ +#define __HCP_IF_H__ + +#include "ehca_classes.h" +#include "ehca_tools.h" +#include "hipz_hw.h" + +/* + * hipz_h_alloc_resource_eq allocates EQ resources in HW and FW, initialize + * resources, create the empty EQPT (ring). 
+ */
+u64 hipz_h_alloc_resource_eq(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_pfeq *pfeq,
+			     const u32 neq_control,
+			     const u32 number_of_entries,
+			     struct ipz_eq_handle *eq_handle,
+			     u32 * act_nr_of_entries,
+			     u32 * act_pages,
+			     u32 * eq_ist);
+/*
+ * hipz_h_reset_event takes the adapter handle, an EQ handle (by value) and a
+ * 64-bit event mask.
+ * NOTE(review): every hipz_h_* prototype in this header returns u64;
+ * presumably that is the hypervisor call status - confirm in hcp_if.c.
+ */
+u64 hipz_h_reset_event(const struct ipz_adapter_handle adapter_handle,
+		       struct ipz_eq_handle eq_handle,
+		       const u64 event_mask);
+/*
+ * hipz_h_alloc_resource_cq allocates CQ resources in HW and FW, initializes
+ * the resources and creates the empty CQPT (ring).
+ */
+u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_cq *cq,
+			     struct ehca_alloc_cq_parms *param);
+
+
+/*
+ * hipz_h_alloc_resource_qp allocates QP resources in HW and FW,
+ * initializes the resources and creates the empty QPPTs (2 rings).
+ */
+u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
+			     struct ehca_alloc_qp_parms *parms, int is_user);
+/*
+ * hipz_h_query_port / hipz_h_query_hca take a pointer to a response block
+ * (struct hipz_query_port / struct hipz_query_hca, declared in hipz_hw.h).
+ * NOTE(review): presumably the block is filled in by firmware - confirm.
+ */
+u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
+		      const u8 port_id,
+		      struct hipz_query_port *query_port_response_block);
+
+u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
+		       const u8 port_id, const u32 port_cap,
+		       const u8 init_type, const int modify_mask);
+
+u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
+		     struct hipz_query_hca *query_hca_rblock);
+
+/*
+ * hipz_h_register_rpage is the internal function used for all
+ * hcp_H_REGISTER_RPAGE calls.
+ * NOTE(review): the _eq/_cq/_qp/_mr variants declared below presumably wrap
+ * it for one resource type each - confirm in hcp_if.c.
+ */
+u64 hipz_h_register_rpage(const struct ipz_adapter_handle adapter_handle,
+			  const u8 pagesize,
+			  const u8 queue_type,
+			  const u64 resource_handle,
+			  const u64 logical_address_of_page,
+			  u64 count);
+
+u64 hipz_h_register_rpage_eq(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_eq_handle eq_handle,
+			     struct ehca_pfeq *pfeq,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count);
+
+u64 hipz_h_query_int_state(const struct ipz_adapter_handle
+			   hcp_adapter_handle,
+			   u32 ist);
+
+u64 hipz_h_register_rpage_cq(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_cq_handle cq_handle,
+			     struct ehca_pfcq *pfcq,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count,
+			     const struct h_galpa gal);
+
+u64 hipz_h_register_rpage_qp(const struct ipz_adapter_handle adapter_handle,
+			     const struct ipz_qp_handle qp_handle,
+			     struct ehca_pfqp *pfqp,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count,
+			     const struct h_galpa galpa);
+
+u64 hipz_h_disable_and_get_wqe(const struct ipz_adapter_handle adapter_handle,
+			       const struct ipz_qp_handle qp_handle,
+			       struct ehca_pfqp *pfqp,
+			       void **log_addr_next_sq_wqe_tb_processed,
+			       void **log_addr_next_rq_wqe_tb_processed,
+			       int dis_and_get_function_code);
+enum hcall_sigt {	/* NOTE(review): presumably the QP signalling type - confirm */
+	HCALL_SIGT_NO_CQE = 0,
+	HCALL_SIGT_BY_WQE = 1,
+	HCALL_SIGT_EVERY = 2
+};
+
+u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
+		     const struct ipz_qp_handle qp_handle,
+		     struct ehca_pfqp *pfqp,
+		     const u64 update_mask,
+		     struct hcp_modify_qp_control_block *mqpcb,
+		     struct h_galpa gal);
+
+u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
+		    const struct ipz_qp_handle qp_handle,
+		    struct ehca_pfqp *pfqp,
+		    struct hcp_modify_qp_control_block *qqpcb,
+		    struct h_galpa gal);
+
+u64 hipz_h_destroy_qp(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_qp *qp);
+
+u64 hipz_h_define_aqp0(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u32 port);
+
+u64 hipz_h_define_aqp1(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u32 port, u32 * pma_qp_nr,
+		       u32 * bma_qp_nr);
+/*
+ * hipz_h_attach_mcqp and hipz_h_detach_mcqp have identical parameter lists:
+ * a QP handle plus a multicast group given as mcg_dlid, subnet_prefix and
+ * interface_id.
+ */
+u64 hipz_h_attach_mcqp(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u16 mcg_dlid,
+		       u64 subnet_prefix, u64 interface_id);
+
+u64 hipz_h_detach_mcqp(const struct ipz_adapter_handle adapter_handle,
+		       const struct ipz_qp_handle qp_handle,
+		       struct h_galpa gal,
+		       u16 mcg_dlid,
+		       u64 subnet_prefix, u64 interface_id);
+
+u64 hipz_h_destroy_cq(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_cq *cq,
+		      u8 force_flag);
+
+u64 hipz_h_destroy_eq(const struct ipz_adapter_handle adapter_handle,
+		      struct ehca_eq *eq);
+
+/*
+ * hipz_h_alloc_resource_mr allocates MR resources in HW and FW and
+ * initializes the resources.
+ */
+u64 hipz_h_alloc_resource_mr(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mr *mr,
+			     const u64 vaddr,
+			     const u64 length,
+			     const u32 access_ctrl,
+			     const struct ipz_pd pd,
+			     struct ehca_mr_hipzout_parms *outparms);
+
+/* hipz_h_register_rpage_mr registers MR resource pages in HW and FW */
+u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mr *mr,
+			     const u8 pagesize,
+			     const u8 queue_type,
+			     const u64 logical_address_of_page,
+			     const u64 count);
+
+/* hipz_h_query_mr queries MR in HW and FW */
+u64 hipz_h_query_mr(const struct ipz_adapter_handle adapter_handle,
+		    const struct ehca_mr *mr,
+		    struct ehca_mr_hipzout_parms *outparms);
+
+/* hipz_h_free_resource_mr frees MR resources in HW and FW */
+u64 hipz_h_free_resource_mr(const struct ipz_adapter_handle adapter_handle,
+			    const struct ehca_mr *mr);
+
+/* hipz_h_reregister_pmr reregisters MR in HW and FW */
+u64 hipz_h_reregister_pmr(const struct ipz_adapter_handle adapter_handle,
+			  const struct ehca_mr *mr,
+			  const u64 vaddr_in,
+			  const u64 length,
+			  const u32 access_ctrl,
+			  const struct ipz_pd pd,
+			  const u64 mr_addr_cb,
+			  struct ehca_mr_hipzout_parms *outparms);
+
+/* hipz_h_register_smr registers a shared MR in HW and FW */
+u64 hipz_h_register_smr(const struct ipz_adapter_handle adapter_handle,
+			const struct ehca_mr *mr,
+			const struct ehca_mr *orig_mr,
+			const u64 vaddr_in,
+			const u32 access_ctrl,
+			const struct ipz_pd pd,
+			struct ehca_mr_hipzout_parms *outparms);
+
+/*
+ * hipz_h_alloc_resource_mw allocates MW resources in HW and FW and
+ * initializes the resources.
+ */
+u64 hipz_h_alloc_resource_mw(const struct ipz_adapter_handle adapter_handle,
+			     const struct ehca_mw *mw,
+			     const struct ipz_pd pd,
+			     struct ehca_mw_hipzout_parms *outparms);
+
+/* hipz_h_query_mw queries MW in HW and FW */
+u64 hipz_h_query_mw(const struct ipz_adapter_handle adapter_handle,
+		    const struct ehca_mw *mw,
+		    struct ehca_mw_hipzout_parms *outparms);
+
+/* hipz_h_free_resource_mw frees MW resources in HW and FW */
+u64 hipz_h_free_resource_mw(const struct ipz_adapter_handle adapter_handle,
+			    const struct ehca_mw *mw);
+/*
+ * NOTE(review): the parameter name "ressource_handle" below is a misspelling
+ * of "resource_handle" (compare hipz_h_register_rpage); it is left untouched
+ * here because only comments are changed.
+ */
+u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
+		      const u64 ressource_handle,
+		      void *rblock,
+		      unsigned long *byte_count);
+u64 hipz_h_eoi(int irq);
+
+#endif /* __HCP_IF_H__ */
diff --git a/drivers/staging/rdma/ehca/hcp_phyp.c b/drivers/staging/rdma/ehca/hcp_phyp.c
new file mode 100644
index 000000000000..077376ff3d28
--- /dev/null
+++ b/drivers/staging/rdma/ehca/hcp_phyp.c
@@ -0,0 +1,82 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * load store abstraction for ehca register access with tracing
+ *
+ * Authors: Christoph Raisch
+ *          Hoang-Nam Nguyen
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "ehca_classes.h" +#include "hipz_hw.h" + +u64 hcall_map_page(u64 physaddr) +{ + return (u64)ioremap(physaddr, EHCA_PAGESIZE); +} + +int hcall_unmap_page(u64 mapaddr) +{ + iounmap((volatile void __iomem *) mapaddr); + return 0; +} + +int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, + u64 paddr_kernel, u64 paddr_user) +{ + if (!is_user) { + galpas->kernel.fw_handle = hcall_map_page(paddr_kernel); + if (!galpas->kernel.fw_handle) + return -ENOMEM; + } else + galpas->kernel.fw_handle = 0; + + galpas->user.fw_handle = paddr_user; + + return 0; +} + +int hcp_galpas_dtor(struct h_galpas *galpas) +{ + if (galpas->kernel.fw_handle) { + int ret = hcall_unmap_page(galpas->kernel.fw_handle); + if (ret) + return ret; + } + + galpas->user.fw_handle = galpas->kernel.fw_handle = 0; + + return 0; +} diff --git a/drivers/staging/rdma/ehca/hcp_phyp.h b/drivers/staging/rdma/ehca/hcp_phyp.h new file mode 100644 index 000000000000..d1b029910249 --- /dev/null +++ b/drivers/staging/rdma/ehca/hcp_phyp.h @@ -0,0 +1,90 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * Firmware calls + * + * Authors: Christoph Raisch + * Hoang-Nam Nguyen + * Waleri 
Fomin + * Gerd Bayer + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __HCP_PHYP_H__ +#define __HCP_PHYP_H__ + + +/* + * eHCA page (mapped into memory) + * resource to access eHCA register pages in CPU address space +*/ +struct h_galpa { + u64 fw_handle; + /* for pSeries this is a 64bit memory address where + I/O memory is mapped into CPU address space (kv) */ +}; + +/* + * resource to access eHCA address space registers, all types + */ +struct h_galpas { + u32 pid; /*PID of userspace galpa checking */ + struct h_galpa user; /* user space accessible resource, + set to 0 if unused */ + struct h_galpa kernel; /* kernel space accessible resource, + set to 0 if unused */ +}; + +static inline u64 hipz_galpa_load(struct h_galpa galpa, u32 offset) +{ + u64 addr = galpa.fw_handle + offset; + return *(volatile u64 __force *)addr; +} + +static inline void hipz_galpa_store(struct h_galpa galpa, u32 offset, u64 value) +{ + u64 addr = galpa.fw_handle + offset; + *(volatile u64 __force *)addr = value; +} + +int hcp_galpas_ctor(struct h_galpas *galpas, int is_user, + u64 paddr_kernel, u64 paddr_user); + +int hcp_galpas_dtor(struct h_galpas *galpas); + +u64 hcall_map_page(u64 physaddr); + +int hcall_unmap_page(u64 mapaddr); + +#endif diff --git a/drivers/staging/rdma/ehca/hipz_fns.h b/drivers/staging/rdma/ehca/hipz_fns.h new file mode 100644 index 000000000000..9dac93d02140 --- /dev/null +++ b/drivers/staging/rdma/ehca/hipz_fns.h @@ -0,0 +1,68 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * HW abstraction register functions + * + * Authors: Christoph Raisch + * Reinhard Ernst + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. 
+ * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef __HIPZ_FNS_H__
+#define __HIPZ_FNS_H__
+
+#include "ehca_classes.h"
+#include "hipz_hw.h"
+
+#include "hipz_fns_core.h"
+/*
+ * Register accessors for the EQ, QP extended data (qped) and MR/MW maps.
+ *
+ * In every macro below 'gal' is a struct h_galpa and 'offset' is the NAME of
+ * a member of the matching map structure from hipz_hw.h (struct hipz_eqtemm,
+ * struct hipz_qpedmm, struct hipz_mrmwmm). The *_OFFSET() macro turns that
+ * name into a byte offset with offsetof() before it is handed to
+ * hipz_galpa_load() / hipz_galpa_store(), so passing a numeric offset does
+ * not compile.
+ */
+#define hipz_galpa_store_eq(gal, offset, value) \
+	hipz_galpa_store(gal, EQTEMM_OFFSET(offset), value)
+
+#define hipz_galpa_load_eq(gal, offset) \
+	hipz_galpa_load(gal, EQTEMM_OFFSET(offset))
+
+#define hipz_galpa_store_qped(gal, offset, value) \
+	hipz_galpa_store(gal, QPEDMM_OFFSET(offset), value)
+
+#define hipz_galpa_load_qped(gal, offset) \
+	hipz_galpa_load(gal, QPEDMM_OFFSET(offset))
+
+#define hipz_galpa_store_mrmw(gal, offset, value) \
+	hipz_galpa_store(gal, MRMWMM_OFFSET(offset), value)
+
+#define hipz_galpa_load_mrmw(gal, offset) \
+	hipz_galpa_load(gal, MRMWMM_OFFSET(offset))
+
+#endif
diff --git a/drivers/staging/rdma/ehca/hipz_fns_core.h b/drivers/staging/rdma/ehca/hipz_fns_core.h
new file mode 100644
index 000000000000..868735fd3187
--- /dev/null
+++ b/drivers/staging/rdma/ehca/hipz_fns_core.h
@@ -0,0 +1,100 @@
+/*
+ * IBM eServer eHCA Infiniband device driver for Linux on POWER
+ *
+ * HW abstraction register functions
+ *
+ * Authors: Christoph Raisch
+ *          Heiko J Schick
+ *          Hoang-Nam Nguyen
+ *          Reinhard Ernst
+ *
+ * Copyright (c) 2005 IBM Corporation
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __HIPZ_FNS_CORE_H__ +#define __HIPZ_FNS_CORE_H__ + +#include "hcp_phyp.h" +#include "hipz_hw.h" + +#define hipz_galpa_store_cq(gal, offset, value) \ + hipz_galpa_store(gal, CQTEMM_OFFSET(offset), value) + +#define hipz_galpa_load_cq(gal, offset) \ + hipz_galpa_load(gal, CQTEMM_OFFSET(offset)) + +#define hipz_galpa_store_qp(gal, offset, value) \ + hipz_galpa_store(gal, QPTEMM_OFFSET(offset), value) +#define hipz_galpa_load_qp(gal, offset) \ + hipz_galpa_load(gal, QPTEMM_OFFSET(offset)) + +static inline void hipz_update_sqa(struct ehca_qp *qp, u16 nr_wqes) +{ + /* ringing doorbell :-) */ + hipz_galpa_store_qp(qp->galpas.kernel, qpx_sqa, + EHCA_BMASK_SET(QPX_SQADDER, nr_wqes)); +} + +static inline void hipz_update_rqa(struct ehca_qp *qp, u16 nr_wqes) +{ + /* ringing doorbell :-) */ + hipz_galpa_store_qp(qp->galpas.kernel, qpx_rqa, + EHCA_BMASK_SET(QPX_RQADDER, nr_wqes)); +} + +static inline void hipz_update_feca(struct ehca_cq *cq, u32 nr_cqes) +{ + hipz_galpa_store_cq(cq->galpas.kernel, cqx_feca, + EHCA_BMASK_SET(CQX_FECADDER, nr_cqes)); +} + +static inline void hipz_set_cqx_n0(struct ehca_cq *cq, u32 value) +{ + u64 cqx_n0_reg; + + 
hipz_galpa_store_cq(cq->galpas.kernel, cqx_n0, + EHCA_BMASK_SET(CQX_N0_GENERATE_SOLICITED_COMP_EVENT, + value)); + cqx_n0_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n0); +} + +static inline void hipz_set_cqx_n1(struct ehca_cq *cq, u32 value) +{ + u64 cqx_n1_reg; + + hipz_galpa_store_cq(cq->galpas.kernel, cqx_n1, + EHCA_BMASK_SET(CQX_N1_GENERATE_COMP_EVENT, value)); + cqx_n1_reg = hipz_galpa_load_cq(cq->galpas.kernel, cqx_n1); +} + +#endif /* __HIPZ_FNC_CORE_H__ */ diff --git a/drivers/staging/rdma/ehca/hipz_hw.h b/drivers/staging/rdma/ehca/hipz_hw.h new file mode 100644 index 000000000000..bf996c7acc42 --- /dev/null +++ b/drivers/staging/rdma/ehca/hipz_hw.h @@ -0,0 +1,414 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * eHCA register definitions + * + * Authors: Waleri Fomin + * Christoph Raisch + * Reinhard Ernst + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __HIPZ_HW_H__
+#define __HIPZ_HW_H__
+
+#include "ehca_tools.h"
+
+#define EHCA_MAX_MTU 4
+
+/*
+ * QP Table Entry Memory Map
+ *
+ * Every member is a u64, so each group of four members spans 0x20 bytes; the
+ * hex markers between groups give the byte offset of the member that
+ * follows. The structure is used through QPTEMM_OFFSET(), which converts a
+ * member name into that byte offset - do not insert, remove or reorder
+ * members.
+ */
+struct hipz_qptemm {
+	u64 qpx_hcr;
+	u64 qpx_c;
+	u64 qpx_herr;
+	u64 qpx_aer;
+/* 0x20*/
+	u64 qpx_sqa;
+	u64 qpx_sqc;
+	u64 qpx_rqa;
+	u64 qpx_rqc;
+/* 0x40*/
+	u64 qpx_st;
+	u64 qpx_pmstate;
+	u64 qpx_pmfa;
+	u64 qpx_pkey;
+/* 0x60*/
+	u64 qpx_pkeya;
+	u64 qpx_pkeyb;
+	u64 qpx_pkeyc;
+	u64 qpx_pkeyd;
+/* 0x80*/
+	u64 qpx_qkey;
+	u64 qpx_dqp;
+	u64 qpx_dlidp;
+	u64 qpx_portp;
+/* 0xa0*/
+	u64 qpx_slidp;
+	u64 qpx_slidpp;
+	u64 qpx_dlida;
+	u64 qpx_porta;
+/* 0xc0*/
+	u64 qpx_slida;
+	u64 qpx_slidpa;
+	u64 qpx_slvl;
+	u64 qpx_ipd;
+/* 0xe0*/
+	u64 qpx_mtu;
+	u64 qpx_lato;
+	u64 qpx_rlimit;
+	u64 qpx_rnrlimit;
+/* 0x100*/
+	u64 qpx_t;
+	u64 qpx_sqhp;
+	u64 qpx_sqptp;
+	u64 qpx_nspsn;
+/* 0x120*/
+	u64 qpx_nspsnhwm;
+	u64 reserved1;
+	u64 qpx_sdsi;
+	u64 qpx_sdsbc;
+/* 0x140*/
+	u64 qpx_sqwsize;
+	u64 qpx_sqwts;
+	u64 qpx_lsn;
+	u64 qpx_nssn;
+/* 0x160 */
+	u64 qpx_mor;
+	u64 qpx_cor;
+	u64 qpx_sqsize;
+	u64 qpx_erc;
+/* 0x180*/
+	u64 qpx_rnrrc;
+	u64 qpx_ernrwt;
+	u64 qpx_rnrresp;
+	u64 qpx_lmsna;
+/* 0x1a0 */
+	u64 qpx_sqhpc;
+	u64 qpx_sqcptp;
+	u64 qpx_sigt;
+	u64 qpx_wqecnt;
+/* 0x1c0*/
+	u64 qpx_rqhp;
+	u64 qpx_rqptp;
+	u64 qpx_rqsize;
+	u64 qpx_nrr;
+/* 0x1e0*/
+	u64 qpx_rdmac;
+	u64 qpx_nrpsn;
+	u64 qpx_lapsn;
+	u64 qpx_lcr;
+/* 0x200*/
+	u64 qpx_rwc;
+	u64 qpx_rwva;
+	u64 qpx_rdsi;
+	u64 qpx_rdsbc;
+/* 0x220*/
+	u64 qpx_rqwsize;
+	u64 qpx_crmsn;
+	u64 qpx_rdd;
+	u64 qpx_larpsn;
+/* 0x240*/
+	u64 qpx_pd;
+	u64 qpx_scqn;
+	u64 qpx_rcqn;
+	u64 qpx_aeqn;
+/* 0x260*/
+	u64 qpx_aaelog;
+	u64 qpx_ram;
+	u64 qpx_rdmaqe0;
+	u64 qpx_rdmaqe1;
+/* 0x280*/
+	u64 qpx_rdmaqe2;
+	u64 qpx_rdmaqe3;
+	u64 qpx_nrpsnhwm;
+/* 0x298: padding up to 0x400 */
+	u64 reserved[(0x400 - 0x298) / 8];
+/* 0x400 extended data */
+	u64 reserved_ext[(0x500 - 0x400) / 8];
+/* 0x500: padding up to the end of the 0x1000 byte entry */
+	u64 reserved2[(0x1000 - 0x500) / 8];
+/* 0x1000 */
+};
+
+#define QPX_SQADDER EHCA_BMASK_IBM(48, 63)
+#define QPX_RQADDER EHCA_BMASK_IBM(48, 63)
+#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3, 3)
+
+#define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm, x)
+
+/*
+ * MRMWPT Entry Memory Map
+ *
+ * Accessed by member name through MRMWMM_OFFSET(); the markers inside give
+ * the byte offset of the member that follows.
+ */
+struct hipz_mrmwmm {
+	/* 0x00 */
+	u64 mrx_hcr;
+
+	u64 mrx_c;
+	u64 mrx_herr;
+	u64 mrx_aer;
+	/* 0x20 */
+	u64 mrx_pp;
+	u64 reserved1;
+	u64 reserved2;
+	u64 reserved3;
+	/* 0x40 */
+	u64 reserved4[(0x200 - 0x40) / 8];
+	/* 0x200 */
+	u64 mrx_ctl[64];
+
+};
+
+#define MRMWMM_OFFSET(x) offsetof(struct hipz_mrmwmm, x)
+/*
+ * QP extended data memory map, accessed by member name through
+ * QPEDMM_OFFSET(). The first 0x400 bytes are reserved; named members start
+ * at 0x400.
+ */
+struct hipz_qpedmm {
+	/* 0x00 */
+	u64 reserved0[(0x400) / 8];
+	/* 0x400 */
+	u64 qpedx_phh;
+	u64 qpedx_ppsgp;
+	/* 0x410 */
+	u64 qpedx_ppsgu;
+	u64 qpedx_ppdgp;
+	/* 0x420 */
+	u64 qpedx_ppdgu;
+	u64 qpedx_aph;
+	/* 0x430 */
+	u64 qpedx_apsgp;
+	u64 qpedx_apsgu;
+	/* 0x440 */
+	u64 qpedx_apdgp;
+	u64 qpedx_apdgu;
+	/* 0x450 */
+	u64 qpedx_apav;
+	u64 qpedx_apsav;
+	/* 0x460 */
+	u64 qpedx_hcr;
+	u64 reserved1[4];
+	/* 0x488 */
+	u64 qpedx_rrl0;
+	/* 0x490 */
+	u64 qpedx_rrrkey0;
+	u64 qpedx_rrva0;
+	/* 0x4a0 */
+	u64 reserved2;
+	u64 qpedx_rrl1;
+	/* 0x4b0 */
+	u64 qpedx_rrrkey1;
+	u64 qpedx_rrva1;
+	/* 0x4c0 */
+	u64 reserved3;
+	u64 qpedx_rrl2;
+	/* 0x4d0 */
+	u64 qpedx_rrrkey2;
+	u64 qpedx_rrva2;
+	/* 0x4e0 */
+	u64 reserved4;
+	u64 qpedx_rrl3;
+	/* 0x4f0 */
+	u64 qpedx_rrrkey3;
+	u64 qpedx_rrva3;
+};
+
+#define QPEDMM_OFFSET(x) offsetof(struct hipz_qpedmm, x)
+
+/*
+ * CQ Table Entry Memory Map
+ *
+ * Accessed by member name through CQTEMM_OFFSET().
+ */
+struct hipz_cqtemm {
+	u64 cqx_hcr;
+	u64 cqx_c;
+	u64 cqx_herr;
+	u64 cqx_aer;
+/* 0x20 */
+	u64 cqx_ptp;
+	u64 cqx_tp;
+	u64 cqx_fec;
+	u64 cqx_feca;
+/* 0x40 */
+	u64 cqx_ep;
+	u64 cqx_eq;
+/* 0x50 */
+	u64 reserved1;
+	u64 cqx_n0;
+/* 0x60 */
+	u64 cqx_n1;
+	u64 reserved2[(0x1000 - 0x60) / 8];
+/*
+ * NOTE(review): cqx_n1 occupies 0x60..0x67, so reserved2 starts at 0x68 and
+ * the structure ends at 0x1008, not 0x1000 as the original marker here
+ * claimed. Harmless for the offsetof() based accessors, but sizeof() is 8
+ * bytes larger than one 0x1000 byte entry.
+ */
+};
+
+#define CQX_FEC_CQE_CNT EHCA_BMASK_IBM(32, 63)
+#define CQX_FECADDER EHCA_BMASK_IBM(32, 63)
+#define CQX_N0_GENERATE_SOLICITED_COMP_EVENT EHCA_BMASK_IBM(0, 0)
+#define CQX_N1_GENERATE_COMP_EVENT EHCA_BMASK_IBM(0, 0)
+
+#define CQTEMM_OFFSET(x) offsetof(struct hipz_cqtemm, x)
+
+/*
+ * EQ Table Entry Memory Map
+ *
+ * Accessed by member name through EQTEMM_OFFSET(). Unlike the QP and CQ maps
+ * this structure is not padded after its last register (eqx_it at 0x60).
+ */
+struct hipz_eqtemm {
+	u64 eqx_hcr;
+	u64 eqx_c;
+
+	u64 eqx_herr;
+	u64 eqx_aer;
+/* 0x20 */
+	u64 eqx_ptp;
+	u64 eqx_tp;
+	u64 eqx_ssba;
+	u64 eqx_psba;
+
+/* 0x40 */
+	u64 eqx_cec;
+	u64 eqx_meql;
+	u64 eqx_xisbi;
+	u64 eqx_xisc;
+/* 0x60 */
+	u64 eqx_it;
+
+};
+
+#define EQTEMM_OFFSET(x) offsetof(struct hipz_eqtemm, x)
+
+/* access control defines for MR/MW */
+#define HIPZ_ACCESSCTRL_L_WRITE 0x00800000
+#define HIPZ_ACCESSCTRL_R_WRITE 0x00400000
+#define HIPZ_ACCESSCTRL_R_READ 0x00200000
+#define HIPZ_ACCESSCTRL_R_ATOMIC 0x00100000
+#define HIPZ_ACCESSCTRL_MW_BIND 0x00080000
+
+/*
+ * query hca response block
+ *
+ * Declared packed, so members sit back to back with no padding; the u64
+ * members (max_mr_size, node_guid, hca_cap_indicators) are therefore not
+ * necessarily 8-byte aligned within the block.
+ */
+struct hipz_query_hca {
+	u32 cur_reliable_dg;
+	u32 cur_qp;
+	u32 cur_cq;
+	u32 cur_eq;
+	u32 cur_mr;
+	u32 cur_mw;
+	u32 cur_ee_context;
+	u32 cur_mcast_grp;
+	u32 cur_qp_attached_mcast_grp;
+	u32 reserved1;
+	u32 cur_ipv6_qp;
+	u32 cur_eth_qp;
+	u32 cur_hp_mr;
+	u32 reserved2[3];
+	u32 max_rd_domain;
+	u32 max_qp;
+	u32 max_cq;
+	u32 max_eq;
+	u32 max_mr;
+	u32 max_hp_mr;
+	u32 max_mw;
+	u32 max_mrwpte;
+	u32 max_special_mrwpte;
+	u32 max_rd_ee_context;
+	u32 max_mcast_grp;
+	u32 max_total_mcast_qp_attach;
+	u32 max_mcast_qp_attach;
+	u32 max_raw_ipv6_qp;
+	u32 max_raw_ethy_qp;
+	u32 internal_clock_frequency;
+	u32 max_pd;
+	u32 max_ah;
+	u32 max_cqe;
+	u32 max_wqes_wq;
+	u32 max_partitions;
+	u32 max_rr_ee_context;
+	u32 max_rr_qp;
+	u32 max_rr_hca;
+	u32 max_act_wqs_ee_context;
+	u32 max_act_wqs_qp;
+	u32 max_sge;
+	u32 max_sge_rd;
+	u32 memory_page_size_supported;
+	u64 max_mr_size;
+	u32 local_ca_ack_delay;
+	u32 num_ports;
+	u32 vendor_id;
+	u32 vendor_part_id;
+	u32 hw_ver;
+	u64 node_guid;
+	u64 hca_cap_indicators;
+	u32 data_counter_register_size;
+	u32 max_shared_rq;
+	u32 max_isns_eq;
+	u32 max_neq;
+} __attribute__ ((packed));
+/*
+ * Capability bit fields. NOTE(review): presumably these are applied to
+ * hipz_query_hca.hca_cap_indicators - confirm at the call sites. Bit
+ * positions 14 and 15 have no define in this list.
+ */
+#define HCA_CAP_AH_PORT_NR_CHECK EHCA_BMASK_IBM( 0, 0)
+#define HCA_CAP_ATOMIC EHCA_BMASK_IBM( 1, 1)
+#define HCA_CAP_AUTO_PATH_MIG EHCA_BMASK_IBM( 2, 2)
+#define HCA_CAP_BAD_P_KEY_CTR EHCA_BMASK_IBM( 3, 3)
+#define HCA_CAP_SQD_RTS_PORT_CHANGE EHCA_BMASK_IBM( 4, 4)
+#define HCA_CAP_CUR_QP_STATE_MOD EHCA_BMASK_IBM( 5, 5)
+#define HCA_CAP_INIT_TYPE EHCA_BMASK_IBM( 6, 6)
+#define HCA_CAP_PORT_ACTIVE_EVENT EHCA_BMASK_IBM( 7, 7)
+#define HCA_CAP_Q_KEY_VIOL_CTR EHCA_BMASK_IBM( 8, 8)
+#define HCA_CAP_WQE_RESIZE EHCA_BMASK_IBM( 9, 9)
+#define HCA_CAP_RAW_PACKET_MCAST EHCA_BMASK_IBM(10, 10)
+#define HCA_CAP_SHUTDOWN_PORT EHCA_BMASK_IBM(11, 11)
+#define HCA_CAP_RC_LL_QP EHCA_BMASK_IBM(12, 12)
+#define HCA_CAP_SRQ EHCA_BMASK_IBM(13, 13)
+#define HCA_CAP_UD_LL_QP EHCA_BMASK_IBM(16, 16)
+#define HCA_CAP_RESIZE_MR EHCA_BMASK_IBM(17, 17)
+#define HCA_CAP_MINI_QP EHCA_BMASK_IBM(18, 18)
+#define HCA_CAP_H_ALLOC_RES_SYNC EHCA_BMASK_IBM(19, 19)
+
+/*
+ * query port response block
+ *
+ * Declared packed. With no padding the members up to phys_width occupy 164
+ * bytes, reserved2 fills bytes 164..2047, and guid_entries starts at byte
+ * 2048; the whole block is 4088 bytes.
+ */
+struct hipz_query_port {
+	u32 state;
+	u32 bad_pkey_cntr;
+	u32 lmc;
+	u32 lid;
+	u32 subnet_timeout;
+	u32 qkey_viol_cntr;
+	u32 sm_sl;
+	u32 sm_lid;
+	u32 capability_mask;
+	u32 init_type_reply;
+	u32 pkey_tbl_len;
+	u32 gid_tbl_len;
+	u64 gid_prefix;
+	u32 port_nr;
+	u16 pkey_entries[16];
+	u8 reserved1[32];
+	u32 trent_size;
+	u32 trbuf_size;
+	u64 max_msg_sz;
+	u32 max_mtu;
+	u32 vl_cap;
+	u32 phys_pstate;
+	u32 phys_state;
+	u32 phys_speed;
+	u32 phys_width;
+	u8 reserved2[1884];	/* pads the block so guid_entries begins at 2048 */
+	u64 guid_entries[255];
+} __attribute__ ((packed));
+
+#endif
diff --git a/drivers/staging/rdma/ehca/ipz_pt_fn.c
b/drivers/staging/rdma/ehca/ipz_pt_fn.c new file mode 100644 index 000000000000..7ffc748cb973 --- /dev/null +++ b/drivers/staging/rdma/ehca/ipz_pt_fn.c @@ -0,0 +1,289 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * internal queue handling + * + * Authors: Waleri Fomin + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include + +#include "ehca_tools.h" +#include "ipz_pt_fn.h" +#include "ehca_classes.h" + +#define PAGES_PER_KPAGE (PAGE_SIZE >> EHCA_PAGESHIFT) + +struct kmem_cache *small_qp_cache; + +void *ipz_qpageit_get_inc(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + queue->current_q_offset += queue->pagesize; + if (queue->current_q_offset > queue->queue_length) { + queue->current_q_offset -= queue->pagesize; + ret = NULL; + } + if (((u64)ret) % queue->pagesize) { + ehca_gen_err("ERROR!! not at PAGE-Boundary"); + return NULL; + } + return ret; +} + +void *ipz_qeit_eq_get_inc(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + u64 last_entry_in_q = queue->queue_length - queue->qe_size; + + queue->current_q_offset += queue->qe_size; + if (queue->current_q_offset > last_entry_in_q) { + queue->current_q_offset = 0; + queue->toggle_state = (~queue->toggle_state) & 1; + } + + return ret; +} + +int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset) +{ + int i; + for (i = 0; i < queue->queue_length / queue->pagesize; i++) { + u64 page = __pa(queue->queue_pages[i]); + if (addr >= page && addr < page + queue->pagesize) { + *q_offset = addr - page + i * queue->pagesize; + return 0; + } + } + return -EINVAL; +} + +#if PAGE_SHIFT < EHCA_PAGESHIFT +#error Kernel pages must be at least as large than eHCA pages (4K) ! 
+#endif + +/* + * allocate pages for queue: + * outer loop allocates whole kernel pages (page aligned) and + * inner loop divides a kernel page into smaller hca queue pages + */ +static int alloc_queue_pages(struct ipz_queue *queue, const u32 nr_of_pages) +{ + int k, f = 0; + u8 *kpage; + + while (f < nr_of_pages) { + kpage = (u8 *)get_zeroed_page(GFP_KERNEL); + if (!kpage) + goto out; + + for (k = 0; k < PAGES_PER_KPAGE && f < nr_of_pages; k++) { + queue->queue_pages[f] = (struct ipz_page *)kpage; + kpage += EHCA_PAGESIZE; + f++; + } + } + return 1; + +out: + for (f = 0; f < nr_of_pages && queue->queue_pages[f]; + f += PAGES_PER_KPAGE) + free_page((unsigned long)(queue->queue_pages)[f]); + return 0; +} + +static int alloc_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) +{ + int order = ilog2(queue->pagesize) - 9; + struct ipz_small_queue_page *page; + unsigned long bit; + + mutex_lock(&pd->lock); + + if (!list_empty(&pd->free[order])) + page = list_entry(pd->free[order].next, + struct ipz_small_queue_page, list); + else { + page = kmem_cache_zalloc(small_qp_cache, GFP_KERNEL); + if (!page) + goto out; + + page->page = get_zeroed_page(GFP_KERNEL); + if (!page->page) { + kmem_cache_free(small_qp_cache, page); + goto out; + } + + list_add(&page->list, &pd->free[order]); + } + + bit = find_first_zero_bit(page->bitmap, IPZ_SPAGE_PER_KPAGE >> order); + __set_bit(bit, page->bitmap); + page->fill++; + + if (page->fill == IPZ_SPAGE_PER_KPAGE >> order) + list_move(&page->list, &pd->full[order]); + + mutex_unlock(&pd->lock); + + queue->queue_pages[0] = (void *)(page->page | (bit << (order + 9))); + queue->small_page = page; + queue->offset = bit << (order + 9); + return 1; + +out: + ehca_err(pd->ib_pd.device, "failed to allocate small queue page"); + mutex_unlock(&pd->lock); + return 0; +} + +static void free_small_queue_page(struct ipz_queue *queue, struct ehca_pd *pd) +{ + int order = ilog2(queue->pagesize) - 9; + struct ipz_small_queue_page *page = 
queue->small_page; + unsigned long bit; + int free_page = 0; + + bit = ((unsigned long)queue->queue_pages[0] & ~PAGE_MASK) + >> (order + 9); + + mutex_lock(&pd->lock); + + __clear_bit(bit, page->bitmap); + page->fill--; + + if (page->fill == 0) { + list_del(&page->list); + free_page = 1; + } + + if (page->fill == (IPZ_SPAGE_PER_KPAGE >> order) - 1) + /* the page was full until we freed the chunk */ + list_move_tail(&page->list, &pd->free[order]); + + mutex_unlock(&pd->lock); + + if (free_page) { + free_page(page->page); + kmem_cache_free(small_qp_cache, page); + } +} + +int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, + const u32 nr_of_pages, const u32 pagesize, + const u32 qe_size, const u32 nr_of_sg, + int is_small) +{ + if (pagesize > PAGE_SIZE) { + ehca_gen_err("FATAL ERROR: pagesize=%x " + "is greater than kernel page size", pagesize); + return 0; + } + + /* init queue fields */ + queue->queue_length = nr_of_pages * pagesize; + queue->pagesize = pagesize; + queue->qe_size = qe_size; + queue->act_nr_of_sg = nr_of_sg; + queue->current_q_offset = 0; + queue->toggle_state = 1; + queue->small_page = NULL; + + /* allocate queue page pointers */ + queue->queue_pages = kzalloc(nr_of_pages * sizeof(void *), + GFP_KERNEL | __GFP_NOWARN); + if (!queue->queue_pages) { + queue->queue_pages = vzalloc(nr_of_pages * sizeof(void *)); + if (!queue->queue_pages) { + ehca_gen_err("Couldn't allocate queue page list"); + return 0; + } + } + + /* allocate actual queue pages */ + if (is_small) { + if (!alloc_small_queue_page(queue, pd)) + goto ipz_queue_ctor_exit0; + } else + if (!alloc_queue_pages(queue, nr_of_pages)) + goto ipz_queue_ctor_exit0; + + return 1; + +ipz_queue_ctor_exit0: + ehca_gen_err("Couldn't alloc pages queue=%p " + "nr_of_pages=%x", queue, nr_of_pages); + kvfree(queue->queue_pages); + + return 0; +} + +int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue) +{ + int i, nr_pages; + + if (!queue || !queue->queue_pages) { + ehca_gen_dbg("queue 
or queue_pages is NULL"); + return 0; + } + + if (queue->small_page) + free_small_queue_page(queue, pd); + else { + nr_pages = queue->queue_length / queue->pagesize; + for (i = 0; i < nr_pages; i += PAGES_PER_KPAGE) + free_page((unsigned long)queue->queue_pages[i]); + } + + kvfree(queue->queue_pages); + + return 1; +} + +int ehca_init_small_qp_cache(void) +{ + small_qp_cache = kmem_cache_create("ehca_cache_small_qp", + sizeof(struct ipz_small_queue_page), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!small_qp_cache) + return -ENOMEM; + + return 0; +} + +void ehca_cleanup_small_qp_cache(void) +{ + kmem_cache_destroy(small_qp_cache); +} diff --git a/drivers/staging/rdma/ehca/ipz_pt_fn.h b/drivers/staging/rdma/ehca/ipz_pt_fn.h new file mode 100644 index 000000000000..a801274ea337 --- /dev/null +++ b/drivers/staging/rdma/ehca/ipz_pt_fn.h @@ -0,0 +1,289 @@ +/* + * IBM eServer eHCA Infiniband device driver for Linux on POWER + * + * internal queue handling + * + * Authors: Waleri Fomin + * Reinhard Ernst + * Christoph Raisch + * + * Copyright (c) 2005 IBM Corporation + * + * All rights reserved. + * + * This source code is distributed under a dual license of GPL v2.0 and OpenIB + * BSD. + * + * OpenIB BSD License + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __IPZ_PT_FN_H__ +#define __IPZ_PT_FN_H__ + +#define EHCA_PAGESHIFT 12 +#define EHCA_PAGESIZE 4096UL +#define EHCA_PAGEMASK (~(EHCA_PAGESIZE-1)) +#define EHCA_PT_ENTRIES 512UL + +#include "ehca_tools.h" +#include "ehca_qes.h" + +struct ehca_pd; +struct ipz_small_queue_page; + +extern struct kmem_cache *small_qp_cache; + +/* struct generic ehca page */ +struct ipz_page { + u8 entries[EHCA_PAGESIZE]; +}; + +#define IPZ_SPAGE_PER_KPAGE (PAGE_SIZE / 512) + +struct ipz_small_queue_page { + unsigned long page; + unsigned long bitmap[IPZ_SPAGE_PER_KPAGE / BITS_PER_LONG]; + int fill; + void *mapped_addr; + u32 mmap_count; + struct list_head list; +}; + +/* struct generic queue in linux kernel virtual memory (kv) */ +struct ipz_queue { + u64 current_q_offset; /* current queue entry */ + + struct ipz_page **queue_pages; /* array of pages belonging to queue */ + u32 qe_size; /* queue entry size */ + u32 act_nr_of_sg; + u32 queue_length; /* queue length allocated in bytes */ + u32 pagesize; + u32 toggle_state; /* toggle flag - per page */ + u32 offset; /* save offset within page for small_qp */ + struct ipz_small_queue_page *small_page; +}; + +/* + * return current Queue Entry for a certain q_offset + * returns address (kv) of Queue Entry + */ +static inline void *ipz_qeit_calc(struct ipz_queue *queue, u64 q_offset) +{ + struct ipz_page *current_page; + if (q_offset >= queue->queue_length) + return NULL; + current_page = 
(queue->queue_pages)[q_offset >> EHCA_PAGESHIFT]; + return &current_page->entries[q_offset & (EHCA_PAGESIZE - 1)]; +} + +/* + * return current Queue Entry + * returns address (kv) of Queue Entry + */ +static inline void *ipz_qeit_get(struct ipz_queue *queue) +{ + return ipz_qeit_calc(queue, queue->current_q_offset); +} + +/* + * return current Queue Page , increment Queue Page iterator from + * page to page in struct ipz_queue, last increment will return 0! and + * NOT wrap + * returns address (kv) of Queue Page + * warning don't use in parallel with ipz_QE_get_inc() + */ +void *ipz_qpageit_get_inc(struct ipz_queue *queue); + +/* + * return current Queue Entry, increment Queue Entry iterator by one + * step in struct ipz_queue, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * warning don't use in parallel with ipz_qpageit_get_inc() + */ +static inline void *ipz_qeit_get_inc(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + queue->current_q_offset += queue->qe_size; + if (queue->current_q_offset >= queue->queue_length) { + queue->current_q_offset = 0; + /* toggle the valid flag */ + queue->toggle_state = (~queue->toggle_state) & 1; + } + + return ret; +} + +/* + * return a bool indicating whether current Queue Entry is valid + */ +static inline int ipz_qeit_is_valid(struct ipz_queue *queue) +{ + struct ehca_cqe *cqe = ipz_qeit_get(queue); + return ((cqe->cqe_flags >> 7) == (queue->toggle_state & 1)); +} + +/* + * return current Queue Entry, increment Queue Entry iterator by one + * step in struct ipz_queue, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * returns 0 and does not increment, if wrong valid state + * warning don't use in parallel with ipz_qpageit_get_inc() + */ +static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue) +{ + return ipz_qeit_is_valid(queue) ?
ipz_qeit_get_inc(queue) : NULL; +} + +/* + * returns and resets Queue Entry iterator + * returns address (kv) of first Queue Entry + */ +static inline void *ipz_qeit_reset(struct ipz_queue *queue) +{ + queue->current_q_offset = 0; + return ipz_qeit_get(queue); +} + +/* + * return the q_offset corresponding to an absolute address + */ +int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset); + +/* + * return the next queue offset. don't modify the queue. + */ +static inline u64 ipz_queue_advance_offset(struct ipz_queue *queue, u64 offset) +{ + offset += queue->qe_size; + if (offset >= queue->queue_length) offset = 0; + return offset; +} + +/* struct generic page table */ +struct ipz_pt { + u64 entries[EHCA_PT_ENTRIES]; +}; + +/* struct page table for a queue, only to be used in pf */ +struct ipz_qpt { + /* queue page tables (kv), use u64 because we know the element length */ + u64 *qpts; + u32 n_qpts; + u32 n_ptes; /* number of page table entries */ + u64 *current_pte_addr; +}; + +/* + * constructor for a ipz_queue_t, placement new for ipz_queue_t, + * new for all dependent datastructors + * all QP Tables are the same + * flow: + * allocate+pin queue + * see ipz_qpt_ctor() + * returns true if ok, false if out of memory + */ +int ipz_queue_ctor(struct ehca_pd *pd, struct ipz_queue *queue, + const u32 nr_of_pages, const u32 pagesize, + const u32 qe_size, const u32 nr_of_sg, + int is_small); + +/* + * destructor for a ipz_queue_t + * -# free queue + * see ipz_queue_ctor() + * returns true if ok, false if queue was NULL-ptr of free failed + */ +int ipz_queue_dtor(struct ehca_pd *pd, struct ipz_queue *queue); + +/* + * constructor for a ipz_qpt_t, + * placement new for struct ipz_queue, new for all dependent datastructors + * all QP Tables are the same, + * flow: + * -# allocate+pin queue + * -# initialise ptcb + * -# allocate+pin PTs + * -# link PTs to a ring, according to HCA Arch, set bit62 id needed + * -# the ring must have room for exactly 
nr_of_PTEs + * see ipz_qpt_ctor() + */ +void ipz_qpt_ctor(struct ipz_qpt *qpt, + const u32 nr_of_qes, + const u32 pagesize, + const u32 qe_size, + const u8 lowbyte, const u8 toggle, + u32 * act_nr_of_QEs, u32 * act_nr_of_pages); + +/* + * return current Queue Entry, increment Queue Entry iterator by one + * step in struct ipz_queue, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * warning don't use in parallel with ipz_qpageit_get_inc() + * warning unpredictable results may occur if steps>act_nr_of_queue_entries + * fix EQ page problems + */ +void *ipz_qeit_eq_get_inc(struct ipz_queue *queue); + +/* + * return current Event Queue Entry, increment Queue Entry iterator + * by one step in struct ipz_queue if valid, will wrap in ringbuffer + * returns address (kv) of Queue Entry BEFORE increment + * returns 0 and does not increment, if wrong valid state + * warning don't use in parallel with ipz_queue_QPageit_get_inc() + * warning unpredictable results may occur if steps>act_nr_of_queue_entries + */ +static inline void *ipz_eqit_eq_get_inc_valid(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + u32 qe = *(u8 *)ret; + if ((qe >> 7) != (queue->toggle_state & 1)) + return NULL; + ipz_qeit_eq_get_inc(queue); /* this is a good one */ + return ret; +} + +static inline void *ipz_eqit_eq_peek_valid(struct ipz_queue *queue) +{ + void *ret = ipz_qeit_get(queue); + u32 qe = *(u8 *)ret; + if ((qe >> 7) != (queue->toggle_state & 1)) + return NULL; + return ret; +} + +/* returns address (GX) of first queue entry */ +static inline u64 ipz_qpt_get_firstpage(struct ipz_qpt *qpt) +{ + return be64_to_cpu(qpt->qpts[0]); +} + +/* returns address (kv) of first page of queue page table */ +static inline void *ipz_qpt_get_qpt(struct ipz_qpt *qpt) +{ + return qpt->qpts; +} + +#endif /* __IPZ_PT_FN_H__ */ diff --git a/drivers/staging/rdma/hfi1/chip.c b/drivers/staging/rdma/hfi1/chip.c index 654eafef1d30..aa58e597df06 100644 --- 
a/drivers/staging/rdma/hfi1/chip.c +++ b/drivers/staging/rdma/hfi1/chip.c @@ -2710,7 +2710,7 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok) if (sleep_ok) { mutex_lock(&ppd->hls_lock); } else { - while (mutex_trylock(&ppd->hls_lock) == EBUSY) + while (!mutex_trylock(&ppd->hls_lock)) udelay(1); } @@ -2758,7 +2758,7 @@ int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok) if (sleep_ok) { mutex_lock(&dd->pport->hls_lock); } else { - while (mutex_trylock(&dd->pport->hls_lock) == EBUSY) + while (!mutex_trylock(&dd->pport->hls_lock)) udelay(1); } diff --git a/drivers/staging/rdma/hfi1/device.c b/drivers/staging/rdma/hfi1/device.c index 07c87a87775f..bc26a5392712 100644 --- a/drivers/staging/rdma/hfi1/device.c +++ b/drivers/staging/rdma/hfi1/device.c @@ -57,11 +57,13 @@ #include "device.h" static struct class *class; +static struct class *user_class; static dev_t hfi1_dev; int hfi1_cdev_init(int minor, const char *name, const struct file_operations *fops, - struct cdev *cdev, struct device **devp) + struct cdev *cdev, struct device **devp, + bool user_accessible) { const dev_t dev = MKDEV(MAJOR(hfi1_dev), minor); struct device *device = NULL; @@ -78,7 +80,11 @@ int hfi1_cdev_init(int minor, const char *name, goto done; } - device = device_create(class, NULL, dev, NULL, "%s", name); + if (user_accessible) + device = device_create(user_class, NULL, dev, NULL, "%s", name); + else + device = device_create(class, NULL, dev, NULL, "%s", name); + if (!IS_ERR(device)) goto done; ret = PTR_ERR(device); @@ -110,6 +116,26 @@ const char *class_name(void) return hfi1_class_name; } +static char *hfi1_devnode(struct device *dev, umode_t *mode) +{ + if (mode) + *mode = 0600; + return kasprintf(GFP_KERNEL, "%s", dev_name(dev)); +} + +static const char *hfi1_class_name_user = "hfi1_user"; +const char *class_name_user(void) +{ + return hfi1_class_name_user; +} + +static char *hfi1_user_devnode(struct device *dev, umode_t *mode) +{ + if (mode) + *mode = 0666; + 
return kasprintf(GFP_KERNEL, "%s", dev_name(dev)); +} + int __init dev_init(void) { int ret; @@ -125,7 +151,22 @@ int __init dev_init(void) ret = PTR_ERR(class); pr_err("Could not create device class (err %d)\n", -ret); unregister_chrdev_region(hfi1_dev, HFI1_NMINORS); + goto done; } + class->devnode = hfi1_devnode; + + user_class = class_create(THIS_MODULE, class_name_user()); + if (IS_ERR(user_class)) { + ret = PTR_ERR(user_class); + pr_err("Could not create device class for user accessible files (err %d)\n", + -ret); + class_destroy(class); + class = NULL; + user_class = NULL; + unregister_chrdev_region(hfi1_dev, HFI1_NMINORS); + goto done; + } + user_class->devnode = hfi1_user_devnode; done: return ret; @@ -133,10 +174,11 @@ done: void dev_cleanup(void) { - if (class) { - class_destroy(class); - class = NULL; - } + class_destroy(class); + class = NULL; + + class_destroy(user_class); + user_class = NULL; unregister_chrdev_region(hfi1_dev, HFI1_NMINORS); } diff --git a/drivers/staging/rdma/hfi1/device.h b/drivers/staging/rdma/hfi1/device.h index 98caecd3d807..2850ff739d81 100644 --- a/drivers/staging/rdma/hfi1/device.h +++ b/drivers/staging/rdma/hfi1/device.h @@ -52,7 +52,8 @@ int hfi1_cdev_init(int minor, const char *name, const struct file_operations *fops, - struct cdev *cdev, struct device **devp); + struct cdev *cdev, struct device **devp, + bool user_accessible); void hfi1_cdev_cleanup(struct cdev *cdev, struct device **devp); const char *class_name(void); int __init dev_init(void); diff --git a/drivers/staging/rdma/hfi1/diag.c b/drivers/staging/rdma/hfi1/diag.c index 6777d6b659cf..3e8d5ac4c626 100644 --- a/drivers/staging/rdma/hfi1/diag.c +++ b/drivers/staging/rdma/hfi1/diag.c @@ -292,7 +292,7 @@ int hfi1_diag_add(struct hfi1_devdata *dd) if (atomic_inc_return(&diagpkt_count) == 1) { ret = hfi1_cdev_init(HFI1_DIAGPKT_MINOR, name, &diagpkt_file_ops, &diagpkt_cdev, - &diagpkt_device); + &diagpkt_device, false); } return ret; @@ -592,7 +592,8 @@ static int 
hfi1_snoop_add(struct hfi1_devdata *dd, const char *name) ret = hfi1_cdev_init(HFI1_SNOOP_CAPTURE_BASE + dd->unit, name, &snoop_file_ops, - &dd->hfi1_snoop.cdev, &dd->hfi1_snoop.class_dev); + &dd->hfi1_snoop.cdev, &dd->hfi1_snoop.class_dev, + false); if (ret) { dd_dev_err(dd, "Couldn't create %s device: %d", name, ret); @@ -1012,11 +1013,10 @@ static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) case HFI1_SNOOP_IOCSETLINKSTATE_EXTRA: memset(&link_info, 0, sizeof(link_info)); - ret = copy_from_user(&link_info, + if (copy_from_user(&link_info, (struct hfi1_link_info __user *)arg, - sizeof(link_info)); - if (ret) - break; + sizeof(link_info))) + ret = -EFAULT; value = link_info.port_state; index = link_info.port_number; @@ -1080,9 +1080,10 @@ static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) case HFI1_SNOOP_IOCGETLINKSTATE_EXTRA: if (cmd == HFI1_SNOOP_IOCGETLINKSTATE_EXTRA) { memset(&link_info, 0, sizeof(link_info)); - ret = copy_from_user(&link_info, + if (copy_from_user(&link_info, (struct hfi1_link_info __user *)arg, - sizeof(link_info)); + sizeof(link_info))) + ret = -EFAULT; index = link_info.port_number; } else { ret = __get_user(index, (int __user *) arg); @@ -1114,9 +1115,10 @@ static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) ppd->link_speed_active; link_info.link_width_active = ppd->link_width_active; - ret = copy_to_user( + if (copy_to_user( (struct hfi1_link_info __user *)arg, - &link_info, sizeof(link_info)); + &link_info, sizeof(link_info))) + ret = -EFAULT; } else { ret = __put_user(value, (int __user *)arg); } @@ -1142,10 +1144,9 @@ static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) snoop_dbg("Setting filter"); /* just copy command structure */ argp = (unsigned long *)arg; - ret = copy_from_user(&filter_cmd, (void __user *)argp, - sizeof(filter_cmd)); - if (ret < 0) { - pr_alert("Error copying filter command\n"); + if (copy_from_user(&filter_cmd, 
(void __user *)argp, + sizeof(filter_cmd))) { + ret = -EFAULT; break; } if (filter_cmd.opcode >= HFI1_MAX_FILTERS) { @@ -1167,12 +1168,11 @@ static long hfi1_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) break; } /* copy remaining data from userspace */ - ret = copy_from_user((u8 *)filter_value, + if (copy_from_user((u8 *)filter_value, (void __user *)filter_cmd.value_ptr, - filter_cmd.length); - if (ret < 0) { + filter_cmd.length)) { kfree(filter_value); - pr_alert("Error copying filter data\n"); + ret = -EFAULT; break; } /* Drain packets first */ diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c index 469861750b76..72d38500d8ce 100644 --- a/drivers/staging/rdma/hfi1/file_ops.c +++ b/drivers/staging/rdma/hfi1/file_ops.c @@ -1181,6 +1181,7 @@ static int get_ctxt_info(struct file *fp, void __user *ubase, __u32 len) struct hfi1_filedata *fd = fp->private_data; int ret = 0; + memset(&cinfo, 0, sizeof(cinfo)); ret = hfi1_get_base_kinfo(uctxt, &cinfo); if (ret < 0) goto done; @@ -2089,14 +2090,16 @@ static int user_add(struct hfi1_devdata *dd) if (atomic_inc_return(&user_count) == 1) { ret = hfi1_cdev_init(0, class_name(), &hfi1_file_ops, - &wildcard_cdev, &wildcard_device); + &wildcard_cdev, &wildcard_device, + true); if (ret) goto done; } snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit); ret = hfi1_cdev_init(dd->unit + 1, name, &hfi1_file_ops, - &dd->user_cdev, &dd->user_device); + &dd->user_cdev, &dd->user_device, + true); if (ret) goto done; @@ -2104,7 +2107,8 @@ static int user_add(struct hfi1_devdata *dd) snprintf(name, sizeof(name), "%s_ui%d", class_name(), dd->unit); ret = hfi1_cdev_init(dd->unit + UI_OFFSET, name, &ui_file_ops, - &dd->ui_cdev, &dd->ui_device); + &dd->ui_cdev, &dd->ui_device, + false); if (ret) goto done; } diff --git a/drivers/staging/rdma/hfi1/mad.c b/drivers/staging/rdma/hfi1/mad.c index 37269eb90c34..b2c1b72d38ce 100644 --- a/drivers/staging/rdma/hfi1/mad.c +++ 
b/drivers/staging/rdma/hfi1/mad.c @@ -1717,9 +1717,9 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, psi->port_states.portphysstate_portstate = (hfi1_ibphys_portstate(ppd) << 4) | (lstate & 0xf); psi->link_width_downgrade_tx_active = - ppd->link_width_downgrade_tx_active; + cpu_to_be16(ppd->link_width_downgrade_tx_active); psi->link_width_downgrade_rx_active = - ppd->link_width_downgrade_rx_active; + cpu_to_be16(ppd->link_width_downgrade_rx_active); if (resp_len) *resp_len += sizeof(struct opa_port_state_info); diff --git a/drivers/staging/rdma/hfi1/sdma.c b/drivers/staging/rdma/hfi1/sdma.c index a8c903caecce..aecd1a74741c 100644 --- a/drivers/staging/rdma/hfi1/sdma.c +++ b/drivers/staging/rdma/hfi1/sdma.c @@ -737,7 +737,7 @@ u16 sdma_get_descq_cnt(void) */ if (!is_power_of_2(count)) return SDMA_DESCQ_CNT; - if (count < 64 && count > 32768) + if (count < 64 || count > 32768) return SDMA_DESCQ_CNT; return count; } @@ -1848,7 +1848,7 @@ static void dump_sdma_state(struct sdma_engine *sde) dd_dev_err(sde->dd, "\taidx: %u amode: %u alen: %u\n", (u8)((desc[1] & SDMA_DESC1_HEADER_INDEX_SMASK) - >> SDMA_DESC1_HEADER_INDEX_MASK), + >> SDMA_DESC1_HEADER_INDEX_SHIFT), (u8)((desc[1] & SDMA_DESC1_HEADER_MODE_SMASK) >> SDMA_DESC1_HEADER_MODE_SHIFT), (u8)((desc[1] & SDMA_DESC1_HEADER_DWS_SMASK) @@ -1926,7 +1926,7 @@ void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *sde) if (desc[0] & SDMA_DESC0_FIRST_DESC_FLAG) seq_printf(s, "\t\tahgidx: %u ahgmode: %u\n", (u8)((desc[1] & SDMA_DESC1_HEADER_INDEX_SMASK) - >> SDMA_DESC1_HEADER_INDEX_MASK), + >> SDMA_DESC1_HEADER_INDEX_SHIFT), (u8)((desc[1] & SDMA_DESC1_HEADER_MODE_SMASK) >> SDMA_DESC1_HEADER_MODE_SHIFT)); head = (head + 1) & sde->sdma_mask; diff --git a/drivers/staging/rdma/hfi1/sdma.h b/drivers/staging/rdma/hfi1/sdma.h index 1e613fcd8f4c..496086903891 100644 --- a/drivers/staging/rdma/hfi1/sdma.h +++ b/drivers/staging/rdma/hfi1/sdma.h @@ -109,53 +109,53 @@ /* * Bits defined in the send DMA 
descriptor. */ -#define SDMA_DESC0_FIRST_DESC_FLAG (1ULL<<63) -#define SDMA_DESC0_LAST_DESC_FLAG (1ULL<<62) +#define SDMA_DESC0_FIRST_DESC_FLAG (1ULL << 63) +#define SDMA_DESC0_LAST_DESC_FLAG (1ULL << 62) #define SDMA_DESC0_BYTE_COUNT_SHIFT 48 #define SDMA_DESC0_BYTE_COUNT_WIDTH 14 #define SDMA_DESC0_BYTE_COUNT_MASK \ - ((1ULL<verbs_txreq_cache, GFP_ATOMIC); - if (!tx) + if (!tx) { /* call slow path to get the lock */ tx = __get_txreq(dev, qp); - if (tx) - tx->qp = qp; + if (IS_ERR(tx)) + return tx; + } + tx->qp = qp; return tx; } diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 7d137a43cc86..9eda69e40678 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -61,8 +61,7 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;" enum { VHOST_NET_FEATURES = VHOST_FEATURES | (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | - (1ULL << VIRTIO_NET_F_MRG_RXBUF) | - (1ULL << VIRTIO_F_VERSION_1), + (1ULL << VIRTIO_NET_F_MRG_RXBUF) }; enum { diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index f114a9dbb48f..e25a23692822 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -166,9 +166,7 @@ enum { /* Note: can't set VIRTIO_F_VERSION_1 yet, since that implies ANY_LAYOUT. 
*/ enum { VHOST_SCSI_FEATURES = VHOST_FEATURES | (1ULL << VIRTIO_SCSI_F_HOTPLUG) | - (1ULL << VIRTIO_SCSI_F_T10_PI) | - (1ULL << VIRTIO_F_ANY_LAYOUT) | - (1ULL << VIRTIO_F_VERSION_1) + (1ULL << VIRTIO_SCSI_F_T10_PI) }; #define VHOST_SCSI_MAX_TARGET 256 diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index d9c501eaa6c3..f2882ac98726 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -277,10 +277,13 @@ static long vhost_test_ioctl(struct file *f, unsigned int ioctl, return -EFAULT; return 0; case VHOST_SET_FEATURES: + printk(KERN_ERR "1\n"); if (copy_from_user(&features, featurep, sizeof features)) return -EFAULT; + printk(KERN_ERR "2\n"); if (features & ~VHOST_FEATURES) return -EOPNOTSUPP; + printk(KERN_ERR "3\n"); return vhost_test_set_features(n, features); case VHOST_RESET_OWNER: return vhost_test_reset_owner(n); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index ce6f6da4b09f..4772862b71a7 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -173,7 +173,9 @@ enum { VHOST_FEATURES = (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | (1ULL << VIRTIO_RING_F_EVENT_IDX) | - (1ULL << VHOST_F_LOG_ALL), + (1ULL << VHOST_F_LOG_ALL) | + (1ULL << VIRTIO_F_ANY_LAYOUT) | + (1ULL << VIRTIO_F_VERSION_1) }; static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit) diff --git a/fs/block_dev.c b/fs/block_dev.c index 22ea424ee741..073bb57adab1 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1242,6 +1242,13 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) goto out_clear; } bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); + /* + * If the partition is not aligned on a page + * boundary, we can't do dax I/O to it. 
+ */ + if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) || + (bdev->bd_part->nr_sects % (PAGE_SIZE / 512))) + bdev->bd_inode->i_flags &= ~S_DAX; } } else { if (bdev->bd_contains == bdev) { diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 6a1119e87fbb..e739950ca084 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -325,8 +325,11 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server) static void cifs_show_security(struct seq_file *s, struct cifs_ses *ses) { - if (ses->sectype == Unspecified) + if (ses->sectype == Unspecified) { + if (ses->user_name == NULL) + seq_puts(s, ",sec=none"); return; + } seq_puts(s, ",sec="); diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index c63f5227b681..28a77bf1d559 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c @@ -67,6 +67,12 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, goto out_drop_write; } + if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) { + rc = -EBADF; + cifs_dbg(VFS, "src file seems to be from a different filesystem type\n"); + goto out_fput; + } + if ((!src_file.file->private_data) || (!dst_file->private_data)) { rc = -EBADF; cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); diff --git a/fs/dax.c b/fs/dax.c index 93bf2f990ace..7ae6df7ea1d2 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -119,7 +119,8 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, size_t len; if (pos == max) { unsigned blkbits = inode->i_blkbits; - sector_t block = pos >> blkbits; + long page = pos >> PAGE_SHIFT; + sector_t block = page << (PAGE_SHIFT - blkbits); unsigned first = pos - (block << blkbits); long size; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 587ac08eabb6..091a36444972 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1481,6 +1481,21 @@ static long writeback_sb_inodes(struct super_block *sb, wbc_detach_inode(&wbc); work->nr_pages -= write_chunk - wbc.nr_to_write; wrote += write_chunk - wbc.nr_to_write; + + if 
(need_resched()) { + /* + * We're trying to balance between building up a nice + * long list of IOs to improve our merge rate, and + * getting those IOs out quickly for anyone throttling + * in balance_dirty_pages(). cond_resched() doesn't + * unplug, so get our IOs out the door before we + * give up the CPU. + */ + blk_flush_plug(current); + cond_resched(); + } + + spin_lock(&wb->list_lock); spin_lock(&inode->i_lock); if (!(inode->i_state & I_DIRTY_ALL)) @@ -1488,7 +1503,7 @@ static long writeback_sb_inodes(struct super_block *sb, requeue_inode(inode, wb, &wbc); inode_sync_complete(inode); spin_unlock(&inode->i_lock); - cond_resched_lock(&wb->list_lock); + /* * bail out to wb_writeback() often enough to check * background threshold and other termination conditions. diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 634e676072cb..f9aeb40a7197 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1287,8 +1287,10 @@ static struct file *userfaultfd_file_create(int flags) file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, ctx, O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS)); - if (IS_ERR(file)) + if (IS_ERR(file)) { + mmput(ctx->mm); kmem_cache_free(userfaultfd_ctx_cachep, ctx); + } out: return file; } diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index f20f407ce45d..4b4b056a6eb0 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -73,7 +73,7 @@ * Convert a physical address to a Page Frame Number and back */ #define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT)) -#define __pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT) +#define __pfn_to_phys(pfn) PFN_PHYS(pfn) #define page_to_pfn __page_to_pfn #define pfn_to_page __pfn_to_page diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h index 83bfb87f5bf1..e2aadbc7151f 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -111,8 +111,8 @@ static inline void 
queued_spin_unlock_wait(struct qspinlock *lock) cpu_relax(); } -#ifndef virt_queued_spin_lock -static __always_inline bool virt_queued_spin_lock(struct qspinlock *lock) +#ifndef virt_spin_lock +static __always_inline bool virt_spin_lock(struct qspinlock *lock) { return false; } diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index d901f1a47be6..4e14dac282bb 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -35,11 +35,7 @@ #define VGIC_V3_MAX_LRS 16 #define VGIC_MAX_IRQS 1024 #define VGIC_V2_MAX_CPUS 8 - -/* Sanity checks... */ -#if (KVM_MAX_VCPUS > 255) -#error Too many KVM VCPUs, the VGIC only supports up to 255 VCPUs for now -#endif +#define VGIC_V3_MAX_CPUS 255 #if (VGIC_NR_IRQS_LEGACY & 31) #error "VGIC_NR_IRQS must be a multiple of 32" diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 38a5ff772a37..99da9ebc7377 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1368,6 +1368,26 @@ static inline bool bvec_gap_to_prev(struct request_queue *q, ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q)); } +static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, + struct bio *next) +{ + if (!bio_has_data(prev)) + return false; + + return bvec_gap_to_prev(q, &prev->bi_io_vec[prev->bi_vcnt - 1], + next->bi_io_vec[0].bv_offset); +} + +static inline bool req_gap_back_merge(struct request *req, struct bio *bio) +{ + return bio_will_gap(req->q, req->biotail, bio); +} + +static inline bool req_gap_front_merge(struct request *req, struct bio *bio) +{ + return bio_will_gap(req->q, bio, req->bio); +} + struct work_struct; int kblockd_schedule_work(struct work_struct *work); int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay); @@ -1494,6 +1514,26 @@ queue_max_integrity_segments(struct request_queue *q) return q->limits.max_integrity_segments; } +static inline bool integrity_req_gap_back_merge(struct request *req, + struct bio *next) +{ + struct 
bio_integrity_payload *bip = bio_integrity(req->bio); + struct bio_integrity_payload *bip_next = bio_integrity(next); + + return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1], + bip_next->bip_vec[0].bv_offset); +} + +static inline bool integrity_req_gap_front_merge(struct request *req, + struct bio *bio) +{ + struct bio_integrity_payload *bip = bio_integrity(bio); + struct bio_integrity_payload *bip_next = bio_integrity(req->bio); + + return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1], + bip_next->bip_vec[0].bv_offset); +} + #else /* CONFIG_BLK_DEV_INTEGRITY */ struct bio; @@ -1560,6 +1600,16 @@ static inline bool blk_integrity_is_initialized(struct gendisk *g) { return 0; } +static inline bool integrity_req_gap_back_merge(struct request *req, + struct bio *next) +{ + return false; +} +static inline bool integrity_req_gap_front_merge(struct request *req, + struct bio *bio) +{ + return false; +} #endif /* CONFIG_BLK_DEV_INTEGRITY */ diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 4763ad64e832..f89b31d45cc8 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -107,6 +107,7 @@ static inline u64 ceph_sanitize_features(u64 features) CEPH_FEATURE_OSDMAP_ENC | \ CEPH_FEATURE_CRUSH_TUNABLES3 | \ CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \ + CEPH_FEATURE_MSGR_KEEPALIVE2 | \ CEPH_FEATURE_CRUSH_V4) #define CEPH_FEATURES_REQUIRED_DEFAULT \ diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 7e1252e97a30..b2371d9b51fa 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -238,6 +238,8 @@ struct ceph_connection { bool out_kvec_is_msg; /* kvec refers to out_msg */ int out_more; /* there is more data after the kvecs */ __le64 out_temp_ack; /* for writing an ack */ + struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2 + stamp */ /* message in temps */ struct ceph_msg_header in_hdr; @@ -248,7 +250,7 @@ 
struct ceph_connection { int in_base_pos; /* bytes read */ __le64 in_temp_ack; /* for reading an ack */ - struct timespec last_keepalive_ack; + struct timespec last_keepalive_ack; /* keepalive2 ack stamp */ struct delayed_work work; /* send|recv work */ unsigned long delay; /* current delay interval */ diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 31ce435981fe..bdcf358dfce2 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -18,15 +18,6 @@ struct clock_event_device; struct module; -/* Clock event mode commands for legacy ->set_mode(): OBSOLETE */ -enum clock_event_mode { - CLOCK_EVT_MODE_UNUSED, - CLOCK_EVT_MODE_SHUTDOWN, - CLOCK_EVT_MODE_PERIODIC, - CLOCK_EVT_MODE_ONESHOT, - CLOCK_EVT_MODE_RESUME, -}; - /* * Possible states of a clock event device. * @@ -86,16 +77,14 @@ enum clock_event_state { * @min_delta_ns: minimum delta value in ns * @mult: nanosecond to cycles multiplier * @shift: nanoseconds to cycles divisor (power of two) - * @mode: operating mode, relevant only to ->set_mode(), OBSOLETE * @state_use_accessors:current state of the device, assigned by the core code * @features: features * @retries: number of forced programming retries - * @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME. 
- * @set_state_periodic: switch state to periodic, if !set_mode - * @set_state_oneshot: switch state to oneshot, if !set_mode - * @set_state_oneshot_stopped: switch state to oneshot_stopped, if !set_mode - * @set_state_shutdown: switch state to shutdown, if !set_mode - * @tick_resume: resume clkevt device, if !set_mode + * @set_state_periodic: switch state to periodic + * @set_state_oneshot: switch state to oneshot + * @set_state_oneshot_stopped: switch state to oneshot_stopped + * @set_state_shutdown: switch state to shutdown + * @tick_resume: resume clkevt device * @broadcast: function to broadcast events * @min_delta_ticks: minimum delta value in ticks stored for reconfiguration * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration @@ -116,18 +105,10 @@ struct clock_event_device { u64 min_delta_ns; u32 mult; u32 shift; - enum clock_event_mode mode; enum clock_event_state state_use_accessors; unsigned int features; unsigned long retries; - /* - * State transition callback(s): Only one of the two groups should be - * defined: - * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME. - * - set_state_{shutdown|periodic|oneshot|oneshot_stopped}(), tick_resume(). - */ - void (*set_mode)(enum clock_event_mode mode, struct clock_event_device *); int (*set_state_periodic)(struct clock_event_device *); int (*set_state_oneshot)(struct clock_event_device *); int (*set_state_oneshot_stopped)(struct clock_event_device *); diff --git a/include/linux/irq.h b/include/linux/irq.h index 6f8b34066442..11bf09288ddb 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -110,8 +110,8 @@ enum { /* * Return value for chip->irq_set_affinity() * - * IRQ_SET_MASK_OK - OK, core updates irq_data.affinity - * IRQ_SET_MASK_NOCPY - OK, chip did update irq_data.affinity + * IRQ_SET_MASK_OK - OK, core updates irq_common_data.affinity + * IRQ_SET_MASK_NOCPY - OK, chip did update irq_common_data.affinity * IRQ_SET_MASK_OK_DONE - Same as IRQ_SET_MASK_OK for core. 
Special code to * support stacked irqchips, which indicates skipping * all descendent irqchips. @@ -129,9 +129,19 @@ struct irq_domain; * struct irq_common_data - per irq data shared by all irqchips * @state_use_accessors: status information for irq chip functions. * Use accessor functions to deal with it + * @node: node index useful for balancing + * @handler_data: per-IRQ data for the irq_chip methods + * @affinity: IRQ affinity on SMP + * @msi_desc: MSI descriptor */ struct irq_common_data { unsigned int state_use_accessors; +#ifdef CONFIG_NUMA + unsigned int node; +#endif + void *handler_data; + struct msi_desc *msi_desc; + cpumask_var_t affinity; }; /** @@ -139,38 +149,26 @@ struct irq_common_data { * @mask: precomputed bitmask for accessing the chip registers * @irq: interrupt number * @hwirq: hardware interrupt number, local to the interrupt domain - * @node: node index useful for balancing * @common: point to data shared by all irqchips * @chip: low level interrupt hardware access * @domain: Interrupt translation domain; responsible for mapping * between hwirq number and linux irq number. * @parent_data: pointer to parent struct irq_data to support hierarchy * irq_domain - * @handler_data: per-IRQ data for the irq_chip methods * @chip_data: platform-specific per-chip private data for the chip * methods, to allow shared chip implementations - * @msi_desc: MSI descriptor - * @affinity: IRQ affinity on SMP - * - * The fields here need to overlay the ones in irq_desc until we - * cleaned up the direct references and switched everything over to - * irq_data. 
*/ struct irq_data { u32 mask; unsigned int irq; unsigned long hwirq; - unsigned int node; struct irq_common_data *common; struct irq_chip *chip; struct irq_domain *domain; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY struct irq_data *parent_data; #endif - void *handler_data; void *chip_data; - struct msi_desc *msi_desc; - cpumask_var_t affinity; }; /* @@ -190,6 +188,7 @@ struct irq_data { * IRQD_IRQ_MASKED - Masked state of the interrupt * IRQD_IRQ_INPROGRESS - In progress state of the interrupt * IRQD_WAKEUP_ARMED - Wakeup mode armed + * IRQD_FORWARDED_TO_VCPU - The interrupt is forwarded to a VCPU */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -204,6 +203,7 @@ enum { IRQD_IRQ_MASKED = (1 << 17), IRQD_IRQ_INPROGRESS = (1 << 18), IRQD_WAKEUP_ARMED = (1 << 19), + IRQD_FORWARDED_TO_VCPU = (1 << 20), }; #define __irqd_to_state(d) ((d)->common->state_use_accessors) @@ -282,6 +282,20 @@ static inline bool irqd_is_wakeup_armed(struct irq_data *d) return __irqd_to_state(d) & IRQD_WAKEUP_ARMED; } +static inline bool irqd_is_forwarded_to_vcpu(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_FORWARDED_TO_VCPU; +} + +static inline void irqd_set_forwarded_to_vcpu(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_FORWARDED_TO_VCPU; +} + +static inline void irqd_clr_forwarded_to_vcpu(struct irq_data *d) +{ + __irqd_to_state(d) &= ~IRQD_FORWARDED_TO_VCPU; +} /* * Functions for chained handlers which can be enabled/disabled by the @@ -461,14 +475,14 @@ static inline int irq_set_parent(int irq, int parent_irq) * Built-in IRQ handlers for various IRQ types, * callable via desc->handle_irq() */ -extern void handle_level_irq(unsigned int irq, struct irq_desc *desc); -extern void handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc); -extern void handle_edge_irq(unsigned int irq, struct irq_desc *desc); -extern void handle_edge_eoi_irq(unsigned int irq, struct irq_desc *desc); -extern void handle_simple_irq(unsigned int irq, struct irq_desc *desc); -extern void 
handle_percpu_irq(unsigned int irq, struct irq_desc *desc); -extern void handle_percpu_devid_irq(unsigned int irq, struct irq_desc *desc); -extern void handle_bad_irq(unsigned int irq, struct irq_desc *desc); +extern void handle_level_irq(struct irq_desc *desc); +extern void handle_fasteoi_irq(struct irq_desc *desc); +extern void handle_edge_irq(struct irq_desc *desc); +extern void handle_edge_eoi_irq(struct irq_desc *desc); +extern void handle_simple_irq(struct irq_desc *desc); +extern void handle_percpu_irq(struct irq_desc *desc); +extern void handle_percpu_devid_irq(struct irq_desc *desc); +extern void handle_bad_irq(struct irq_desc *desc); extern void handle_nested_irq(unsigned int irq); extern int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg); @@ -627,23 +641,23 @@ static inline void *irq_data_get_irq_chip_data(struct irq_data *d) static inline void *irq_get_handler_data(unsigned int irq) { struct irq_data *d = irq_get_irq_data(irq); - return d ? d->handler_data : NULL; + return d ? d->common->handler_data : NULL; } static inline void *irq_data_get_irq_handler_data(struct irq_data *d) { - return d->handler_data; + return d->common->handler_data; } static inline struct msi_desc *irq_get_msi_desc(unsigned int irq) { struct irq_data *d = irq_get_irq_data(irq); - return d ? d->msi_desc : NULL; + return d ? d->common->msi_desc : NULL; } static inline struct msi_desc *irq_data_get_msi_desc(struct irq_data *d) { - return d->msi_desc; + return d->common->msi_desc; } static inline u32 irq_get_trigger_type(unsigned int irq) @@ -652,21 +666,30 @@ static inline u32 irq_get_trigger_type(unsigned int irq) return d ? 
irqd_get_trigger_type(d) : 0; } -static inline int irq_data_get_node(struct irq_data *d) +static inline int irq_common_data_get_node(struct irq_common_data *d) { +#ifdef CONFIG_NUMA return d->node; +#else + return 0; +#endif +} + +static inline int irq_data_get_node(struct irq_data *d) +{ + return irq_common_data_get_node(d->common); } static inline struct cpumask *irq_get_affinity_mask(int irq) { struct irq_data *d = irq_get_irq_data(irq); - return d ? d->affinity : NULL; + return d ? d->common->affinity : NULL; } static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d) { - return d->affinity; + return d->common->affinity; } unsigned int arch_dynirq_lower_bound(unsigned int from); diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 5acfa26602e1..a587a33363c7 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -98,11 +98,7 @@ extern struct irq_desc irq_desc[NR_IRQS]; static inline struct irq_desc *irq_data_to_desc(struct irq_data *data) { -#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY - return irq_to_desc(data->irq); -#else - return container_of(data, struct irq_desc, irq_data); -#endif + return container_of(data->common, struct irq_desc, irq_common_data); } static inline unsigned int irq_desc_get_irq(struct irq_desc *desc) @@ -127,23 +123,21 @@ static inline void *irq_desc_get_chip_data(struct irq_desc *desc) static inline void *irq_desc_get_handler_data(struct irq_desc *desc) { - return desc->irq_data.handler_data; + return desc->irq_common_data.handler_data; } static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc) { - return desc->irq_data.msi_desc; + return desc->irq_common_data.msi_desc; } /* * Architectures call this to let the generic IRQ layer - * handle an interrupt. If the descriptor is attached to an - * irqchip-style controller then we call the ->handle_irq() handler, - * and it calls __do_IRQ() if it's attached to an irqtype-style controller. + * handle an interrupt. 
*/ -static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc) +static inline void generic_handle_irq_desc(struct irq_desc *desc) { - desc->handle_irq(irq, desc); + desc->handle_irq(desc); } int generic_handle_irq(unsigned int irq); @@ -176,29 +170,6 @@ static inline int irq_has_action(unsigned int irq) return irq_desc_has_action(irq_to_desc(irq)); } -/* caller has locked the irq_desc and both params are valid */ -static inline void __irq_set_handler_locked(unsigned int irq, - irq_flow_handler_t handler) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - desc->handle_irq = handler; -} - -/* caller has locked the irq_desc and both params are valid */ -static inline void -__irq_set_chip_handler_name_locked(unsigned int irq, struct irq_chip *chip, - irq_flow_handler_t handler, const char *name) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - irq_desc_get_irq_data(desc)->chip = chip; - desc->handle_irq = handler; - desc->name = name; -} - /** * irq_set_handler_locked - Set irq handler from a locked region * @data: Pointer to the irq_data structure which identifies the irq diff --git a/include/linux/irqhandler.h b/include/linux/irqhandler.h index 62d543004197..661bed0ed1f3 100644 --- a/include/linux/irqhandler.h +++ b/include/linux/irqhandler.h @@ -8,7 +8,7 @@ struct irq_desc; struct irq_data; -typedef void (*irq_flow_handler_t)(unsigned int irq, struct irq_desc *desc); +typedef void (*irq_flow_handler_t)(struct irq_desc *desc); typedef void (*irq_preflow_handler_t)(struct irq_data *data); #endif diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 7f653e8f6690..f1094238ab2a 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -21,8 +21,8 @@ * * DEFINE_STATIC_KEY_TRUE(key); * DEFINE_STATIC_KEY_FALSE(key); - * static_key_likely() - * statick_key_unlikely() + * static_branch_likely() + * static_branch_unlikely() * * Jump labels provide an interface to generate dynamic branches using * 
self-modifying code. Assuming toolchain and architecture support, if we @@ -45,12 +45,10 @@ * statement, setting the key to true requires us to patch in a jump * to the out-of-line of true branch. * - * In addtion to static_branch_{enable,disable}, we can also reference count + * In addition to static_branch_{enable,disable}, we can also reference count * the key or branch direction via static_branch_{inc,dec}. Thus, * static_branch_inc() can be thought of as a 'make more true' and - * static_branch_dec() as a 'make more false'. The inc()/dec() - * interface is meant to be used exclusively from the inc()/dec() for a given - * key. + * static_branch_dec() as a 'make more false'. * * Since this relies on modifying code, the branch modifying functions * must be considered absolute slow paths (machine wide synchronization etc.). diff --git a/include/linux/tick.h b/include/linux/tick.h index 48d901f83f92..e312219ff823 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -147,11 +147,20 @@ static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) cpumask_or(mask, mask, tick_nohz_full_mask); } +static inline int housekeeping_any_cpu(void) +{ + return cpumask_any_and(housekeeping_mask, cpu_online_mask); +} + extern void tick_nohz_full_kick(void); extern void tick_nohz_full_kick_cpu(int cpu); extern void tick_nohz_full_kick_all(void); extern void __tick_nohz_task_switch(void); #else +static inline int housekeeping_any_cpu(void) +{ + return smp_processor_id(); +} static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h index 391dae1931c0..a0fa975cd1c1 100644 --- a/include/rdma/opa_port_info.h +++ b/include/rdma/opa_port_info.h @@ -294,8 +294,8 @@ struct opa_port_states { struct opa_port_state_info { struct opa_port_states port_states; - 
u16 link_width_downgrade_tx_active; - u16 link_width_downgrade_rx_active; + __be16 link_width_downgrade_tx_active; + __be16 link_width_downgrade_rx_active; }; struct opa_port_info { diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 6e40a9539763..e28169dd1c36 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -83,7 +83,7 @@ int irq_set_handler_data(unsigned int irq, void *data) if (!desc) return -EINVAL; - desc->irq_data.handler_data = data; + desc->irq_common_data.handler_data = data; irq_put_desc_unlock(desc, flags); return 0; } @@ -105,7 +105,7 @@ int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset, if (!desc) return -EINVAL; - desc->irq_data.msi_desc = entry; + desc->irq_common_data.msi_desc = entry; if (entry && !irq_offset) entry->irq = irq_base; irq_put_desc_unlock(desc, flags); @@ -372,7 +372,6 @@ static bool irq_may_run(struct irq_desc *desc) /** * handle_simple_irq - Simple and software-decoded IRQs. - * @irq: the interrupt number * @desc: the interrupt description structure for this irq * * Simple interrupts are either sent from a demultiplexing interrupt @@ -382,8 +381,7 @@ static bool irq_may_run(struct irq_desc *desc) * Note: The caller is expected to handle the ack, clear, mask and * unmask issues if necessary. */ -void -handle_simple_irq(unsigned int irq, struct irq_desc *desc) +void handle_simple_irq(struct irq_desc *desc) { raw_spin_lock(&desc->lock); @@ -425,7 +423,6 @@ static void cond_unmask_irq(struct irq_desc *desc) /** * handle_level_irq - Level type irq handler - * @irq: the interrupt number * @desc: the interrupt description structure for this irq * * Level type interrupts are active as long as the hardware line has @@ -433,8 +430,7 @@ static void cond_unmask_irq(struct irq_desc *desc) * it after the associated handler has acknowledged the device, so the * interrupt line is back to inactive. 
*/ -void -handle_level_irq(unsigned int irq, struct irq_desc *desc) +void handle_level_irq(struct irq_desc *desc) { raw_spin_lock(&desc->lock); mask_ack_irq(desc); @@ -496,7 +492,6 @@ static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip) /** * handle_fasteoi_irq - irq handler for transparent controllers - * @irq: the interrupt number * @desc: the interrupt description structure for this irq * * Only a single callback will be issued to the chip: an ->eoi() @@ -504,8 +499,7 @@ static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip) * for modern forms of interrupt handlers, which handle the flow * details in hardware, transparently. */ -void -handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) +void handle_fasteoi_irq(struct irq_desc *desc) { struct irq_chip *chip = desc->irq_data.chip; @@ -546,7 +540,6 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_irq); /** * handle_edge_irq - edge type IRQ handler - * @irq: the interrupt number * @desc: the interrupt description structure for this irq * * Interrupt occures on the falling and/or rising edge of a hardware @@ -560,8 +553,7 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_irq); * the handler was running. If all pending interrupts are handled, the * loop is left. */ -void -handle_edge_irq(unsigned int irq, struct irq_desc *desc) +void handle_edge_irq(struct irq_desc *desc) { raw_spin_lock(&desc->lock); @@ -618,13 +610,12 @@ EXPORT_SYMBOL(handle_edge_irq); #ifdef CONFIG_IRQ_EDGE_EOI_HANDLER /** * handle_edge_eoi_irq - edge eoi type IRQ handler - * @irq: the interrupt number * @desc: the interrupt description structure for this irq * * Similar as the above handle_edge_irq, but using eoi and w/o the * mask/unmask logic. 
*/ -void handle_edge_eoi_irq(unsigned int irq, struct irq_desc *desc) +void handle_edge_eoi_irq(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); @@ -665,13 +656,11 @@ out_eoi: /** * handle_percpu_irq - Per CPU local irq handler - * @irq: the interrupt number * @desc: the interrupt description structure for this irq * * Per CPU interrupts on SMP machines without locking requirements */ -void -handle_percpu_irq(unsigned int irq, struct irq_desc *desc) +void handle_percpu_irq(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); @@ -688,7 +677,6 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) /** * handle_percpu_devid_irq - Per CPU local irq handler with per cpu dev ids - * @irq: the interrupt number * @desc: the interrupt description structure for this irq * * Per CPU interrupts on SMP machines without locking requirements. Same as @@ -698,11 +686,12 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) * contain the real device id for the cpu on which this handler is * called */ -void handle_percpu_devid_irq(unsigned int irq, struct irq_desc *desc) +void handle_percpu_devid_irq(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct irqaction *action = desc->action; void *dev_id = raw_cpu_ptr(action->percpu_dev_id); + unsigned int irq = irq_desc_get_irq(desc); irqreturn_t res; kstat_incr_irqs_this_cpu(desc); @@ -796,7 +785,7 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle, return; __irq_do_set_handler(desc, handle, 1, NULL); - desc->irq_data.handler_data = data; + desc->irq_common_data.handler_data = data; irq_put_desc_busunlock(desc, flags); } diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index b6eeea8a80c5..de41a68fc038 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -27,8 +27,10 @@ * * Handles spurious and unhandled IRQ's. It also prints a debugmessage. 
*/ -void handle_bad_irq(unsigned int irq, struct irq_desc *desc) +void handle_bad_irq(struct irq_desc *desc) { + unsigned int irq = irq_desc_get_irq(desc); + print_irq_desc(irq, desc); kstat_incr_irqs_this_cpu(desc); ack_bad_irq(irq); diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index eee4b385cffb..5ef0c2dbe930 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -194,7 +194,7 @@ static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc) static inline int irq_desc_get_node(struct irq_desc *desc) { - return irq_data_get_node(&desc->irq_data); + return irq_common_data_get_node(&desc->irq_common_data); } #ifdef CONFIG_PM_SLEEP diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 0a2a4b697bcb..239e2ae2c947 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -38,12 +38,13 @@ static void __init init_irq_default_affinity(void) #ifdef CONFIG_SMP static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) { - if (!zalloc_cpumask_var_node(&desc->irq_data.affinity, gfp, node)) + if (!zalloc_cpumask_var_node(&desc->irq_common_data.affinity, + gfp, node)) return -ENOMEM; #ifdef CONFIG_GENERIC_PENDING_IRQ if (!zalloc_cpumask_var_node(&desc->pending_mask, gfp, node)) { - free_cpumask_var(desc->irq_data.affinity); + free_cpumask_var(desc->irq_common_data.affinity); return -ENOMEM; } #endif @@ -52,11 +53,13 @@ static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) static void desc_smp_init(struct irq_desc *desc, int node) { - desc->irq_data.node = node; - cpumask_copy(desc->irq_data.affinity, irq_default_affinity); + cpumask_copy(desc->irq_common_data.affinity, irq_default_affinity); #ifdef CONFIG_GENERIC_PENDING_IRQ cpumask_clear(desc->pending_mask); #endif +#ifdef CONFIG_NUMA + desc->irq_common_data.node = node; +#endif } #else @@ -70,12 +73,13 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node, { int cpu; + desc->irq_common_data.handler_data = NULL; + 
desc->irq_common_data.msi_desc = NULL; + desc->irq_data.common = &desc->irq_common_data; desc->irq_data.irq = irq; desc->irq_data.chip = &no_irq_chip; desc->irq_data.chip_data = NULL; - desc->irq_data.handler_data = NULL; - desc->irq_data.msi_desc = NULL; irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS); irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED); desc->handle_irq = handle_bad_irq; @@ -121,7 +125,7 @@ static void free_masks(struct irq_desc *desc) #ifdef CONFIG_GENERIC_PENDING_IRQ free_cpumask_var(desc->pending_mask); #endif - free_cpumask_var(desc->irq_data.affinity); + free_cpumask_var(desc->irq_common_data.affinity); } #else static inline void free_masks(struct irq_desc *desc) { } @@ -343,7 +347,7 @@ int generic_handle_irq(unsigned int irq) if (!desc) return -EINVAL; - generic_handle_irq_desc(irq, desc); + generic_handle_irq_desc(desc); return 0; } EXPORT_SYMBOL_GPL(generic_handle_irq); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 79baaf8a7813..dc9d27c0c158 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -844,7 +844,6 @@ static struct irq_data *irq_domain_insert_irq_data(struct irq_domain *domain, child->parent_data = irq_data; irq_data->irq = child->irq; irq_data->common = child->common; - irq_data->node = child->node; irq_data->domain = domain; } diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index ad1b064f94fe..f9a59f6cabd2 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -192,7 +192,7 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, switch (ret) { case IRQ_SET_MASK_OK: case IRQ_SET_MASK_OK_DONE: - cpumask_copy(data->affinity, mask); + cpumask_copy(desc->irq_common_data.affinity, mask); case IRQ_SET_MASK_OK_NOCOPY: irq_set_thread_affinity(desc); ret = 0; @@ -304,7 +304,7 @@ static void irq_affinity_notify(struct work_struct *work) if (irq_move_pending(&desc->irq_data)) irq_get_pending(cpumask, desc); else - cpumask_copy(cpumask, desc->irq_data.affinity); + 
cpumask_copy(cpumask, desc->irq_common_data.affinity); raw_spin_unlock_irqrestore(&desc->lock, flags); notify->notify(notify, cpumask); @@ -375,9 +375,9 @@ static int setup_affinity(struct irq_desc *desc, struct cpumask *mask) * one of the targets is online. */ if (irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) { - if (cpumask_intersects(desc->irq_data.affinity, + if (cpumask_intersects(desc->irq_common_data.affinity, cpu_online_mask)) - set = desc->irq_data.affinity; + set = desc->irq_common_data.affinity; else irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET); } @@ -829,8 +829,8 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) * This code is triggered unconditionally. Check the affinity * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out. */ - if (desc->irq_data.affinity) - cpumask_copy(mask, desc->irq_data.affinity); + if (desc->irq_common_data.affinity) + cpumask_copy(mask, desc->irq_common_data.affinity); else valid = false; raw_spin_unlock_irq(&desc->lock); diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 0e97c142ce40..e3a8c9577ba6 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -39,7 +39,7 @@ static struct proc_dir_entry *root_irq_dir; static int show_irq_affinity(int type, struct seq_file *m, void *v) { struct irq_desc *desc = irq_to_desc((long)m->private); - const struct cpumask *mask = desc->irq_data.affinity; + const struct cpumask *mask = desc->irq_common_data.affinity; #ifdef CONFIG_GENERIC_PENDING_IRQ if (irqd_is_setaffinity_pending(&desc->irq_data)) diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index dd95f44f99b2..b86886beee4f 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c @@ -38,7 +38,7 @@ static void resend_irqs(unsigned long arg) clear_bit(irq, irqs_resend); desc = irq_to_desc(irq); local_irq_disable(); - desc->handle_irq(irq, desc); + desc->handle_irq(desc); local_irq_enable(); } } diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 
337c8818541d..87e9ce6a63c5 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -289,7 +289,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) if (pv_enabled()) goto queue; - if (virt_queued_spin_lock(lock)) + if (virt_spin_lock(lock)) return; /* diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3595403921bd..2f9c92884817 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -621,18 +621,21 @@ int get_nohz_timer_target(void) int i, cpu = smp_processor_id(); struct sched_domain *sd; - if (!idle_cpu(cpu)) + if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu)) return cpu; rcu_read_lock(); for_each_domain(cpu, sd) { for_each_cpu(i, sched_domain_span(sd)) { - if (!idle_cpu(i)) { + if (!idle_cpu(i) && is_housekeeping_cpu(cpu)) { cpu = i; goto unlock; } } } + + if (!is_housekeeping_cpu(cpu)) + cpu = housekeeping_any_cpu(); unlock: rcu_read_unlock(); return cpu; @@ -2666,13 +2669,20 @@ unsigned long nr_running(void) /* * Check if only the current task is running on the cpu. + * + * Caution: this function does not check that the caller has disabled + * preemption, thus the result might have a time-of-check-to-time-of-use + * race. The caller is responsible to use it correctly, for example: + * + * - from a non-preemptable section (of course) + * + * - from a thread that is bound to a single CPU + * + * - in a loop with very short iterations (e.g. a polling loop) */ bool single_task_running(void) { - if (cpu_rq(smp_processor_id())->nr_running == 1) - return true; - else - return false; + return raw_rq()->nr_running == 1; } EXPORT_SYMBOL(single_task_running); @@ -5178,24 +5188,47 @@ static void migrate_tasks(struct rq *dead_rq) break; /* - * Ensure rq->lock covers the entire task selection - * until the migration. + * pick_next_task assumes pinned rq->lock. 
*/ lockdep_pin_lock(&rq->lock); next = pick_next_task(rq, &fake_task); BUG_ON(!next); next->sched_class->put_prev_task(rq, next); + /* + * Rules for changing task_struct::cpus_allowed are holding + * both pi_lock and rq->lock, such that holding either + * stabilizes the mask. + * + * Drop rq->lock is not quite as disastrous as it usually is + * because !cpu_active at this point, which means load-balance + * will not interfere. Also, stop-machine. + */ + lockdep_unpin_lock(&rq->lock); + raw_spin_unlock(&rq->lock); + raw_spin_lock(&next->pi_lock); + raw_spin_lock(&rq->lock); + + /* + * Since we're inside stop-machine, _nothing_ should have + * changed the task, WARN if weird stuff happened, because in + * that case the above rq->lock drop is a fail too. + */ + if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) { + raw_spin_unlock(&next->pi_lock); + continue; + } + /* Find suitable destination for @next, with force if needed. */ dest_cpu = select_fallback_rq(dead_rq->cpu, next); - lockdep_unpin_lock(&rq->lock); rq = __migrate_task(rq, next, dest_cpu); if (rq != dead_rq) { raw_spin_unlock(&rq->lock); rq = dead_rq; raw_spin_lock(&rq->lock); } + raw_spin_unlock(&next->pi_lock); } rq->stop = stop; diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 50eb107f1198..a9b76a40319e 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -97,20 +97,6 @@ EXPORT_SYMBOL_GPL(clockevent_delta2ns); static int __clockevents_switch_state(struct clock_event_device *dev, enum clock_event_state state) { - /* Transition with legacy set_mode() callback */ - if (dev->set_mode) { - /* Legacy callback doesn't support new modes */ - if (state > CLOCK_EVT_STATE_ONESHOT) - return -ENOSYS; - /* - * 'clock_event_state' and 'clock_event_mode' have 1-to-1 - * mapping until *_ONESHOT, and so a simple cast will work. 
- */ - dev->set_mode((enum clock_event_mode)state, dev); - dev->mode = (enum clock_event_mode)state; - return 0; - } - if (dev->features & CLOCK_EVT_FEAT_DUMMY) return 0; @@ -204,12 +190,8 @@ int clockevents_tick_resume(struct clock_event_device *dev) { int ret = 0; - if (dev->set_mode) { - dev->set_mode(CLOCK_EVT_MODE_RESUME, dev); - dev->mode = CLOCK_EVT_MODE_RESUME; - } else if (dev->tick_resume) { + if (dev->tick_resume) ret = dev->tick_resume(dev); - } return ret; } @@ -460,26 +442,6 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu) } EXPORT_SYMBOL_GPL(clockevents_unbind_device); -/* Sanity check of state transition callbacks */ -static int clockevents_sanity_check(struct clock_event_device *dev) -{ - /* Legacy set_mode() callback */ - if (dev->set_mode) { - /* We shouldn't be supporting new modes now */ - WARN_ON(dev->set_state_periodic || dev->set_state_oneshot || - dev->set_state_shutdown || dev->tick_resume || - dev->set_state_oneshot_stopped); - - BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); - return 0; - } - - if (dev->features & CLOCK_EVT_FEAT_DUMMY) - return 0; - - return 0; -} - /** * clockevents_register_device - register a clock event device * @dev: device to register @@ -488,8 +450,6 @@ void clockevents_register_device(struct clock_event_device *dev) { unsigned long flags; - BUG_ON(clockevents_sanity_check(dev)); - /* Initialize state to DETACHED */ clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index d11c55b6ab7d..4fcd99e12aa0 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -398,7 +398,6 @@ void tick_shutdown(unsigned int cpu) * the set mode function! 
*/ clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); - dev->mode = CLOCK_EVT_MODE_UNUSED; clockevents_exchange_device(dev, NULL); dev->event_handler = clockevents_handle_noop; td->evtdev = NULL; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3319e16f31e5..7c7ec4515983 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -290,16 +290,17 @@ static int __init tick_nohz_full_setup(char *str) __setup("nohz_full=", tick_nohz_full_setup); static int tick_nohz_cpu_down_callback(struct notifier_block *nfb, - unsigned long action, - void *hcpu) + unsigned long action, + void *hcpu) { unsigned int cpu = (unsigned long)hcpu; switch (action & ~CPU_TASKS_FROZEN) { case CPU_DOWN_PREPARE: /* - * If we handle the timekeeping duty for full dynticks CPUs, - * we can't safely shutdown that CPU. + * The boot CPU handles housekeeping duty (unbound timers, + * workqueues, timekeeping, ...) on behalf of full dynticks + * CPUs. It must remain online when nohz full is enabled. */ if (tick_nohz_full_running && tick_do_timer_cpu == cpu) return NOTIFY_BAD; @@ -370,6 +371,12 @@ void __init tick_nohz_init(void) cpu_notifier(tick_nohz_cpu_down_callback, 0); pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n", cpumask_pr_args(tick_nohz_full_mask)); + + /* + * We need at least one CPU to handle housekeeping work such + * as timekeeping, unbound timers, workqueues, ... 
+ */ + WARN_ON_ONCE(cpumask_empty(housekeeping_mask)); } #endif diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f6ee2e6b6f5d..3739ac6aa473 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1614,7 +1614,7 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk, negative = (tick_error < 0); /* Sort out the magnitude of the correction */ - tick_error = abs(tick_error); + tick_error = abs64(tick_error); for (adj = 0; tick_error > interval; adj++) tick_error >>= 1; diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 129c96033e46..f75e35b60149 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -225,7 +225,7 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) (unsigned long long) dev->min_delta_ns); SEQ_printf(m, " mult: %u\n", dev->mult); SEQ_printf(m, " shift: %u\n", dev->shift); - SEQ_printf(m, " mode: %d\n", dev->mode); + SEQ_printf(m, " mode: %d\n", clockevent_get_state(dev)); SEQ_printf(m, " next_event: %Ld nsecs\n", (unsigned long long) ktime_to_ns(dev->next_event)); @@ -233,40 +233,34 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) print_name_offset(m, dev->set_next_event); SEQ_printf(m, "\n"); - if (dev->set_mode) { - SEQ_printf(m, " set_mode: "); - print_name_offset(m, dev->set_mode); + if (dev->set_state_shutdown) { + SEQ_printf(m, " shutdown: "); + print_name_offset(m, dev->set_state_shutdown); SEQ_printf(m, "\n"); - } else { - if (dev->set_state_shutdown) { - SEQ_printf(m, " shutdown: "); - print_name_offset(m, dev->set_state_shutdown); - SEQ_printf(m, "\n"); - } + } - if (dev->set_state_periodic) { - SEQ_printf(m, " periodic: "); - print_name_offset(m, dev->set_state_periodic); - SEQ_printf(m, "\n"); - } + if (dev->set_state_periodic) { + SEQ_printf(m, " periodic: "); + print_name_offset(m, dev->set_state_periodic); + SEQ_printf(m, "\n"); + } - if (dev->set_state_oneshot) { - SEQ_printf(m, " oneshot: "); 
- print_name_offset(m, dev->set_state_oneshot); - SEQ_printf(m, "\n"); - } + if (dev->set_state_oneshot) { + SEQ_printf(m, " oneshot: "); + print_name_offset(m, dev->set_state_oneshot); + SEQ_printf(m, "\n"); + } - if (dev->set_state_oneshot_stopped) { - SEQ_printf(m, " oneshot stopped: "); - print_name_offset(m, dev->set_state_oneshot_stopped); - SEQ_printf(m, "\n"); - } + if (dev->set_state_oneshot_stopped) { + SEQ_printf(m, " oneshot stopped: "); + print_name_offset(m, dev->set_state_oneshot_stopped); + SEQ_printf(m, "\n"); + } - if (dev->tick_resume) { - SEQ_printf(m, " resume: "); - print_name_offset(m, dev->tick_resume); - SEQ_printf(m, "\n"); - } + if (dev->tick_resume) { + SEQ_printf(m, " resume: "); + print_name_offset(m, dev->tick_resume); + SEQ_printf(m, "\n"); } SEQ_printf(m, " event_handler: "); diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 54036ce2e2dd..5939f63d90cd 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -59,7 +59,11 @@ void string_get_size(u64 size, u64 blk_size, const enum string_size_units units, } exp = divisor[units] / (u32)blk_size; - if (size >= exp) { + /* + * size must be strictly greater than exp here to ensure that remainder + * is greater than divisor[units] coming out of the if below. 
+ */ + if (size > exp) { remainder = do_div(size, divisor[units]); remainder *= blk_size; i++; diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 7b28e9cdf1c7..8da211411b57 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -135,12 +135,11 @@ static __always_inline bool memory_is_poisoned_16(unsigned long addr) if (unlikely(*shadow_addr)) { u16 shadow_first_bytes = *(u16 *)shadow_addr; - s8 last_byte = (addr + 15) & KASAN_SHADOW_MASK; if (unlikely(shadow_first_bytes)) return true; - if (likely(!last_byte)) + if (likely(IS_ALIGNED(addr, 8))) return false; return memory_is_poisoned_1(addr + 15); diff --git a/mm/mmap.c b/mm/mmap.c index 971dd2cb77d2..c739d6db7193 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -612,8 +612,6 @@ static unsigned long count_vma_pages_range(struct mm_struct *mm, void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, struct rb_node **rb_link, struct rb_node *rb_parent) { - WARN_ONCE(vma->vm_file && !vma->vm_ops, "missing vma->vm_ops"); - /* Update tracking information for the gap following the new vma. 
*/ if (vma->vm_next) vma_gap_update(vma->vm_next); @@ -1638,12 +1636,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr, */ WARN_ON_ONCE(addr != vma->vm_start); - /* All file mapping must have ->vm_ops set */ - if (!vma->vm_ops) { - static const struct vm_operations_struct dummy_ops = {}; - vma->vm_ops = &dummy_ops; - } - addr = vma->vm_start; vm_flags = vma->vm_flags; } else if (vm_flags & VM_SHARED) { diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 525f454f7531..b9b0e3b5da49 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1353,11 +1353,12 @@ static void prepare_write_keepalive(struct ceph_connection *con) dout("prepare_write_keepalive %p\n", con); con_out_kvec_reset(con); if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) { - struct timespec ts = CURRENT_TIME; - struct ceph_timespec ceph_ts; - ceph_encode_timespec(&ceph_ts, &ts); + struct timespec now = CURRENT_TIME; + con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2); - con_out_kvec_add(con, sizeof(ceph_ts), &ceph_ts); + ceph_encode_timespec(&con->out_temp_keepalive2, &now); + con_out_kvec_add(con, sizeof(con->out_temp_keepalive2), + &con->out_temp_keepalive2); } else { con_out_kvec_add(con, sizeof(tag_keepalive), &tag_keepalive); } diff --git a/scripts/extract-cert.c b/scripts/extract-cert.c index 10d23ca9f617..6ce5945a0b89 100644 --- a/scripts/extract-cert.c +++ b/scripts/extract-cert.c @@ -1,15 +1,15 @@ /* Extract X.509 certificate in DER form from PKCS#11 or PEM. * - * Copyright © 2014 Red Hat, Inc. All Rights Reserved. - * Copyright © 2015 Intel Corporation. + * Copyright © 2014-2015 Red Hat, Inc. All Rights Reserved. + * Copyright © 2015 Intel Corporation. 
* * Authors: David Howells * David Woodhouse * * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the licence, or (at your option) any later version. */ #define _GNU_SOURCE #include diff --git a/scripts/sign-file.c b/scripts/sign-file.c index 058bba3103e2..c3899ca4811c 100755 --- a/scripts/sign-file.c +++ b/scripts/sign-file.c @@ -1,12 +1,15 @@ /* Sign a module file using the given key. * - * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) + * Copyright © 2014-2015 Red Hat, Inc. All Rights Reserved. + * Copyright © 2015 Intel Corporation. + * + * Authors: David Howells + * David Woodhouse * * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 + * of the licence, or (at your option) any later version. 
*/ #define _GNU_SOURCE #include diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 73455089feef..03c1652c9a1f 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -401,7 +401,7 @@ static bool verify_new_ex(struct dev_cgroup *dev_cgroup, bool match = false; RCU_LOCKDEP_WARN(!rcu_read_lock_held() && - lockdep_is_held(&devcgroup_mutex), + !lockdep_is_held(&devcgroup_mutex), "device_cgroup:verify_new_ex called without proper synchronization"); if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) { diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index eb51325e8ad9..284a76e04628 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -768,8 +768,8 @@ static int process_exit_event(struct perf_tool *tool, if (!evsel->attr.sample_id_all) { sample->cpu = 0; sample->time = 0; - sample->tid = event->comm.tid; - sample->pid = event->comm.pid; + sample->tid = event->fork.tid; + sample->pid = event->fork.pid; } print_sample_start(sample, thread, evsel); perf_event__fprintf(event, stdout); diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 1aa21c90731b..5b83f56a3b6f 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -34,6 +34,8 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) .disabled = 1, .freq = 1, }; + struct cpu_map *cpus; + struct thread_map *threads; attr.sample_freq = 500; @@ -50,14 +52,19 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) } perf_evlist__add(evlist, evsel); - evlist->cpus = cpu_map__dummy_new(); - evlist->threads = thread_map__new_by_tid(getpid()); - if (!evlist->cpus || !evlist->threads) { + cpus = cpu_map__dummy_new(); + threads = thread_map__new_by_tid(getpid()); + if (!cpus || !threads) { err = -ENOMEM; pr_debug("Not enough memory to create thread/cpu maps\n"); - goto out_delete_evlist; + goto out_free_maps; } + perf_evlist__set_maps(evlist, cpus, threads); + + cpus = NULL; + threads = NULL; + if 
(perf_evlist__open(evlist)) { const char *knob = "/proc/sys/kernel/perf_event_max_sample_rate"; @@ -107,6 +114,9 @@ next_event: err = -1; } +out_free_maps: + cpu_map__put(cpus); + thread_map__put(threads); out_delete_evlist: perf_evlist__delete(evlist); return err; diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 3a8fedef83bc..add16385f13e 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -43,6 +43,8 @@ int test__task_exit(void) }; const char *argv[] = { "true", NULL }; char sbuf[STRERR_BUFSIZE]; + struct cpu_map *cpus; + struct thread_map *threads; signal(SIGCHLD, sig_handler); @@ -58,14 +60,19 @@ int test__task_exit(void) * perf_evlist__prepare_workload we'll fill in the only thread * we're monitoring, the one forked there. */ - evlist->cpus = cpu_map__dummy_new(); - evlist->threads = thread_map__new_by_tid(-1); - if (!evlist->cpus || !evlist->threads) { + cpus = cpu_map__dummy_new(); + threads = thread_map__new_by_tid(-1); + if (!cpus || !threads) { err = -ENOMEM; pr_debug("Not enough memory to create thread/cpu maps\n"); - goto out_delete_evlist; + goto out_free_maps; } + perf_evlist__set_maps(evlist, cpus, threads); + + cpus = NULL; + threads = NULL; + err = perf_evlist__prepare_workload(evlist, &target, argv, false, workload_exec_failed_signal); if (err < 0) { @@ -114,6 +121,9 @@ retry: err = -1; } +out_free_maps: + cpu_map__put(cpus); + thread_map__put(threads); out_delete_evlist: perf_evlist__delete(evlist); return err; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index cf86f2d3a5e7..c04c60d4863c 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1968,7 +1968,8 @@ skip_annotation: &options[nr_options], dso); nr_options += add_map_opt(browser, &actions[nr_options], &options[nr_options], - browser->selection->map); + browser->selection ? 
+ browser->selection->map : NULL); /* perf script support */ if (browser->he_selection) { @@ -1976,6 +1977,15 @@ skip_annotation: &actions[nr_options], &options[nr_options], thread, NULL); + /* + * Note that browser->selection != NULL + * when browser->he_selection is not NULL, + * so we don't need to check browser->selection + * before fetching browser->selection->sym like what + * we do before fetching browser->selection->map. + * + * See hist_browser__show_entry. + */ nr_options += add_script_opt(browser, &actions[nr_options], &options[nr_options], diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d51a5200c8af..c8fc8a258f42 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -124,6 +124,33 @@ void perf_evlist__delete(struct perf_evlist *evlist) free(evlist); } +static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, + struct perf_evsel *evsel) +{ + /* + * We already have cpus for evsel (via PMU sysfs) so + * keep it, if there's no target cpu list defined. 
+ */ + if (!evsel->own_cpus || evlist->has_user_cpus) { + cpu_map__put(evsel->cpus); + evsel->cpus = cpu_map__get(evlist->cpus); + } else if (evsel->cpus != evsel->own_cpus) { + cpu_map__put(evsel->cpus); + evsel->cpus = cpu_map__get(evsel->own_cpus); + } + + thread_map__put(evsel->threads); + evsel->threads = thread_map__get(evlist->threads); +} + +static void perf_evlist__propagate_maps(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) + __perf_evlist__propagate_maps(evlist, evsel); +} + void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) { entry->evlist = evlist; @@ -133,18 +160,19 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) if (!evlist->nr_entries++) perf_evlist__set_id_pos(evlist); + + __perf_evlist__propagate_maps(evlist, entry); } void perf_evlist__splice_list_tail(struct perf_evlist *evlist, - struct list_head *list, - int nr_entries) + struct list_head *list) { - bool set_id_pos = !evlist->nr_entries; + struct perf_evsel *evsel, *temp; - list_splice_tail(list, &evlist->entries); - evlist->nr_entries += nr_entries; - if (set_id_pos) - perf_evlist__set_id_pos(evlist); + __evlist__for_each_safe(list, temp, evsel) { + list_del_init(&evsel->node); + perf_evlist__add(evlist, evsel); + } } void __perf_evlist__set_leader(struct list_head *list) @@ -210,7 +238,7 @@ static int perf_evlist__add_attrs(struct perf_evlist *evlist, list_add_tail(&evsel->node, &head); } - perf_evlist__splice_list_tail(evlist, &head, nr_attrs); + perf_evlist__splice_list_tail(evlist, &head); return 0; @@ -1103,71 +1131,56 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); } -static int perf_evlist__propagate_maps(struct perf_evlist *evlist, - bool has_user_cpus) -{ - struct perf_evsel *evsel; - - evlist__for_each(evlist, evsel) { - /* - * We already have cpus for evsel (via PMU sysfs) so - * keep it, 
if there's no target cpu list defined. - */ - if (evsel->cpus && has_user_cpus) - cpu_map__put(evsel->cpus); - - if (!evsel->cpus || has_user_cpus) - evsel->cpus = cpu_map__get(evlist->cpus); - - evsel->threads = thread_map__get(evlist->threads); - - if ((evlist->cpus && !evsel->cpus) || - (evlist->threads && !evsel->threads)) - return -ENOMEM; - } - - return 0; -} - int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) { - evlist->threads = thread_map__new_str(target->pid, target->tid, - target->uid); + struct cpu_map *cpus; + struct thread_map *threads; + + threads = thread_map__new_str(target->pid, target->tid, target->uid); - if (evlist->threads == NULL) + if (!threads) return -1; if (target__uses_dummy_map(target)) - evlist->cpus = cpu_map__dummy_new(); + cpus = cpu_map__dummy_new(); else - evlist->cpus = cpu_map__new(target->cpu_list); + cpus = cpu_map__new(target->cpu_list); - if (evlist->cpus == NULL) + if (!cpus) goto out_delete_threads; - return perf_evlist__propagate_maps(evlist, !!target->cpu_list); + evlist->has_user_cpus = !!target->cpu_list; + + perf_evlist__set_maps(evlist, cpus, threads); + + return 0; out_delete_threads: - thread_map__put(evlist->threads); - evlist->threads = NULL; + thread_map__put(threads); return -1; } -int perf_evlist__set_maps(struct perf_evlist *evlist, - struct cpu_map *cpus, - struct thread_map *threads) +void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, + struct thread_map *threads) { - if (evlist->cpus) + /* + * Allow for the possibility that one or another of the maps isn't being + * changed i.e. don't put it. Note we are assuming the maps that are + * being applied are brand new and evlist is taking ownership of the + * original reference count of 1. If that is not the case it is up to + * the caller to increase the reference count. 
+ */ + if (cpus != evlist->cpus) { cpu_map__put(evlist->cpus); + evlist->cpus = cpus; + } - evlist->cpus = cpus; - - if (evlist->threads) + if (threads != evlist->threads) { thread_map__put(evlist->threads); + evlist->threads = threads; + } - evlist->threads = threads; - - return perf_evlist__propagate_maps(evlist, false); + perf_evlist__propagate_maps(evlist); } int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) @@ -1387,6 +1400,8 @@ void perf_evlist__close(struct perf_evlist *evlist) static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist) { + struct cpu_map *cpus; + struct thread_map *threads; int err = -ENOMEM; /* @@ -1398,20 +1413,19 @@ static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist) * error, and we may not want to do that fallback to a * default cpu identity map :-\ */ - evlist->cpus = cpu_map__new(NULL); - if (evlist->cpus == NULL) + cpus = cpu_map__new(NULL); + if (!cpus) goto out; - evlist->threads = thread_map__new_dummy(); - if (evlist->threads == NULL) - goto out_free_cpus; + threads = thread_map__new_dummy(); + if (!threads) + goto out_put; - err = 0; + perf_evlist__set_maps(evlist, cpus, threads); out: return err; -out_free_cpus: - cpu_map__put(evlist->cpus); - evlist->cpus = NULL; +out_put: + cpu_map__put(cpus); goto out; } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index b39a6198f4ac..115d8b53c601 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -42,6 +42,7 @@ struct perf_evlist { int nr_mmaps; bool overwrite; bool enabled; + bool has_user_cpus; size_t mmap_len; int id_pos; int is_pos; @@ -155,9 +156,8 @@ int perf_evlist__enable_event_idx(struct perf_evlist *evlist, void perf_evlist__set_selected(struct perf_evlist *evlist, struct perf_evsel *evsel); -int perf_evlist__set_maps(struct perf_evlist *evlist, - struct cpu_map *cpus, - struct thread_map *threads); +void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map 
*cpus, + struct thread_map *threads); int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target); int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel); @@ -179,8 +179,7 @@ bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist); bool perf_evlist__valid_read_format(struct perf_evlist *evlist); void perf_evlist__splice_list_tail(struct perf_evlist *evlist, - struct list_head *list, - int nr_entries); + struct list_head *list); static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist) { diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index c53f79123b37..5410483d5219 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1033,6 +1033,7 @@ void perf_evsel__exit(struct perf_evsel *evsel) perf_evsel__free_config_terms(evsel); close_cgroup(evsel->cgrp); cpu_map__put(evsel->cpus); + cpu_map__put(evsel->own_cpus); thread_map__put(evsel->threads); zfree(&evsel->group_name); zfree(&evsel->name); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 298e6bbca200..ef8925f7211a 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -98,6 +98,7 @@ struct perf_evsel { struct cgroup_sel *cgrp; void *handler; struct cpu_map *cpus; + struct cpu_map *own_cpus; struct thread_map *threads; unsigned int sample_size; int id_pos; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 41814547da15..fce6634aebe2 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1438,7 +1438,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused, if (ph->needs_swap) nr = bswap_32(nr); - ph->env.nr_cpus_online = nr; + ph->env.nr_cpus_avail = nr; ret = readn(fd, &nr, sizeof(nr)); if (ret != sizeof(nr)) @@ -1447,7 +1447,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused, if (ph->needs_swap) nr = bswap_32(nr); - ph->env.nr_cpus_avail = nr; + ph->env.nr_cpus_online = nr; 
return 0; } diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index ea768625ab5b..eb0e7f8bf515 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -623,7 +623,7 @@ static int intel_bts_process_event(struct perf_session *session, if (err) return err; if (event->header.type == PERF_RECORD_EXIT) { - err = intel_bts_process_tid_exit(bts, event->comm.tid); + err = intel_bts_process_tid_exit(bts, event->fork.tid); if (err) return err; } diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index bb41c20e6005..535d86f8e4d1 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1494,7 +1494,7 @@ static int intel_pt_process_event(struct perf_session *session, if (pt->timeless_decoding) { if (event->header.type == PERF_RECORD_EXIT) { err = intel_pt_process_timeless_queues(pt, - event->comm.tid, + event->fork.tid, sample->time); } } else if (timestamp) { diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index d826e6f515db..21ed6ee63da9 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -287,8 +287,8 @@ __add_event(struct list_head *list, int *idx, if (!evsel) return NULL; - if (cpus) - evsel->cpus = cpu_map__get(cpus); + evsel->cpus = cpu_map__get(cpus); + evsel->own_cpus = cpu_map__get(cpus); if (name) evsel->name = strdup(name); @@ -1140,10 +1140,9 @@ int parse_events(struct perf_evlist *evlist, const char *str, ret = parse_events__scanner(str, &data, PE_START_EVENTS); perf_pmu__parse_cleanup(); if (!ret) { - int entries = data.idx - evlist->nr_entries; struct perf_evsel *last; - perf_evlist__splice_list_tail(evlist, &data.list, entries); + perf_evlist__splice_list_tail(evlist, &data.list); evlist->nr_groups += data.nr_groups; last = perf_evlist__last(evlist); last->cmdline_group_boundary = true; diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 591905a02b92..9cd70819c795 100644 --- 
a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -255,7 +255,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc list_add_tail(&term->list, head); ALLOC_LIST(list); - ABORT_ON(parse_events_add_pmu(list, &data->idx, "cpu", head)); + ABORT_ON(parse_events_add_pmu(data, list, "cpu", head)); parse_events__free_terms(head); $$ = list; } diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c index 9a43a59a9bb4..421c607a8856 100644 --- a/tools/testing/selftests/x86/entry_from_vm86.c +++ b/tools/testing/selftests/x86/entry_from_vm86.c @@ -116,8 +116,9 @@ static bool do_test(struct vm86plus_struct *v86, unsigned long eip, v86->regs.eip = eip; ret = vm86(VM86_ENTER, v86); - if (ret == -1 && errno == ENOSYS) { - printf("[SKIP]\tvm86 not supported\n"); + if (ret == -1 && (errno == ENOSYS || errno == EPERM)) { + printf("[SKIP]\tvm86 %s\n", + errno == ENOSYS ? "not supported" : "not allowed"); return false; } diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile index 505ad51b3b51..39c89a5ea990 100644 --- a/tools/virtio/Makefile +++ b/tools/virtio/Makefile @@ -6,7 +6,7 @@ vringh_test: vringh_test.o vringh.o virtio_ring.o CFLAGS += -g -O2 -Werror -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE vpath %.c ../../drivers/virtio ../../drivers/vhost mod: - ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test + ${MAKE} -C `pwd`/../.. 
M=`pwd`/vhost_test V=${V} .PHONY: all test mod clean clean: ${RM} *.o vringh_test virtio_test vhost_test/*.o vhost_test/.*.cmd \ diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h index aff61e13306c..26b7926bda88 100644 --- a/tools/virtio/asm/barrier.h +++ b/tools/virtio/asm/barrier.h @@ -3,6 +3,8 @@ #define mb() __sync_synchronize() #define smp_mb() mb() +# define dma_rmb() barrier() +# define dma_wmb() barrier() # define smp_rmb() barrier() # define smp_wmb() barrier() /* Weak barriers should be used. If not - it's a bug */ diff --git a/tools/virtio/linux/export.h b/tools/virtio/linux/export.h new file mode 100644 index 000000000000..416875e29254 --- /dev/null +++ b/tools/virtio/linux/export.h @@ -0,0 +1,3 @@ +#define EXPORT_SYMBOL_GPL(sym) extern typeof(sym) sym +#define EXPORT_SYMBOL(sym) extern typeof(sym) sym + diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h index 1e8ce6979c1e..0a3da64638ce 100644 --- a/tools/virtio/linux/kernel.h +++ b/tools/virtio/linux/kernel.h @@ -22,6 +22,7 @@ typedef unsigned long long dma_addr_t; typedef size_t __kernel_size_t; +typedef unsigned int __wsum; struct page { unsigned long long dummy; @@ -47,6 +48,13 @@ static inline void *kmalloc(size_t s, gfp_t gfp) return __kmalloc_fake; return malloc(s); } +static inline void *kzalloc(size_t s, gfp_t gfp) +{ + void *p = kmalloc(s, gfp); + + memset(p, 0, s); + return p; +} static inline void kfree(void *p) { diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 76e38d231e99..48c6e1ac6827 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -199,6 +199,14 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, */ timer->irq = irq; + /* + * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 + * and to 0 for ARMv7. We provide an implementation that always + * resets the timer to be disabled and unmasked and is compliant with + * the ARMv7 architecture. 
+ */ + timer->cntv_ctl = 0; + /* * Tell the VGIC that the virtual interrupt is tied to a * physical interrupt. We do that once per VCPU. diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index afbf925b00f4..7dd5d62f10a1 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -288,7 +288,7 @@ int vgic_v3_probe(struct device_node *vgic_node, vgic->vctrl_base = NULL; vgic->type = VGIC_V3; - vgic->max_gic_vcpus = KVM_MAX_VCPUS; + vgic->max_gic_vcpus = VGIC_V3_MAX_CPUS; kvm_info("%s@%llx IRQ%d\n", vgic_node->name, vcpu_res.start, vgic->maint_irq); diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 9eb489a2c94c..6bd1c9bf7ae7 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1144,26 +1144,11 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, struct irq_phys_map *map; map = vgic_irq_map_search(vcpu, irq); - /* - * If we have a mapping, and the virtual interrupt is - * being injected, then we must set the state to - * active in the physical world. Otherwise the - * physical interrupt will fire and the guest will - * exit before processing the virtual interrupt. 
- */ if (map) { - int ret; - - BUG_ON(!map->active); vlr.hwirq = map->phys_irq; vlr.state |= LR_HW; vlr.state &= ~LR_EOI_INT; - ret = irq_set_irqchip_state(map->irq, - IRQCHIP_STATE_ACTIVE, - true); - WARN_ON(ret); - /* * Make sure we're not going to sample this * again, as a HW-backed interrupt cannot be @@ -1255,7 +1240,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; unsigned long *pa_percpu, *pa_shared; - int i, vcpu_id; + int i, vcpu_id, lr, ret; int overflow = 0; int nr_shared = vgic_nr_shared_irqs(dist); @@ -1310,6 +1295,31 @@ epilog: */ clear_bit(vcpu_id, dist->irq_pending_on_cpu); } + + for (lr = 0; lr < vgic->nr_lr; lr++) { + struct vgic_lr vlr; + + if (!test_bit(lr, vgic_cpu->lr_used)) + continue; + + vlr = vgic_get_lr(vcpu, lr); + + /* + * If we have a mapping, and the virtual interrupt is + * presented to the guest (as pending or active), then we must + * set the state to active in the physical world. See + * Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt. 
+ */ + if (vlr.state & LR_HW) { + struct irq_phys_map *map; + map = vgic_irq_map_search(vcpu, vlr.irq); + + ret = irq_set_irqchip_state(map->irq, + IRQCHIP_STATE_ACTIVE, + true); + WARN_ON(ret); + } + } } static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h index 5cbf190d238c..6bca74ca5331 100644 --- a/virt/kvm/coalesced_mmio.h +++ b/virt/kvm/coalesced_mmio.h @@ -24,9 +24,9 @@ struct kvm_coalesced_mmio_dev { int kvm_coalesced_mmio_init(struct kvm *kvm); void kvm_coalesced_mmio_free(struct kvm *kvm); int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, - struct kvm_coalesced_mmio_zone *zone); + struct kvm_coalesced_mmio_zone *zone); int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, - struct kvm_coalesced_mmio_zone *zone); + struct kvm_coalesced_mmio_zone *zone); #else diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 9ff4193dfa49..79db45336e3a 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -771,40 +771,14 @@ static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags) return KVM_MMIO_BUS; } -static int -kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +static int kvm_assign_ioeventfd_idx(struct kvm *kvm, + enum kvm_bus bus_idx, + struct kvm_ioeventfd *args) { - enum kvm_bus bus_idx; - struct _ioeventfd *p; - struct eventfd_ctx *eventfd; - int ret; - - bus_idx = ioeventfd_bus_from_flags(args->flags); - /* must be natural-word sized, or 0 to ignore length */ - switch (args->len) { - case 0: - case 1: - case 2: - case 4: - case 8: - break; - default: - return -EINVAL; - } - - /* check for range overflow */ - if (args->addr + args->len < args->addr) - return -EINVAL; - /* check for extra flags that we don't understand */ - if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) - return -EINVAL; - - /* ioeventfd with no length can't be combined with DATAMATCH */ - if (!args->len && - args->flags & (KVM_IOEVENTFD_FLAG_PIO | - 
KVM_IOEVENTFD_FLAG_DATAMATCH)) - return -EINVAL; + struct eventfd_ctx *eventfd; + struct _ioeventfd *p; + int ret; eventfd = eventfd_ctx_fdget(args->fd); if (IS_ERR(eventfd)) @@ -843,16 +817,6 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) if (ret < 0) goto unlock_fail; - /* When length is ignored, MMIO is also put on a separate bus, for - * faster lookups. - */ - if (!args->len && !(args->flags & KVM_IOEVENTFD_FLAG_PIO)) { - ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS, - p->addr, 0, &p->dev); - if (ret < 0) - goto register_fail; - } - kvm->buses[bus_idx]->ioeventfd_count++; list_add_tail(&p->list, &kvm->ioeventfds); @@ -860,8 +824,6 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) return 0; -register_fail: - kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); unlock_fail: mutex_unlock(&kvm->slots_lock); @@ -873,14 +835,13 @@ fail: } static int -kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, + struct kvm_ioeventfd *args) { - enum kvm_bus bus_idx; struct _ioeventfd *p, *tmp; struct eventfd_ctx *eventfd; int ret = -ENOENT; - bus_idx = ioeventfd_bus_from_flags(args->flags); eventfd = eventfd_ctx_fdget(args->fd); if (IS_ERR(eventfd)) return PTR_ERR(eventfd); @@ -901,10 +862,6 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) continue; kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); - if (!p->length) { - kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS, - &p->dev); - } kvm->buses[bus_idx]->ioeventfd_count--; ioeventfd_release(p); ret = 0; @@ -918,6 +875,71 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) return ret; } +static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +{ + enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags); + int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args); + + if (!args->len && bus_idx == KVM_MMIO_BUS) + 
kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args); + + return ret; +} + +static int +kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) +{ + enum kvm_bus bus_idx; + int ret; + + bus_idx = ioeventfd_bus_from_flags(args->flags); + /* must be natural-word sized, or 0 to ignore length */ + switch (args->len) { + case 0: + case 1: + case 2: + case 4: + case 8: + break; + default: + return -EINVAL; + } + + /* check for range overflow */ + if (args->addr + args->len < args->addr) + return -EINVAL; + + /* check for extra flags that we don't understand */ + if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK) + return -EINVAL; + + /* ioeventfd with no length can't be combined with DATAMATCH */ + if (!args->len && + args->flags & (KVM_IOEVENTFD_FLAG_PIO | + KVM_IOEVENTFD_FLAG_DATAMATCH)) + return -EINVAL; + + ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args); + if (ret) + goto fail; + + /* When length is ignored, MMIO is also put on a separate bus, for + * faster lookups. + */ + if (!args->len && bus_idx == KVM_MMIO_BUS) { + ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args); + if (ret < 0) + goto fast_fail; + } + + return 0; + +fast_fail: + kvm_deassign_ioeventfd_idx(kvm, bus_idx, args); +fail: + return ret; +} + int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a25a73147f71..04146a2e1d81 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2004,6 +2004,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) if (vcpu->halt_poll_ns) { ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns); + ++vcpu->stat.halt_attempted_poll; do { /* * This sets KVM_REQ_UNHALT if an interrupt @@ -2043,7 +2044,8 @@ out: else if (vcpu->halt_poll_ns < halt_poll_ns && block_ns < halt_poll_ns) grow_halt_poll_ns(vcpu); - } + } else + vcpu->halt_poll_ns = 0; trace_kvm_vcpu_wakeup(block_ns, waited); } @@ -3156,10 +3158,25 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus) static 
inline int kvm_io_bus_cmp(const struct kvm_io_range *r1, const struct kvm_io_range *r2) { - if (r1->addr < r2->addr) + gpa_t addr1 = r1->addr; + gpa_t addr2 = r2->addr; + + if (addr1 < addr2) return -1; - if (r1->addr + r1->len > r2->addr + r2->len) + + /* If r2->len == 0, match the exact address. If r2->len != 0, + * accept any overlapping write. Any order is acceptable for + * overlapping ranges, because kvm_io_bus_get_first_dev ensures + * we process all of them. + */ + if (r2->len) { + addr1 += r1->len; + addr2 += r2->len; + } + + if (addr1 > addr2) return 1; + return 0; }