Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
author Paul Mundt <lethal@linux-sh.org>
Fri, 25 Sep 2009 03:15:15 +0000 (12:15 +0900)
committer Paul Mundt <lethal@linux-sh.org>
Fri, 25 Sep 2009 03:15:15 +0000 (12:15 +0900)
626 files changed:
Documentation/arm/tcm.txt [new file with mode: 0644]
Documentation/auxdisplay/cfag12864b-example.c
Documentation/cgroups/cgroups.txt
Documentation/cgroups/memory.txt
Documentation/crypto/async-tx-api.txt
Documentation/filesystems/sharedsubtree.txt
Documentation/filesystems/vfs.txt
Documentation/ioctl/ioctl-number.txt
Documentation/sysctl/fs.txt
Documentation/sysctl/kernel.txt
Documentation/sysctl/vm.txt
Documentation/vm/.gitignore
Documentation/vm/locking
Documentation/vm/page-types.c
MAINTAINERS
arch/alpha/include/asm/fcntl.h
arch/alpha/kernel/core_marvel.c
arch/alpha/kernel/core_titan.c
arch/alpha/kernel/init_task.c
arch/alpha/kernel/pci_impl.h
arch/alpha/kernel/pci_iommu.c
arch/alpha/kernel/vmlinux.lds.S
arch/arm/Kconfig
arch/arm/common/locomo.c
arch/arm/common/vic.c
arch/arm/configs/littleton_defconfig [deleted file]
arch/arm/configs/pxa3xx_defconfig [new file with mode: 0644]
arch/arm/configs/xcep_defconfig [new file with mode: 0644]
arch/arm/configs/zylonite_defconfig [deleted file]
arch/arm/include/asm/atomic.h
arch/arm/include/asm/cache.h
arch/arm/include/asm/cputype.h
arch/arm/include/asm/hardware/iop3xx-adma.h
arch/arm/include/asm/hardware/iop_adma.h
arch/arm/include/asm/mach/mmc.h [deleted file]
arch/arm/include/asm/tcm.h [new file with mode: 0644]
arch/arm/include/asm/unified.h
arch/arm/kernel/Makefile
arch/arm/kernel/entry-armv.S
arch/arm/kernel/entry-header.S
arch/arm/kernel/kprobes.c
arch/arm/kernel/setup.c
arch/arm/kernel/tcm.c [new file with mode: 0644]
arch/arm/kernel/tcm.h [new file with mode: 0644]
arch/arm/kernel/vmlinux.lds.S
arch/arm/lib/copy_page.S
arch/arm/mach-at91/at91cap9_devices.c
arch/arm/mach-at91/at91sam9g45_devices.c
arch/arm/mach-at91/at91sam9rl_devices.c
arch/arm/mach-at91/board-cap9adk.c
arch/arm/mach-at91/board-neocore926.c
arch/arm/mach-at91/board-sam9m10g45ek.c
arch/arm/mach-at91/board-sam9rlek.c
arch/arm/mach-integrator/integrator_cp.c
arch/arm/mach-iop13xx/include/mach/adma.h
arch/arm/mach-iop13xx/setup.c
arch/arm/mach-pxa/Kconfig
arch/arm/mach-pxa/Makefile
arch/arm/mach-pxa/balloon3.c [new file with mode: 0644]
arch/arm/mach-pxa/clock.h
arch/arm/mach-pxa/cm-x270.c
arch/arm/mach-pxa/cm-x300.c
arch/arm/mach-pxa/colibri-pxa300.c
arch/arm/mach-pxa/colibri-pxa320.c
arch/arm/mach-pxa/colibri-pxa3xx.c
arch/arm/mach-pxa/corgi.c
arch/arm/mach-pxa/csb726.c
arch/arm/mach-pxa/devices.c
arch/arm/mach-pxa/devices.h
arch/arm/mach-pxa/e740.c
arch/arm/mach-pxa/e750.c
arch/arm/mach-pxa/em-x270.c
arch/arm/mach-pxa/eseries.c
arch/arm/mach-pxa/gumstix.c
arch/arm/mach-pxa/hx4700.c
arch/arm/mach-pxa/idp.c
arch/arm/mach-pxa/imote2.c
arch/arm/mach-pxa/include/mach/balloon3.h [new file with mode: 0644]
arch/arm/mach-pxa/include/mach/colibri.h
arch/arm/mach-pxa/include/mach/entry-macro.S
arch/arm/mach-pxa/include/mach/hardware.h
arch/arm/mach-pxa/include/mach/irda.h
arch/arm/mach-pxa/include/mach/irqs.h
arch/arm/mach-pxa/include/mach/mfp.h
arch/arm/mach-pxa/include/mach/mmc.h
arch/arm/mach-pxa/include/mach/palmtc.h [new file with mode: 0644]
arch/arm/mach-pxa/include/mach/palmtx.h
arch/arm/mach-pxa/include/mach/pxa3xx-regs.h
arch/arm/mach-pxa/include/mach/pxafb.h
arch/arm/mach-pxa/include/mach/regs-intc.h
arch/arm/mach-pxa/include/mach/uncompress.h
arch/arm/mach-pxa/irq.c
arch/arm/mach-pxa/littleton.c
arch/arm/mach-pxa/lubbock.c
arch/arm/mach-pxa/magician.c
arch/arm/mach-pxa/mainstone.c
arch/arm/mach-pxa/mioa701.c
arch/arm/mach-pxa/palmld.c
arch/arm/mach-pxa/palmt5.c
arch/arm/mach-pxa/palmtc.c [new file with mode: 0644]
arch/arm/mach-pxa/palmte2.c
arch/arm/mach-pxa/palmtx.c
arch/arm/mach-pxa/palmz72.c
arch/arm/mach-pxa/pcm990-baseboard.c
arch/arm/mach-pxa/poodle.c
arch/arm/mach-pxa/pxa2xx.c
arch/arm/mach-pxa/pxa300.c
arch/arm/mach-pxa/pxa320.c
arch/arm/mach-pxa/pxa930.c
arch/arm/mach-pxa/spitz.c
arch/arm/mach-pxa/tosa.c
arch/arm/mach-pxa/treo680.c
arch/arm/mach-pxa/trizeps4.c
arch/arm/mach-pxa/xcep.c [new file with mode: 0644]
arch/arm/mach-pxa/zylonite.c
arch/arm/mach-realview/core.c
arch/arm/mach-realview/core.h
arch/arm/mach-realview/realview_eb.c
arch/arm/mach-realview/realview_pb1176.c
arch/arm/mach-realview/realview_pb11mp.c
arch/arm/mach-realview/realview_pba8.c
arch/arm/mach-realview/realview_pbx.c
arch/arm/mach-s3c2410/Kconfig
arch/arm/mach-s3c2412/Kconfig
arch/arm/mach-s3c2440/Kconfig
arch/arm/mach-s3c6400/Kconfig
arch/arm/mach-s3c6410/Kconfig
arch/arm/mach-sa1100/dma.c
arch/arm/mach-u300/Kconfig
arch/arm/mach-u300/Makefile
arch/arm/mach-u300/core.c
arch/arm/mach-u300/dummyspichip.c [new file with mode: 0644]
arch/arm/mach-u300/gpio.c
arch/arm/mach-u300/i2c.c [new file with mode: 0644]
arch/arm/mach-u300/i2c.h [new file with mode: 0644]
arch/arm/mach-u300/include/mach/memory.h
arch/arm/mach-u300/include/mach/syscon.h
arch/arm/mach-u300/mmc.c
arch/arm/mach-u300/padmux.c
arch/arm/mach-u300/padmux.h
arch/arm/mach-u300/spi.c [new file with mode: 0644]
arch/arm/mach-u300/spi.h [new file with mode: 0644]
arch/arm/mach-u300/timer.c
arch/arm/mach-versatile/core.c
arch/arm/mach-versatile/versatile_pb.c
arch/arm/mm/Kconfig
arch/arm/mm/fault.c
arch/arm/mm/init.c
arch/arm/plat-iop/adma.c
arch/arm/plat-pxa/dma.c
arch/arm/plat-pxa/include/plat/mfp.h
arch/arm/plat-pxa/mfp.c
arch/arm/plat-s3c/gpio.c
arch/arm/plat-s3c64xx/dma.c
arch/arm/plat-s3c64xx/include/plat/dma-plat.h
arch/arm/plat-s3c64xx/include/plat/irqs.h
arch/arm/plat-s3c64xx/s3c6400-clock.c
arch/arm/plat-stmp3xxx/dma.c
arch/arm/tools/mach-types
arch/blackfin/kernel/vmlinux.lds.S
arch/cris/arch-v10/kernel/time.c
arch/cris/arch-v32/kernel/smp.c
arch/cris/arch-v32/kernel/time.c
arch/cris/arch-v32/mach-a3/io.c
arch/cris/arch-v32/mach-fs/io.c
arch/cris/include/arch-v10/arch/mmu.h
arch/cris/include/arch-v32/arch/mmu.h
arch/cris/include/asm/hardirq.h
arch/cris/include/asm/pgtable.h
arch/cris/kernel/irq.c
arch/cris/kernel/vmlinux.lds.S
arch/frv/kernel/pm.c
arch/frv/mb93090-mb00/Makefile
arch/frv/mb93090-mb00/flash.c [new file with mode: 0644]
arch/h8300/kernel/vmlinux.lds.S
arch/m32r/include/asm/page.h
arch/m32r/include/asm/processor.h
arch/m32r/include/asm/thread_info.h
arch/m32r/kernel/entry.S
arch/m32r/kernel/head.S
arch/m32r/kernel/vmlinux.lds.S
arch/microblaze/Kconfig
arch/microblaze/Makefile
arch/microblaze/boot/Makefile
arch/microblaze/boot/dts/system.dts [new symlink]
arch/microblaze/boot/linked_dtb.S [new file with mode: 0644]
arch/microblaze/configs/mmu_defconfig
arch/microblaze/configs/nommu_defconfig
arch/microblaze/include/asm/asm-compat.h [new file with mode: 0644]
arch/microblaze/include/asm/io.h
arch/microblaze/include/asm/ipc.h [deleted file]
arch/microblaze/include/asm/page.h
arch/microblaze/include/asm/setup.h
arch/microblaze/include/asm/syscall.h [new file with mode: 0644]
arch/microblaze/kernel/cpu/cpuinfo.c
arch/microblaze/kernel/entry.S
arch/microblaze/kernel/exceptions.c
arch/microblaze/kernel/head.S
arch/microblaze/kernel/hw_exception_handler.S
arch/microblaze/kernel/process.c
arch/microblaze/kernel/ptrace.c
arch/microblaze/kernel/setup.c
arch/microblaze/kernel/vmlinux.lds.S
arch/microblaze/mm/init.c
arch/mips/lasat/sysctl.c
arch/mn10300/kernel/vmlinux.lds.S
arch/parisc/include/asm/fcntl.h
arch/parisc/kernel/vmlinux.lds.S
arch/powerpc/Kconfig
arch/powerpc/Makefile
arch/powerpc/include/asm/device.h
arch/powerpc/include/asm/dma-mapping.h
arch/powerpc/include/asm/fsldma.h [new file with mode: 0644]
arch/powerpc/include/asm/iommu.h
arch/powerpc/include/asm/pmc.h
arch/powerpc/include/asm/pte-40x.h
arch/powerpc/include/asm/pte-8xx.h
arch/powerpc/include/asm/pte-common.h
arch/powerpc/kernel/dma-iommu.c
arch/powerpc/kernel/dma.c
arch/powerpc/kernel/exceptions-64e.S
arch/powerpc/kernel/pci-common.c
arch/powerpc/kernel/process.c
arch/powerpc/kernel/prom_init.c
arch/powerpc/kernel/vdso.c
arch/powerpc/kernel/vio.c
arch/powerpc/kernel/vmlinux.lds.S
arch/powerpc/mm/pgtable.c
arch/powerpc/mm/tlb_low_64e.S
arch/powerpc/platforms/cell/beat_iommu.c
arch/powerpc/platforms/cell/iommu.c
arch/powerpc/platforms/iseries/iommu.c
arch/powerpc/platforms/pasemi/iommu.c
arch/powerpc/platforms/pseries/iommu.c
arch/powerpc/relocs_check.pl [new file with mode: 0755]
arch/powerpc/sysdev/dart_iommu.c
arch/powerpc/xmon/xmon.c
arch/s390/appldata/appldata_base.c
arch/s390/kernel/debug.c
arch/s390/mm/cmm.c
arch/sh/drivers/dma/Kconfig
arch/sh/drivers/dma/Makefile
arch/sh/include/asm/dma-sh.h
arch/um/include/asm/common.lds.S
arch/um/kernel/dyn.lds.S
arch/um/kernel/uml.lds.S
arch/x86/include/asm/nmi.h
arch/x86/include/asm/pci.h
arch/x86/kernel/apic/nmi.c
arch/x86/kernel/vsyscall_64.c
arch/x86/mm/fault.c
arch/x86/mm/pageattr.c
arch/x86/pci/common.c
arch/xtensa/kernel/vmlinux.lds.S
crypto/async_tx/Kconfig
crypto/async_tx/Makefile
crypto/async_tx/async_memcpy.c
crypto/async_tx/async_memset.c
crypto/async_tx/async_pq.c [new file with mode: 0644]
crypto/async_tx/async_raid6_recov.c [new file with mode: 0644]
crypto/async_tx/async_tx.c
crypto/async_tx/async_xor.c
crypto/async_tx/raid6test.c [new file with mode: 0644]
drivers/acpi/button.c
drivers/cdrom/cdrom.c
drivers/char/Kconfig
drivers/char/Makefile
drivers/char/agp/intel-agp.c
drivers/char/bfin-otp.c
drivers/char/hpet.c
drivers/char/hvc_console.c
drivers/char/hvc_console.h
drivers/char/hvc_iucv.c
drivers/char/mem.c
drivers/char/mwave/mwavedd.c
drivers/char/random.c
drivers/char/rio/rioctrl.c
drivers/char/uv_mmtimer.c [new file with mode: 0644]
drivers/dca/dca-core.c
drivers/dma/Kconfig
drivers/dma/Makefile
drivers/dma/at_hdmac.c
drivers/dma/at_hdmac_regs.h
drivers/dma/dmaengine.c
drivers/dma/dmatest.c
drivers/dma/dw_dmac.c
drivers/dma/dw_dmac_regs.h
drivers/dma/fsldma.c
drivers/dma/fsldma.h
drivers/dma/ioat.c [deleted file]
drivers/dma/ioat/Makefile [new file with mode: 0644]
drivers/dma/ioat/dca.c [new file with mode: 0644]
drivers/dma/ioat/dma.c [new file with mode: 0644]
drivers/dma/ioat/dma.h [new file with mode: 0644]
drivers/dma/ioat/dma_v2.c [new file with mode: 0644]
drivers/dma/ioat/dma_v2.h [new file with mode: 0644]
drivers/dma/ioat/dma_v3.c [new file with mode: 0644]
drivers/dma/ioat/hw.h [new file with mode: 0644]
drivers/dma/ioat/pci.c [new file with mode: 0644]
drivers/dma/ioat/registers.h [new file with mode: 0644]
drivers/dma/ioat_dca.c [deleted file]
drivers/dma/ioat_dma.c [deleted file]
drivers/dma/ioatdma.h [deleted file]
drivers/dma/ioatdma_hw.h [deleted file]
drivers/dma/ioatdma_registers.h [deleted file]
drivers/dma/iop-adma.c
drivers/dma/iovlock.c
drivers/dma/mv_xor.c
drivers/dma/mv_xor.h
drivers/dma/shdma.c [new file with mode: 0644]
drivers/dma/shdma.h [new file with mode: 0644]
drivers/dma/txx9dmac.c
drivers/dma/txx9dmac.h
drivers/edac/Kconfig
drivers/edac/Makefile
drivers/edac/cpc925_edac.c
drivers/edac/edac_device.c
drivers/edac/edac_mc.c
drivers/edac/edac_pci.c
drivers/edac/i3200_edac.c [new file with mode: 0644]
drivers/edac/mpc85xx_edac.c
drivers/edac/mv64x60_edac.c
drivers/gpu/drm/Kconfig
drivers/gpu/drm/drm_gem.c
drivers/gpu/drm/i915/Makefile
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_dma.c
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_irq.c
drivers/gpu/drm/i915/i915_opregion.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/i915_suspend.c
drivers/gpu/drm/i915/i915_trace.h [new file with mode: 0644]
drivers/gpu/drm/i915/i915_trace_points.c [new file with mode: 0644]
drivers/gpu/drm/i915/intel_bios.c
drivers/gpu/drm/i915/intel_crt.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_drv.h
drivers/gpu/drm/i915/intel_lvds.c
drivers/gpu/drm/i915/intel_sdvo.c
drivers/i2c/busses/Kconfig
drivers/idle/i7300_idle.c
drivers/infiniband/core/mad_rmpp.c
drivers/infiniband/hw/mthca/mthca_catas.c
drivers/infiniband/hw/nes/nes_nic.c
drivers/infiniband/ulp/ipoib/ipoib_multicast.c
drivers/input/misc/Kconfig
drivers/md/Kconfig
drivers/md/bitmap.c
drivers/md/linear.c
drivers/md/md.c
drivers/md/md.h
drivers/md/multipath.c
drivers/md/raid0.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c
drivers/md/raid5.h
drivers/media/dvb/dvb-core/dvbdev.h
drivers/media/dvb/dvb-usb/Kconfig
drivers/media/video/saa7164/saa7164-api.c
drivers/media/video/saa7164/saa7164-cmd.c
drivers/media/video/saa7164/saa7164-core.c
drivers/media/video/saa7164/saa7164.h
drivers/memstick/core/memstick.c
drivers/misc/sgi-gru/grukservices.c
drivers/misc/sgi-gru/gruprocfs.c
drivers/mmc/host/atmel-mci.c
drivers/mmc/host/mmci.c
drivers/mmc/host/mmci.h
drivers/mmc/host/pxamci.c
drivers/mtd/Kconfig
drivers/mtd/maps/Kconfig
drivers/net/cris/eth_v10.c
drivers/net/irda/pxaficp_ir.c
drivers/net/mlx4/fw.c
drivers/net/wireless/arlan-proc.c
drivers/parport/procfs.c
drivers/pci/hotplug/pciehp.h
drivers/pci/hotplug/pciehp_acpi.c
drivers/pci/hotplug/pciehp_core.c
drivers/pci/hotplug/pciehp_ctrl.c
drivers/pci/hotplug/pciehp_hpc.c
drivers/pci/hotplug/pciehp_pci.c
drivers/pci/pcie/aer/aerdrv.c
drivers/pci/pcie/aspm.c
drivers/pcmcia/Makefile
drivers/pcmcia/pxa2xx_base.c
drivers/pcmcia/pxa2xx_palmtc.c [new file with mode: 0644]
drivers/rtc/rtc-pxa.c
drivers/rtc/rtc-sa1100.c
drivers/serial/cpm_uart/cpm_uart_core.c
drivers/serial/crisv10.c
drivers/serial/pxa.c
drivers/spi/amba-pl022.c
drivers/spi/pxa2xx_spi.c
drivers/staging/go7007/Makefile
drivers/usb/Kconfig
drivers/usb/host/ohci-pxa27x.c
drivers/usb/serial/sierra.c
drivers/video/backlight/da903x_bl.c
drivers/video/pxafb.c
drivers/vlynq/vlynq.c
fs/adfs/inode.c
fs/attr.c
fs/befs/linuxvfs.c
fs/binfmt_elf.c
fs/binfmt_elf_fdpic.c
fs/binfmt_flat.c
fs/block_dev.c
fs/btrfs/async-thread.c
fs/btrfs/async-thread.h
fs/btrfs/btrfs_inode.h
fs/btrfs/compression.c
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/dir-item.c
fs/btrfs/disk-io.c
fs/btrfs/export.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/extent_map.c
fs/btrfs/extent_map.h
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/inode-item.c
fs/btrfs/inode-map.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/ioctl.h
fs/btrfs/ordered-data.c
fs/btrfs/ordered-data.h
fs/btrfs/orphan.c
fs/btrfs/relocation.c
fs/btrfs/root-tree.c
fs/btrfs/super.c
fs/btrfs/transaction.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h
fs/buffer.c
fs/char_dev.c
fs/cifs/cifsfs.c
fs/cifs/inode.c
fs/coda/coda_int.h
fs/compat.c
fs/drop_caches.c
fs/ecryptfs/Kconfig
fs/ecryptfs/crypto.c
fs/ecryptfs/inode.c
fs/ecryptfs/keystore.c
fs/ecryptfs/kthread.c
fs/ecryptfs/main.c
fs/ecryptfs/mmap.c
fs/ecryptfs/read_write.c
fs/ecryptfs/super.c
fs/exec.c
fs/exofs/super.c
fs/ext2/inode.c
fs/ext3/inode.c
fs/ext4/inode.c
fs/fat/inode.c
fs/fcntl.c
fs/file_table.c
fs/fuse/dir.c
fs/fuse/fuse_i.h
fs/fuse/inode.c
fs/gfs2/aops.c
fs/hfs/mdb.c
fs/hfsplus/super.c
fs/hugetlbfs/inode.c
fs/inode.c
fs/internal.h
fs/ioctl.c
fs/isofs/inode.c
fs/jfs/super.c
fs/libfs.c
fs/namespace.c
fs/ncpfs/inode.c
fs/ncpfs/ioctl.c
fs/nfs/file.c
fs/nfs/inode.c
fs/nfs/super.c
fs/nls/nls_base.c
fs/ntfs/aops.c
fs/ntfs/super.c
fs/ocfs2/aops.c
fs/proc/array.c
fs/proc/meminfo.c
fs/proc/proc_sysctl.c
fs/proc/uptime.c
fs/ramfs/file-nommu.c
fs/read_write.c
fs/romfs/super.c
fs/seq_file.c
fs/smbfs/inode.c
fs/super.c
fs/xfs/linux-2.6/xfs_aops.c
fs/xfs/linux-2.6/xfs_sysctl.c
include/acpi/button.h [new file with mode: 0644]
include/asm-generic/fcntl.h
include/asm-generic/mman-common.h
include/asm-generic/siginfo.h
include/asm-generic/vmlinux.lds.h
include/drm/drm_pciids.h
include/drm/i915_drm.h
include/linux/amba/mmci.h [new file with mode: 0644]
include/linux/amba/pl022.h
include/linux/async_tx.h
include/linux/binfmts.h
include/linux/cgroup.h
include/linux/configfs.h
include/linux/dca.h
include/linux/debugfs.h
include/linux/dmaengine.h
include/linux/fb.h
include/linux/fs.h
include/linux/ftrace.h
include/linux/futex.h
include/linux/hugetlb.h
include/linux/memcontrol.h
include/linux/mm.h
include/linux/mm_types.h
include/linux/mmzone.h
include/linux/module.h
include/linux/page-flags.h
include/linux/page_cgroup.h
include/linux/pci_ids.h
include/linux/prctl.h
include/linux/relay.h
include/linux/res_counter.h
include/linux/rmap.h
include/linux/sched.h
include/linux/security.h
include/linux/seq_file.h
include/linux/signal.h
include/linux/swap.h
include/linux/swapops.h
include/linux/sysctl.h
include/linux/time.h
include/linux/tracehook.h
include/linux/tracepoint.h
include/linux/unaligned/be_byteshift.h
include/linux/unaligned/le_byteshift.h
include/linux/vgaarb.h
include/linux/writeback.h
include/net/ip.h
include/net/ndisc.h
ipc/ipc_sysctl.c
ipc/mq_sysctl.c
kernel/Makefile
kernel/audit.c
kernel/audit_watch.c
kernel/auditsc.c
kernel/cgroup.c
kernel/cgroup_debug.c [deleted file]
kernel/cgroup_freezer.c
kernel/cpuset.c
kernel/exit.c
kernel/fork.c
kernel/gcov/Kconfig
kernel/hung_task.c
kernel/module.c
kernel/ns_cgroup.c
kernel/params.c
kernel/pid_namespace.c
kernel/ptrace.c
kernel/res_counter.c
kernel/sched.c
kernel/sched_fair.c
kernel/signal.c
kernel/slow-work.c
kernel/softlockup.c
kernel/sys.c
kernel/sysctl.c
kernel/time/Makefile
kernel/time/timeconv.c [new file with mode: 0644]
kernel/trace/ftrace.c
kernel/trace/trace_stack.c
kernel/utsname_sysctl.c
lib/decompress_inflate.c
lib/decompress_unlzma.c
mm/Kconfig
mm/Makefile
mm/filemap.c
mm/hugetlb.c
mm/hwpoison-inject.c [new file with mode: 0644]
mm/ksm.c
mm/madvise.c
mm/memcontrol.c
mm/memory-failure.c [new file with mode: 0644]
mm/memory.c
mm/migrate.c
mm/mremap.c
mm/nommu.c
mm/page-writeback.c
mm/page_alloc.c
mm/rmap.c
mm/shmem.c
mm/swapfile.c
mm/truncate.c
mm/vmscan.c
net/bridge/br_netfilter.c
net/decnet/dn_dev.c
net/decnet/sysctl_net_decnet.c
net/ipv4/devinet.c
net/ipv4/route.c
net/ipv4/sysctl_net_ipv4.c
net/ipv6/addrconf.c
net/ipv6/ndisc.c
net/ipv6/route.c
net/irda/irsysctl.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/nf_log.c
net/phonet/sysctl.c
net/sunrpc/clnt.c
net/sunrpc/sysctl.c
net/sunrpc/xprtrdma/svc_rdma.c
security/device_cgroup.c
security/lsm_audit.c
security/min_addr.c
security/selinux/hooks.c
sound/arm/pxa2xx-ac97.c

diff --git a/Documentation/arm/tcm.txt b/Documentation/arm/tcm.txt
new file mode 100644 (file)
index 0000000..074f4be
--- /dev/null
@@ -0,0 +1,145 @@
+ARM TCM (Tightly-Coupled Memory) handling in Linux
+----
+Written by Linus Walleij <linus.walleij@stericsson.com>
+
+Some ARM SoCs have a so-called TCM (Tightly-Coupled Memory).
+This is usually just a few (4-64) KiB of RAM inside the ARM
+processor.
+
+Due to being embedded inside the CPU, the TCM has a
+Harvard architecture, so there is an ITCM (instruction TCM)
+and a DTCM (data TCM). The DTCM cannot contain any
+instructions, but the ITCM can actually contain data.
+The minimum size of DTCM or ITCM is 4 KiB, so the typical
+minimum configuration is 4 KiB ITCM and 4 KiB DTCM.
+
+ARM CPUs have special registers to read out status, physical
+location and size of TCM memories. arch/arm/include/asm/cputype.h
+defines a CPUID_TCM register that you can read out from the
+system control coprocessor. Documentation from ARM can be found
+at http://infocenter.arm.com, search for "TCM Status Register"
+to see documents for all CPUs. Reading this register you can
+determine if ITCM (bit 0) and/or DTCM (bit 16) is present in the
+machine.
+
+There is further a TCM region register (search for "TCM Region
+Registers" at the ARM site) that can report and modify the location
+and size of TCM memories at runtime. This is used to read out and modify
+the TCM location and size. Notice that this is not an MMU table: you
+actually move the physical location of the TCM around. At the
+place you put it, it will mask any underlying RAM from the
+CPU so it is usually wise not to overlap any physical RAM with
+the TCM. The TCM memory exists totally outside the MMU and will
+override any MMU mappings.
+
+Code executing inside the ITCM does not "see" any MMU mappings
+and e.g. register accesses must be made to physical addresses.
+
+TCM is used for a few things:
+
+- FIQ and other interrupt handlers that need deterministic
+  timing and cannot wait for cache misses.
+
+- Idle loops where all external RAM is set to self-refresh
+  retention mode, so only on-chip RAM is accessible by
+  the CPU and then we hang inside ITCM waiting for an
+  interrupt.
+
+- Other operations which imply shutting off or reconfiguring
+  the external RAM controller.
+
+There is an interface for using TCM on the ARM architecture
+in <asm/tcm.h>. Using this interface it is possible to:
+
+- Define the physical address and size of ITCM and DTCM.
+
+- Tag functions to be compiled into ITCM.
+
+- Tag data and constants to be allocated to DTCM and ITCM.
+
+- Have the remaining TCM RAM added to a special
+  allocation pool with gen_pool_create() and gen_pool_add()
+  and provide tcm_alloc() and tcm_free() for this
+  memory. Such a heap is great for things like saving
+  device state when shutting off device power domains.
+
+A machine that has TCM memory shall select HAVE_TCM in
+arch/arm/Kconfig for itself; the rest of the functionality
+then depends on the physical location and size of ITCM and
+DTCM being defined in mach/memory.h for the machine. Code
+that needs to use TCM shall #include <asm/tcm.h>. If the TCM
+is not located at the place given in memory.h it will be
+moved using the TCM Region registers.
+
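As a rough sketch of that wiring (the machine name, addresses and exact
macro names here are illustrative, not taken from this patch), a platform
selects the feature in its Kconfig entry:

	config MACH_FOO
		bool "Foo development board"
		select HAVE_TCM

and describes the TCM whereabouts in its mach/memory.h:

	/* hypothetical physical addresses */
	#define ITCM_OFFSET	0xfffe0000
	#define ITCM_END	0xfffe3fff
	#define DTCM_OFFSET	0xfffe8000
	#define DTCM_END	0xfffebfff
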
+Functions to go into ITCM can be tagged like this:
+int __tcmfunc foo(int bar);
+
+Variables to go into DTCM can be tagged like this:
+int __tcmdata foo;
+
+Constants can be tagged like this:
+int __tcmconst foo;
+
+To put assembler into TCM just use
+.section ".tcm.text" or .section ".tcm.data"
+respectively.
+
+Example code:
+
+#include <asm/tcm.h>
+
+/* Uninitialized data */
+static u32 __tcmdata tcmvar;
+/* Initialized data */
+static u32 __tcmdata tcmassigned = 0x2BADBABEU;
+/* Constant */
+static const u32 __tcmconst tcmconst = 0xCAFEBABEU;
+
+static void __tcmlocalfunc tcm_to_tcm(void)
+{
+       int i;
+       for (i = 0; i < 100; i++)
+               tcmvar ++;
+}
+
+static void __tcmfunc hello_tcm(void)
+{
+       /* Some abstract code that runs in ITCM */
+       int i;
+       for (i = 0; i < 100; i++) {
+               tcmvar ++;
+       }
+       tcm_to_tcm();
+}
+
+static void __init test_tcm(void)
+{
+       u32 *tcmem;
+       int i;
+
+       hello_tcm();
+       printk("Hello TCM executed from ITCM RAM\n");
+
+       printk("TCM variable from testrun: %u @ %p\n", tcmvar, &tcmvar);
+       tcmvar = 0xDEADBEEFU;
+       printk("TCM variable: 0x%x @ %p\n", tcmvar, &tcmvar);
+
+       printk("TCM assigned variable: 0x%x @ %p\n", tcmassigned, &tcmassigned);
+
+       printk("TCM constant: 0x%x @ %p\n", tcmconst, &tcmconst);
+
+       /* Allocate some TCM memory from the pool */
+       tcmem = tcm_alloc(20);
+       if (tcmem) {
+               printk("TCM Allocated 20 bytes of TCM @ %p\n", tcmem);
+               tcmem[0] = 0xDEADBEEFU;
+               tcmem[1] = 0x2BADBABEU;
+               tcmem[2] = 0xCAFEBABEU;
+               tcmem[3] = 0xDEADBEEFU;
+               tcmem[4] = 0x2BADBABEU;
+               for (i = 0; i < 5; i++)
+                       printk("TCM tcmem[%d] = %08x\n", i, tcmem[i]);
+               tcm_free(tcmem, 20);
+       }
+}
index 1d2c010..e7823ff 100644 (file)
@@ -194,7 +194,6 @@ static void cfag12864b_blit(void)
  */
 
 #include <stdio.h>
-#include <string.h>
 
 #define EXAMPLES       6
 
index 6eb1a97..455d4e6 100644 (file)
@@ -408,6 +408,26 @@ You can attach the current shell task by echoing 0:
 
 # echo 0 > tasks
 
+2.3 Mounting hierarchies by name
+--------------------------------
+
+Passing the name=<x> option when mounting a cgroups hierarchy
+associates the given name with the hierarchy.  This can be used when
+mounting a pre-existing hierarchy, in order to refer to it by name
+rather than by its set of active subsystems.  Each hierarchy is either
+nameless, or has a unique name.
+
+The name should match [\w.-]+
+
+When passing a name=<x> option for a new hierarchy, you need to
+specify subsystems manually; the legacy behaviour of mounting all
+subsystems when none are explicitly specified is not supported when
+you give a hierarchy a name.
+
+The name of the hierarchy appears as part of the hierarchy description
+in /proc/mounts and /proc/<pid>/cgroups.
+
+
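For example, a new hierarchy can be created with an explicit subsystem
list and a name, and later mounted again by that name alone (the mount
points and the name are illustrative):

	# mount -t cgroup -o cpuset,name=myhier none /cgroups/myhier
	# mount -t cgroup -o name=myhier none /mnt/myhier-again
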
 3. Kernel API
 =============
 
@@ -501,7 +521,7 @@ rmdir() will fail with it. From this behavior, pre_destroy() can be
 called multiple times against a cgroup.
 
 int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-              struct task_struct *task)
+              struct task_struct *task, bool threadgroup)
 (cgroup_mutex held by caller)
 
 Called prior to moving a task into a cgroup; if the subsystem
@@ -509,14 +529,20 @@ returns an error, this will abort the attach operation.  If a NULL
 task is passed, then a successful result indicates that *any*
 unspecified task can be moved into the cgroup. Note that this isn't
 called on a fork. If this method returns 0 (success) then this should
-remain valid while the caller holds cgroup_mutex.
+remain valid while the caller holds cgroup_mutex. If threadgroup is
+true, then a successful result indicates that all threads in the given
+thread's threadgroup can be moved together.
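As a sketch of how a subsystem might honour the threadgroup flag in its
can_attach() method (foo_task_allowed() is a hypothetical per-task check;
see the cpuset subsystem for a real user):

	static int foo_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
				  struct task_struct *task, bool threadgroup)
	{
		struct task_struct *t;

		if (!foo_task_allowed(task))
			return -EINVAL;
		if (threadgroup) {
			/* every thread in the group must pass the same check */
			rcu_read_lock();
			list_for_each_entry_rcu(t, &task->thread_group,
						thread_group) {
				if (!foo_task_allowed(t)) {
					rcu_read_unlock();
					return -EINVAL;
				}
			}
			rcu_read_unlock();
		}
		return 0;
	}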
 
 void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-           struct cgroup *old_cgrp, struct task_struct *task)
+           struct cgroup *old_cgrp, struct task_struct *task,
+           bool threadgroup)
 (cgroup_mutex held by caller)
 
 Called after the task has been attached to the cgroup, to allow any
 post-attachment activity that requires memory allocations or blocking.
+If threadgroup is true, the subsystem should take care of all threads
+in the specified thread's threadgroup. This currently does not support
+subsystems that might need the old_cgrp for every thread in the group.
 
 void fork(struct cgroup_subsys *ss, struct task_struct *task)
 
index 23d1262..b871f25 100644 (file)
@@ -179,6 +179,9 @@ The reclaim algorithm has not been modified for cgroups, except that
 pages that are selected for reclaiming come from the per cgroup LRU
 list.
 
+NOTE: Reclaim does not work for the root cgroup, since we cannot set any
+limits on the root cgroup.
+
 2. Locking
 
 The memory controller uses the following hierarchy
@@ -210,6 +213,7 @@ We can alter the memory limit:
 NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
 mega or gigabytes.
 NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited).
+NOTE: We cannot set limits on the root cgroup any more.
 
 # cat /cgroups/0/memory.limit_in_bytes
 4194304
@@ -375,7 +379,42 @@ cgroups created below it.
 
 NOTE2: This feature can be enabled/disabled per subtree.
 
-7. TODO
+7. Soft limits
+
+Soft limits allow for greater sharing of memory. The idea behind soft limits
+is to allow control groups to use as much of the memory as needed, provided
+
+a. There is no memory contention
+b. They do not exceed their hard limit
+
+When the system detects memory contention or low memory, control groups
+are pushed back to their soft limits. If the soft limit of each control
+group is very high, they are pushed back as much as possible to make
+sure that one control group does not starve the others of memory.
+
+Please note that soft limits are a best-effort feature; they come with
+no guarantees, but the system does its best to make sure that when memory
+is heavily contended for, memory is allocated based on the soft limit
+hints/setup. Currently soft-limit-based reclaim is set up such that
+it is invoked from balance_pgdat (kswapd).
+
+7.1 Interface
+
+Soft limits can be set up using the following commands (in this example we
+assume a soft limit of 256 megabytes):
+
+# echo 256M > memory.soft_limit_in_bytes
+
+If we want to change this to 1G, we can at any time use
+
+# echo 1G > memory.soft_limit_in_bytes
+
+NOTE1: Soft limits take effect over a long period of time, since they involve
+       reclaiming memory for balancing between memory cgroups.
+NOTE2: It is recommended to always set the soft limit below the hard limit;
+       otherwise the hard limit will take precedence.
+
+8. TODO
 
 1. Add support for accounting huge pages (as a separate controller)
 2. Make per-cgroup scanner reclaim not-shared pages first
index 9f59fcb..ba046b8 100644 (file)
@@ -54,20 +54,23 @@ features surfaced as a result:
 
 3.1 General format of the API:
 struct dma_async_tx_descriptor *
-async_<operation>(<op specific parameters>,
-                 enum async_tx_flags flags,
-                 struct dma_async_tx_descriptor *dependency,
-                 dma_async_tx_callback callback_routine,
-                 void *callback_parameter);
+async_<operation>(<op specific parameters>, struct async_submit_ctl *submit)
 
 3.2 Supported operations:
-memcpy       - memory copy between a source and a destination buffer
-memset       - fill a destination buffer with a byte value
-xor          - xor a series of source buffers and write the result to a
-              destination buffer
-xor_zero_sum - xor a series of source buffers and set a flag if the
-              result is zero.  The implementation attempts to prevent
-              writes to memory
+memcpy  - memory copy between a source and a destination buffer
+memset  - fill a destination buffer with a byte value
+xor     - xor a series of source buffers and write the result to a
+         destination buffer
+xor_val - xor a series of source buffers and set a flag if the
+         result is zero.  The implementation attempts to prevent
+         writes to memory
+pq     - generate the p+q (raid6 syndrome) from a series of source buffers
+pq_val  - validate that a p and/or q buffer is in sync with a given series of
+         sources
+datap  - (raid6_datap_recov) recover a raid6 data block and the p block
+         from the given sources
+2data  - (raid6_2data_recov) recover 2 raid6 data blocks from the given
+         sources
 
 3.3 Descriptor management:
 The return value is non-NULL and points to a 'descriptor' when the operation
@@ -80,8 +83,8 @@ acknowledged by the application before the offload engine driver is allowed to
 recycle (or free) the descriptor.  A descriptor can be acked by one of the
 following methods:
 1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted
-2/ setting the ASYNC_TX_DEP_ACK flag to acknowledge the parent
-   descriptor of a new operation.
+2/ submitting an unacknowledged descriptor as a dependency to another
+   async_tx call will implicitly set the acknowledged state.
 3/ calling async_tx_ack() on the descriptor.
 
 3.4 When does the operation execute?
@@ -119,30 +122,42 @@ of an operation.
 Perform a xor->copy->xor operation where each operation depends on the
 result from the previous operation:
 
-void complete_xor_copy_xor(void *param)
+void callback(void *param)
 {
-       printk("complete\n");
+       struct completion *cmp = param;
+
+       complete(cmp);
 }
 
-int run_xor_copy_xor(struct page **xor_srcs,
-                    int xor_src_cnt,
-                    struct page *xor_dest,
-                    size_t xor_len,
-                    struct page *copy_src,
-                    struct page *copy_dest,
-                    size_t copy_len)
+void run_xor_copy_xor(struct page **xor_srcs,
+                     int xor_src_cnt,
+                     struct page *xor_dest,
+                     size_t xor_len,
+                     struct page *copy_src,
+                     struct page *copy_dest,
+                     size_t copy_len)
 {
        struct dma_async_tx_descriptor *tx;
+       addr_conv_t addr_conv[xor_src_cnt];
+       struct async_submit_ctl submit;
+       struct completion cmp;
+
+       init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL,
+                         addr_conv);
+       tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit);
 
-       tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
-                      ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL);
-       tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len,
-                         ASYNC_TX_DEP_ACK, tx, NULL, NULL);
-       tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
-                      ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK,
-                      tx, complete_xor_copy_xor, NULL);
+       submit.depend_tx = tx;
+       tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, &submit);
+
+       init_completion(&cmp);
+       init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx,
+                         callback, &cmp, addr_conv);
+       tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit);
 
        async_tx_issue_pending_all();
+
+       wait_for_completion(&cmp);
 }
 
 See include/linux/async_tx.h for more information on the flags.  See the
index 7365400..23a1810 100644 (file)
@@ -4,7 +4,7 @@ Shared Subtrees
 Contents:
        1) Overview
        2) Features
-       3) smount command
+       3) Setting mount states
        4) Use-case
        5) Detailed semantics
        6) Quiz
@@ -41,14 +41,14 @@ replicas continue to be exactly same.
 
        Here is an example:
 
-       Lets say /mnt has a mount that is shared.
+       Let's say /mnt has a mount that is shared.
        mount --make-shared /mnt
 
-       note: mount command does not yet support the --make-shared flag.
-       I have included a small C program which does the same by executing
-       'smount /mnt shared'
+       Note: the mount(8) command now supports the --make-shared flag,
+       so the sample 'smount' program is no longer needed and has been
+       removed.
 
-       #mount --bind /mnt /tmp
+       # mount --bind /mnt /tmp
        The above command replicates the mount at /mnt to the mountpoint /tmp
        and the contents of both the mounts remain identical.
 
@@ -58,8 +58,8 @@ replicas continue to be exactly same.
        #ls /tmp
        a b c
 
-       Now lets say we mount a device at /tmp/a
-       #mount /dev/sd0  /tmp/a
+       Now let's say we mount a device at /tmp/a
+       # mount /dev/sd0  /tmp/a
 
        #ls /tmp/a
        t1 t2 t2
@@ -80,21 +80,20 @@ replicas continue to be exactly same.
 
        Here is an example:
 
-       Lets say /mnt has a mount which is shared.
-       #mount --make-shared /mnt
+       Let's say /mnt has a mount which is shared.
+       # mount --make-shared /mnt
 
-       Lets bind mount /mnt to /tmp
-       #mount --bind /mnt /tmp
+       Let's bind mount /mnt to /tmp
+       # mount --bind /mnt /tmp
 
        the new mount at /tmp becomes a shared mount and it is a replica of
        the mount at /mnt.
 
-       Now lets make the mount at /tmp; a slave of /mnt
-       #mount --make-slave /tmp
-       [or smount /tmp slave]
+       Now let's make the mount at /tmp; a slave of /mnt
+       # mount --make-slave /tmp
 
-       lets mount /dev/sd0 on /mnt/a
-       #mount /dev/sd0 /mnt/a
+       let's mount /dev/sd0 on /mnt/a
+       # mount /dev/sd0 /mnt/a
 
        #ls /mnt/a
        t1 t2 t3
@@ -104,9 +103,9 @@ replicas continue to be exactly same.
 
        Note the mount event has propagated to the mount at /tmp
 
-       However lets see what happens if we mount something on the mount at /tmp
+       However let's see what happens if we mount something on the mount at /tmp
 
-       #mount /dev/sd1 /tmp/b
+       # mount /dev/sd1 /tmp/b
 
        #ls /tmp/b
        s1 s2 s3
@@ -124,12 +123,11 @@ replicas continue to be exactly same.
 
 2d) An unbindable mount is an unbindable private mount
 
-       lets say we have a mount at /mnt and we make is unbindable
+       let's say we have a mount at /mnt and we make it unbindable
 
-       #mount --make-unbindable /mnt
-        [ smount /mnt  unbindable ]
+       # mount --make-unbindable /mnt
 
-        Lets try to bind mount this mount somewhere else.
+        Let's try to bind mount this mount somewhere else.
         # mount --bind /mnt /tmp
         mount: wrong fs type, bad option, bad superblock on /mnt,
                or too many mounted file systems
@@ -137,149 +135,15 @@ replicas continue to be exactly same.
        Binding an unbindable mount is an invalid operation.
 
 
-3) smount command
+3) Setting mount states
 
-       Currently the mount command is not aware of shared subtree features.
-       Work is in progress to add the support in mount ( util-linux package ).
-       Till then use the following program.
+       The mount command (util-linux package) can be used to set mount
+       states:
 
-       ------------------------------------------------------------------------
-       //
-       //this code was developed my Miklos Szeredi <miklos@szeredi.hu>
-       //and modified by Ram Pai <linuxram@us.ibm.com>
-       // sample usage:
-       //              smount /tmp shared
-       //
-       #include <stdio.h>
-       #include <stdlib.h>
-       #include <unistd.h>
-       #include <string.h>
-       #include <sys/mount.h>
-       #include <sys/fsuid.h>
-
-       #ifndef MS_REC
-       #define MS_REC          0x4000  /* 16384: Recursive loopback */
-       #endif
-
-       #ifndef MS_SHARED
-       #define MS_SHARED               1<<20   /* Shared */
-       #endif
-
-       #ifndef MS_PRIVATE
-       #define MS_PRIVATE              1<<18   /* Private */
-       #endif
-
-       #ifndef MS_SLAVE
-       #define MS_SLAVE                1<<19   /* Slave */
-       #endif
-
-       #ifndef MS_UNBINDABLE
-       #define MS_UNBINDABLE           1<<17   /* Unbindable */
-       #endif
-
-       int main(int argc, char *argv[])
-       {
-               int type;
-               if(argc != 3) {
-                       fprintf(stderr, "usage: %s dir "
-                       "<rshared|rslave|rprivate|runbindable|shared|slave"
-                       "|private|unbindable>\n" , argv[0]);
-                       return 1;
-               }
-
-               fprintf(stdout, "%s %s %s\n", argv[0], argv[1], argv[2]);
-
-               if (strcmp(argv[2],"rshared")==0)
-                       type=(MS_SHARED|MS_REC);
-               else if (strcmp(argv[2],"rslave")==0)
-                       type=(MS_SLAVE|MS_REC);
-               else if (strcmp(argv[2],"rprivate")==0)
-                       type=(MS_PRIVATE|MS_REC);
-               else if (strcmp(argv[2],"runbindable")==0)
-                       type=(MS_UNBINDABLE|MS_REC);
-               else if (strcmp(argv[2],"shared")==0)
-                       type=MS_SHARED;
-               else if (strcmp(argv[2],"slave")==0)
-                       type=MS_SLAVE;
-               else if (strcmp(argv[2],"private")==0)
-                       type=MS_PRIVATE;
-               else if (strcmp(argv[2],"unbindable")==0)
-                       type=MS_UNBINDABLE;
-               else {
-                       fprintf(stderr, "invalid operation: %s\n", argv[2]);
-                       return 1;
-               }
-               setfsuid(getuid());
-
-               if(mount("", argv[1], "dontcare", type, "") == -1) {
-                       perror("mount");
-                       return 1;
-               }
-               return 0;
-       }
-       -----------------------------------------------------------------------
-
-       Copy the above code snippet into smount.c
-       gcc -o smount smount.c
-
-
-       (i) To mark all the mounts under /mnt as shared execute the following
-       command:
-
-               smount /mnt rshared
-               the corresponding syntax planned for mount command is
-               mount --make-rshared /mnt
-
-           just to mark a mount /mnt as shared, execute the following
-           command:
-               smount /mnt shared
-               the corresponding syntax planned for mount command is
-               mount --make-shared /mnt
-
-       (ii) To mark all the shared mounts under /mnt as slave execute the
-       following
-
-            command:
-               smount /mnt rslave
-               the corresponding syntax planned for mount command is
-               mount --make-rslave /mnt
-
-           just to mark a mount /mnt as slave, execute the following
-           command:
-               smount /mnt slave
-               the corresponding syntax planned for mount command is
-               mount --make-slave /mnt
-
-       (iii) To mark all the mounts under /mnt as private execute the
-       following command:
-
-               smount /mnt rprivate
-               the corresponding syntax planned for mount command is
-               mount --make-rprivate /mnt
-
-           just to mark a mount /mnt as private, execute the following
-           command:
-               smount /mnt private
-               the corresponding syntax planned for mount command is
-               mount --make-private /mnt
-
-             NOTE: by default all the mounts are created as private. But if
-             you want to change some shared/slave/unbindable  mount as
-             private at a later point in time, this command can help.
-
-       (iv) To mark all the mounts under /mnt as unbindable execute the
-       following
-
-            command:
-               smount /mnt runbindable
-               the corresponding syntax planned for mount command is
-               mount --make-runbindable /mnt
-
-           just to mark a mount /mnt as unbindable, execute the following
-           command:
-               smount /mnt unbindable
-               the corresponding syntax planned for mount command is
-               mount --make-unbindable /mnt
+       mount --make-shared mountpoint
+       mount --make-slave mountpoint
+       mount --make-private mountpoint
+       mount --make-unbindable mountpoint
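Each of the above corresponds to a single mount(2) call with the
matching propagation flag; for instance, 'mount --make-shared /mnt'
is roughly:

	#include <sys/mount.h>

	/* mark the existing mount at /mnt as shared */
	if (mount("", "/mnt", NULL, MS_SHARED, NULL) == -1)
		perror("mount");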
 
 
 4) Use cases
@@ -350,7 +214,7 @@ replicas continue to be exactly same.
                mount --rbind / /view/v3
                mount --rbind / /view/v4
 
-               and if /usr has a versioning filesystem mounted, than that
+               and if /usr has a versioning filesystem mounted, then that
                mount appears at /view/v1/usr, /view/v2/usr, /view/v3/usr and
                /view/v4/usr too
 
@@ -390,7 +254,7 @@ replicas continue to be exactly same.
 
                For example:
                        mount --make-shared /mnt
-                       mount --bin /mnt /tmp
+                       mount --bind /mnt /tmp
 
                The mount at /mnt and that at /tmp are both shared and belong
                to the same peer group. Anything mounted or unmounted under
@@ -558,7 +422,7 @@ replicas continue to be exactly same.
        then the subtree under the unbindable mount is pruned in the new
        location.
 
-       eg: lets say we have the following mount tree.
+       eg: let's say we have the following mount tree.
 
                A
              /   \
@@ -566,7 +430,7 @@ replicas continue to be exactly same.
             / \ / \
             D E F G
 
-            Lets say all the mount except the mount C in the tree are
+            Let's say all the mount except the mount C in the tree are
             of a type other than unbindable.
 
             If this tree is rbound to say Z
@@ -683,13 +547,13 @@ replicas continue to be exactly same.
        'b' on mounts that receive propagation from mount 'B' and does not have
        sub-mounts within them are unmounted.
 
-       Example: Lets say 'B1', 'B2', 'B3' are shared mounts that propagate to
+       Example: Let's say 'B1', 'B2', 'B3' are shared mounts that propagate to
        each other.
 
-       lets say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount
+       let's say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount
        'B1', 'B2' and 'B3' respectively.
 
-       lets say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on
+       let's say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on
        mount 'B1', 'B2' and 'B3' respectively.
 
        if 'C1' is unmounted, all the mounts that are most-recently-mounted on
@@ -710,7 +574,7 @@ replicas continue to be exactly same.
        A cloned namespace contains all the mounts as that of the parent
        namespace.
 
-       Lets say 'A' and 'B' are the corresponding mounts in the parent and the
+       Let's say 'A' and 'B' are the corresponding mounts in the parent and the
        child namespace.
 
        If 'A' is shared, then 'B' is also shared and 'A' and 'B' propagate to
@@ -759,11 +623,11 @@ replicas continue to be exactly same.
                mount --make-slave /mnt
 
                At this point we have the first mount at /tmp and
-               its root dentry is 1. Lets call this mount 'A'
+               its root dentry is 1. Let's call this mount 'A'
                And then we have a second mount at /tmp1 with root
-               dentry 2. Lets call this mount 'B'
+               dentry 2. Let's call this mount 'B'
                Next we have a third mount at /mnt with root dentry
-               mnt. Lets call this mount 'C'
+               mnt. Let's call this mount 'C'
 
                'B' is the slave of 'A' and 'C' is a slave of 'B'
                A -> B -> C
@@ -794,7 +658,7 @@ replicas continue to be exactly same.
 
        Q3 Why is unbindable mount needed?
 
-               Lets say we want to replicate the mount tree at multiple
+               Let's say we want to replicate the mount tree at multiple
                locations within the same subtree.
 
                if one rbind mounts a tree within the same subtree 'n' times
@@ -803,7 +667,7 @@ replicas continue to be exactly same.
                mounts. Here is a example.
 
                step 1:
-                  lets say the root tree has just two directories with
+                  let's say the root tree has just two directories with
                   one vfsmount.
                                    root
                                   /    \
@@ -875,7 +739,7 @@ replicas continue to be exactly same.
                Unclonable mounts come in handy here.
 
                step 1:
-                  lets say the root tree has just two directories with
+                  let's say the root tree has just two directories with
                   one vfsmount.
                                    root
                                   /    \
index f49eecf..623f094 100644 (file)
@@ -536,6 +536,7 @@ struct address_space_operations {
        /* migrate the contents of a page to the specified target */
        int (*migratepage) (struct page *, struct page *);
        int (*launder_page) (struct page *);
+       int (*error_remove_page) (struct address_space *mapping, struct page *page);
 };
 
   writepage: called by the VM to write a dirty page to backing store.
@@ -694,6 +695,12 @@ struct address_space_operations {
        prevent redirtying the page, it is kept locked during the whole
        operation.
 
+  error_remove_page: normally set to generic_error_remove_page if truncation
+       is ok for this address space. Used for memory failure handling.
+       Setting this implies that you can deal with pages going away under
+       you, unless you have them locked or have raised their reference counts.
+
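A filesystem opting in will typically just point the new method at the
generic helper in its address_space_operations (the structure name here
is illustrative):

	static const struct address_space_operations foo_aops = {
		...
		.error_remove_page	= generic_error_remove_page,
	};
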
+
 The File Object
 ===============
 
index aafca0a..9473749 100644 (file)
@@ -135,6 +135,7 @@ Code        Seq#    Include File            Comments
                                        <http://mikonos.dia.unisa.it/tcfs>
 'l'    40-7F   linux/udf_fs_i.h        in development:
                                        <http://sourceforge.net/projects/linux-udf/>
+'m'    00-09   linux/mmtimer.h
 'm'    all     linux/mtio.h            conflict!
 'm'    all     linux/soundcard.h       conflict!
 'm'    all     linux/synclink.h        conflict!
index 1458448..6268250 100644 (file)
@@ -96,13 +96,16 @@ handles that the Linux kernel will allocate. When you get lots
 of error messages about running out of file handles, you might
 want to increase this limit.
 
-The three values in file-nr denote the number of allocated
-file handles, the number of unused file handles and the maximum
-number of file handles. When the allocated file handles come
-close to the maximum, but the number of unused file handles is
-significantly greater than 0, you've encountered a peak in your 
-usage of file handles and you don't need to increase the maximum.
-
+Historically, the three values in file-nr denoted the number of
+allocated file handles, the number of allocated but unused file
+handles, and the maximum number of file handles. Linux 2.6 always
+reports 0 as the number of free file handles -- this is not an
+error, it just means that the number of allocated file handles
+exactly matches the number of used file handles.
+
+Attempts to allocate more file descriptors than file-max are
+reported with printk; look for "VFS: file-max limit <number>
+reached".
 ==============================================================
 
 nr_open:
index b3d8b49..a028b92 100644 (file)
@@ -22,6 +22,7 @@ show up in /proc/sys/kernel:
 - callhome                  [ S390 only ]
 - auto_msgmni
 - core_pattern
+- core_pipe_limit
 - core_uses_pid
 - ctrl-alt-del
 - dentry-state
@@ -135,6 +136,27 @@ core_pattern is used to specify a core dumpfile pattern name.
 
 ==============================================================
 
+core_pipe_limit:
+
+This sysctl is only applicable when core_pattern is configured to pipe core
+files to a user space helper (when the first character of core_pattern is a '|',
+see above).  When collecting cores via a pipe to an application, it is
+occasionally useful for the collecting application to gather data about the
+crashing process from its /proc/pid directory.  In order to do this safely, the
+kernel must wait for the collecting process to exit, so as not to remove the
+crashing process's proc files prematurely.  This in turn creates the possibility
+that a misbehaving userspace collecting process can block the reaping of a
+crashed process simply by never exiting.  This sysctl defends against that.  It
+defines how many concurrent crashing processes may be piped to user space
+applications in parallel.  If this value is exceeded, then those crashing
+processes above that value are noted via the kernel log and their cores are
+skipped.  0 is a special value, indicating that unlimited processes may be
+captured in parallel, but that no waiting will take place (i.e. the collecting
+process is not guaranteed access to /proc/<crashing pid>/).  This value defaults
+to 0.
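For example, to pipe cores to a (hypothetical) collector while allowing
at most 4 crashing processes to be serviced in parallel:

	# echo "|/usr/local/bin/core-collector %p" > /proc/sys/kernel/core_pattern
	# echo 4 > /proc/sys/kernel/core_pipe_limit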
+
+==============================================================
+
 core_uses_pid:
 
 The default coredump filename is "core".  By setting
index e6fb1ec..a6e360d 100644 (file)
@@ -32,6 +32,8 @@ Currently, these files are in /proc/sys/vm:
 - legacy_va_layout
 - lowmem_reserve_ratio
 - max_map_count
+- memory_failure_early_kill
+- memory_failure_recovery
 - min_free_kbytes
 - min_slab_ratio
 - min_unmapped_ratio
@@ -53,7 +55,6 @@ Currently, these files are in /proc/sys/vm:
 - vfs_cache_pressure
 - zone_reclaim_mode
 
-
 ==============================================================
 
 block_dump
@@ -275,6 +276,44 @@ e.g., up to one or two maps per allocation.
 
 The default value is 65536.
 
+=============================================================
+
+memory_failure_early_kill:
+
+Control how to kill processes when an uncorrected memory error (typically
+a 2-bit error in a memory module) is detected in the background by hardware
+and cannot be handled by the kernel. In some cases (like the page
+still having a valid copy on disk) the kernel will handle the failure
+transparently without affecting any applications. But if there is
+no other up-to-date copy of the data, the kernel will kill processes to
+prevent any data corruption from propagating.
+
+1: Kill all processes that have the corrupted, non-reloadable page mapped
+as soon as the corruption is detected.  Note this is not supported
+for a few types of pages, like kernel internally allocated data or
+the swap cache, but works for the majority of user pages.
+
+0: Only unmap the corrupted page from all processes and only kill a process
+that tries to access it.
+
+The kill is done using a catchable SIGBUS with BUS_MCEERR_AO, so processes can
+handle this if they want to.
+
+This is only active on architectures/platforms with advanced machine
+check handling and depends on the hardware capabilities.
+
+Applications can override this setting individually with the PR_MCE_KILL prctl.
+
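Schematically, a process that always wants early kill regardless of this
sysctl could do (see the prctl additions in this series for the exact
constants):

	#include <sys/prctl.h>

	/* request early kill on memory failure for this process */
	prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
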
+==============================================================
+
+memory_failure_recovery:
+
+Enable memory failure recovery (when supported by the platform).
+
+1: Attempt recovery.
+
+0: Always panic on a memory failure.
+
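For example:

	# echo 1 > /proc/sys/vm/memory_failure_recovery
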
 ==============================================================
 
 min_free_kbytes:
index f366fa9..25fadb4 100644 (file)
@@ -80,7 +80,7 @@ Note: PTL can also be used to guarantee that no new clones using the
 mm start up ... this is a loose form of stability on mm_users. For
 example, it is used in copy_mm to protect against a racing tlb_gather_mmu
 single address space optimization, so that the zap_page_range (from
-vmtruncate) does not lose sending ipi's to cloned threads that might 
+truncate) does not lose sending ipi's to cloned threads that might
 be spawned underneath it and go to user mode to drag in pte's into tlbs.
 
 swap_lock
index 3eda8ea..fa1a30d 100644 (file)
@@ -5,6 +5,7 @@
  * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com>
  */
 
+#define _LARGEFILE64_SOURCE
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <string.h>
 #include <getopt.h>
 #include <limits.h>
+#include <assert.h>
 #include <sys/types.h>
 #include <sys/errno.h>
 #include <sys/fcntl.h>
 
 
+/*
+ * pagemap kernel ABI bits
+ */
+
+#define PM_ENTRY_BYTES      sizeof(uint64_t)
+#define PM_STATUS_BITS      3
+#define PM_STATUS_OFFSET    (64 - PM_STATUS_BITS)
+#define PM_STATUS_MASK      (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
+#define PM_STATUS(nr)       (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
+#define PM_PSHIFT_BITS      6
+#define PM_PSHIFT_OFFSET    (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
+#define PM_PSHIFT_MASK      (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
+#define PM_PSHIFT(x)        (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
+#define PM_PFRAME_MASK      ((1LL << PM_PSHIFT_OFFSET) - 1)
+#define PM_PFRAME(x)        ((x) & PM_PFRAME_MASK)
+
+#define PM_PRESENT          PM_STATUS(4LL)
+#define PM_SWAP             PM_STATUS(2LL)
+
+
 /*
  * kernel page flags
  */
@@ -126,6 +148,14 @@ static int         nr_addr_ranges;
 static unsigned long   opt_offset[MAX_ADDR_RANGES];
 static unsigned long   opt_size[MAX_ADDR_RANGES];
 
+#define MAX_VMAS       10240
+static int             nr_vmas;
+static unsigned long   pg_start[MAX_VMAS];
+static unsigned long   pg_end[MAX_VMAS];
+static unsigned long   voffset;
+
+static int             pagemap_fd;
+
 #define MAX_BIT_FILTERS        64
 static int             nr_bit_filters;
 static uint64_t                opt_mask[MAX_BIT_FILTERS];
@@ -135,7 +165,6 @@ static int          page_size;
 
 #define PAGES_BATCH    (64 << 10)      /* 64k pages */
 static int             kpageflags_fd;
-static uint64_t                kpageflags_buf[KPF_BYTES * PAGES_BATCH];
 
 #define HASH_SHIFT     13
 #define HASH_SIZE      (1 << HASH_SHIFT)
@@ -158,6 +187,11 @@ static uint64_t    page_flags[HASH_SIZE];
        type __min2 = (y);                      \
        __min1 < __min2 ? __min1 : __min2; })
 
+#define max_t(type, x, y) ({                   \
+       type __max1 = (x);                      \
+       type __max2 = (y);                      \
+       __max1 > __max2 ? __max1 : __max2; })
+
 static unsigned long pages2mb(unsigned long pages)
 {
        return (pages * page_size) >> 20;
@@ -224,26 +258,34 @@ static char *page_flag_longname(uint64_t flags)
 static void show_page_range(unsigned long offset, uint64_t flags)
 {
        static uint64_t      flags0;
+       static unsigned long voff;
        static unsigned long index;
        static unsigned long count;
 
-       if (flags == flags0 && offset == index + count) {
+       if (flags == flags0 && offset == index + count &&
+           (!opt_pid || voffset == voff + count)) {
                count++;
                return;
        }
 
-       if (count)
-               printf("%lu\t%lu\t%s\n",
+       if (count) {
+               if (opt_pid)
+                       printf("%lx\t", voff);
+               printf("%lx\t%lx\t%s\n",
                                index, count, page_flag_name(flags0));
+       }
 
        flags0 = flags;
        index  = offset;
+       voff   = voffset;
        count  = 1;
 }
 
 static void show_page(unsigned long offset, uint64_t flags)
 {
-       printf("%lu\t%s\n", offset, page_flag_name(flags));
+       if (opt_pid)
+               printf("%lx\t", voffset);
+       printf("%lx\t%s\n", offset, page_flag_name(flags));
 }
 
 static void show_summary(void)
@@ -383,6 +425,8 @@ static void walk_pfn(unsigned long index, unsigned long count)
        lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET);
 
        while (count) {
+               uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH];
+
                batch = min_t(unsigned long, count, PAGES_BATCH);
                n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES);
                if (n == 0)
@@ -404,6 +448,81 @@ static void walk_pfn(unsigned long index, unsigned long count)
        }
 }
 
+
+#define PAGEMAP_BATCH  4096
+static unsigned long task_pfn(unsigned long pgoff)
+{
+       static uint64_t buf[PAGEMAP_BATCH];
+       static unsigned long start;
+       static long count;
+       uint64_t pfn;
+
+       if (pgoff < start || pgoff >= start + count) {
+               if (lseek64(pagemap_fd,
+                           (uint64_t)pgoff * PM_ENTRY_BYTES,
+                           SEEK_SET) < 0) {
+                       perror("pagemap seek");
+                       exit(EXIT_FAILURE);
+               }
+               count = read(pagemap_fd, buf, sizeof(buf));
+               if (count == 0)
+                       return 0;
+               if (count < 0) {
+                       perror("pagemap read");
+                       exit(EXIT_FAILURE);
+               }
+               if (count % PM_ENTRY_BYTES) {
+                       fatal("pagemap read not aligned.\n");
+                       exit(EXIT_FAILURE);
+               }
+               count /= PM_ENTRY_BYTES;
+               start = pgoff;
+       }
+
+       pfn = buf[pgoff - start];
+       if (pfn & PM_PRESENT)
+               pfn = PM_PFRAME(pfn);
+       else
+               pfn = 0;
+
+       return pfn;
+}
+
+static void walk_task(unsigned long index, unsigned long count)
+{
+       int i = 0;
+       const unsigned long end = index + count;
+
+       while (index < end) {
+
+               while (pg_end[i] <= index)
+                       if (++i >= nr_vmas)
+                               return;
+               if (pg_start[i] >= end)
+                       return;
+
+               voffset = max_t(unsigned long, pg_start[i], index);
+               index   = min_t(unsigned long, pg_end[i], end);
+
+               assert(voffset < index);
+               for (; voffset < index; voffset++) {
+                       unsigned long pfn = task_pfn(voffset);
+                       if (pfn)
+                               walk_pfn(pfn, 1);
+               }
+       }
+}
+
+static void add_addr_range(unsigned long offset, unsigned long size)
+{
+       if (nr_addr_ranges >= MAX_ADDR_RANGES)
+               fatal("too many addr ranges\n");
+
+       opt_offset[nr_addr_ranges] = offset;
+       opt_size[nr_addr_ranges] = min_t(unsigned long, size, ULONG_MAX-offset);
+       nr_addr_ranges++;
+}
+
 static void walk_addr_ranges(void)
 {
        int i;
@@ -415,10 +534,13 @@ static void walk_addr_ranges(void)
        }
 
        if (!nr_addr_ranges)
-               walk_pfn(0, ULONG_MAX);
+               add_addr_range(0, ULONG_MAX);
 
        for (i = 0; i < nr_addr_ranges; i++)
-               walk_pfn(opt_offset[i], opt_size[i]);
+               if (!opt_pid)
+                       walk_pfn(opt_offset[i], opt_size[i]);
+               else
+                       walk_task(opt_offset[i], opt_size[i]);
 
        close(kpageflags_fd);
 }
@@ -446,8 +568,8 @@ static void usage(void)
 "            -r|--raw                  Raw mode, for kernel developers\n"
 "            -a|--addr    addr-spec    Walk a range of pages\n"
 "            -b|--bits    bits-spec    Walk pages with specified bits\n"
-#if 0 /* planned features */
 "            -p|--pid     pid          Walk process address space\n"
+#if 0 /* planned features */
 "            -f|--file    filename     Walk file address space\n"
 #endif
 "            -l|--list                 Show page details in ranges\n"
@@ -459,7 +581,7 @@ static void usage(void)
 "            N+M                       pages range from N to N+M-1\n"
 "            N,M                       pages range from N to M-1\n"
 "            N,                        pages range from N to end\n"
-"            ,M                        pages range from 0 to M\n"
+"            ,M                        pages range from 0 to M-1\n"
 "bits-spec:\n"
 "            bit1,bit2                 (flags & (bit1|bit2)) != 0\n"
 "            bit1,bit2=bit1            (flags & (bit1|bit2)) == bit1\n"
@@ -496,21 +618,57 @@ static unsigned long long parse_number(const char *str)
 
 static void parse_pid(const char *str)
 {
+       FILE *file;
+       char buf[5000];
+
        opt_pid = parse_number(str);
-}
 
-static void parse_file(const char *name)
-{
+       sprintf(buf, "/proc/%d/pagemap", opt_pid);
+       pagemap_fd = open(buf, O_RDONLY);
+       if (pagemap_fd < 0) {
+               perror(buf);
+               exit(EXIT_FAILURE);
+       }
+
+       sprintf(buf, "/proc/%d/maps", opt_pid);
+       file = fopen(buf, "r");
+       if (!file) {
+               perror(buf);
+               exit(EXIT_FAILURE);
+       }
+
+       while (fgets(buf, sizeof(buf), file) != NULL) {
+               unsigned long vm_start;
+               unsigned long vm_end;
+               unsigned long long pgoff;
+               int major, minor;
+               char r, w, x, s;
+               unsigned long ino;
+               int n;
+
+               n = sscanf(buf, "%lx-%lx %c%c%c%c %llx %x:%x %lu",
+                          &vm_start,
+                          &vm_end,
+                          &r, &w, &x, &s,
+                          &pgoff,
+                          &major, &minor,
+                          &ino);
+               if (n < 10) {
+                       fprintf(stderr, "unexpected line: %s\n", buf);
+                       continue;
+               }
+               pg_start[nr_vmas] = vm_start / page_size;
+               pg_end[nr_vmas] = vm_end / page_size;
+               if (++nr_vmas >= MAX_VMAS) {
+                       fprintf(stderr, "too many VMAs\n");
+                       break;
+               }
+       }
+       fclose(file);
 }
 
-static void add_addr_range(unsigned long offset, unsigned long size)
+static void parse_file(const char *name)
 {
-       if (nr_addr_ranges >= MAX_ADDR_RANGES)
-               fatal("too much addr ranges\n");
-
-       opt_offset[nr_addr_ranges] = offset;
-       opt_size[nr_addr_ranges] = size;
-       nr_addr_ranges++;
 }
 
 static void parse_addr_range(const char *optarg)
@@ -676,8 +834,10 @@ int main(int argc, char *argv[])
                }
        }
 
+       if (opt_list && opt_pid)
+               printf("voffset\t");
        if (opt_list == 1)
-               printf("offset\tcount\tflags\n");
+               printf("offset\tlen\tflags\n");
        if (opt_list == 2)
                printf("offset\tflags\n");
 
index 7c1c0b0..c450f3a 100644 (file)
@@ -257,12 +257,6 @@ W: http://www.lesswatts.org/projects/acpi/
 S:     Supported
 F:     drivers/acpi/fan.c
 
-ACPI PCI HOTPLUG DRIVER
-M:     Kristen Carlson Accardi <kristen.c.accardi@intel.com>
-L:     linux-pci@vger.kernel.org
-S:     Supported
-F:     drivers/pci/hotplug/acpi*
-
 ACPI THERMAL DRIVER
 M:     Zhang Rui <rui.zhang@intel.com>
 L:     linux-acpi@vger.kernel.org
@@ -689,7 +683,7 @@ S:  Maintained
 ARM/INTEL IXP4XX ARM ARCHITECTURE
 M:     Imre Kaloz <kaloz@openwrt.org>
 M:     Krzysztof Halasa <khc@pm.waw.pl>
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-ixp4xx/
 
@@ -746,18 +740,22 @@ M:        Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
 M:     Dirk Opfer <dirk@opfer-online.de>
 S:     Maintained
 
-ARM/PALMTX,PALMT5,PALMLD,PALMTE2 SUPPORT
-M:     Marek Vasut <marek.vasut@gmail.com>
+ARM/PALMTX,PALMT5,PALMLD,PALMTE2,PALMTC SUPPORT
+P:     Marek Vasut
+M:     marek.vasut@gmail.com
+L:     linux-arm-kernel@lists.infradead.org
 W:     http://hackndev.com
 S:     Maintained
 
 ARM/PALM TREO 680 SUPPORT
 M:     Tomas Cech <sleep_walker@suse.cz>
+L:     linux-arm-kernel@lists.infradead.org
 W:     http://hackndev.com
 S:     Maintained
 
 ARM/PALMZ72 SUPPORT
 M:     Sergey Lapin <slapin@ossfans.org>
+L:     linux-arm-kernel@lists.infradead.org
 W:     http://hackndev.com
 S:     Maintained
 
@@ -2331,7 +2329,9 @@ S:        Orphan
 F:     drivers/hwmon/
 
 HARDWARE RANDOM NUMBER GENERATOR CORE
-S:     Orphan
+M:     Matt Mackall <mpm@selenic.com>
+M:     Herbert Xu <herbert@gondor.apana.org.au>
+S:     Odd fixes
 F:     Documentation/hw_random.txt
 F:     drivers/char/hw_random/
 F:     include/linux/hw_random.h
@@ -4003,11 +4003,11 @@ F:      Documentation/PCI/
 F:     drivers/pci/
 F:     include/linux/pci*
 
-PCIE HOTPLUG DRIVER
-M:     Kristen Carlson Accardi <kristen.c.accardi@intel.com>
+PCI HOTPLUG
+M:     Jesse Barnes <jbarnes@virtuousgeek.org>
 L:     linux-pci@vger.kernel.org
 S:     Supported
-F:     drivers/pci/pcie/
+F:     drivers/pci/hotplug
 
 PCMCIA SUBSYSTEM
 P:     Linux PCMCIA Team
@@ -4670,12 +4670,6 @@ F:       drivers/serial/serial_lh7a40x.c
 F:     drivers/usb/gadget/lh7a40*
 F:     drivers/usb/host/ohci-lh7a40*
 
-SHPC HOTPLUG DRIVER
-M:     Kristen Carlson Accardi <kristen.c.accardi@intel.com>
-L:     linux-pci@vger.kernel.org
-S:     Supported
-F:     drivers/pci/hotplug/shpchp*
-
 SIMPLE FIRMWARE INTERFACE (SFI)
 P:     Len Brown
 M:     lenb@kernel.org
@@ -4687,7 +4681,6 @@ F:        arch/x86/kernel/*sfi*
 F:     drivers/sfi/
 F:     include/linux/sfi*.h
 
-
 SIMTEC EB110ATX (Chalice CATS)
 P:     Ben Dooks
 M:     Vincent Sanders <support@simtec.co.uk>
index 25da001..e42823e 100644 (file)
@@ -26,6 +26,8 @@
 #define F_GETOWN       6       /*  for sockets. */
 #define F_SETSIG       10      /*  for sockets. */
 #define F_GETSIG       11      /*  for sockets. */
+#define F_SETOWN_EX    12
+#define F_GETOWN_EX    13
 
 /* for posix fcntl() and lockf() */
 #define F_RDLCK                1
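The new F_SETOWN_EX/F_GETOWN_EX commands take a struct f_owner_ex argument so that SIGIO/SIGURG ownership can be directed at a single thread rather than a whole process or process group. A minimal sketch, assuming a libc that already exposes struct f_owner_ex and the F_OWNER_TID type constant (on an older libc the struct and constants, such as F_SETOWN_EX == 12 here, must be supplied by hand):

#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/syscall.h>

/* Route SIGIO for fd to the calling thread only (F_OWNER_TID),
 * instead of the whole process as plain F_SETOWN would. */
static int own_fd_by_this_thread(int fd)
{
	struct f_owner_ex owner = {
		.type = F_OWNER_TID,
		.pid  = syscall(SYS_gettid),
	};

	if (fcntl(fd, F_SETOWN_EX, &owner) < 0) {
		perror("F_SETOWN_EX");
		return -1;
	}
	return fcntl(fd, F_SETFL, O_ASYNC | fcntl(fd, F_GETFL));
}

Plain F_SETOWN continues to work and corresponds to the F_OWNER_PID (positive pid) and F_OWNER_PGRP (negative pid) cases of the extended interface.
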
index e302dae..8e059e5 100644 (file)
@@ -1016,7 +1016,7 @@ marvel_agp_bind_memory(alpha_agp_info *agp, off_t pg_start, struct agp_memory *m
 {
        struct marvel_agp_aperture *aper = agp->aperture.sysdata;
        return iommu_bind(aper->arena, aper->pg_start + pg_start, 
-                         mem->page_count, mem->memory);
+                         mem->page_count, mem->pages);
 }
 
 static int 
index 319fcb7..7668649 100644 (file)
@@ -680,7 +680,7 @@ titan_agp_bind_memory(alpha_agp_info *agp, off_t pg_start, struct agp_memory *me
 {
        struct titan_agp_aperture *aper = agp->aperture.sysdata;
        return iommu_bind(aper->arena, aper->pg_start + pg_start, 
-                         mem->page_count, mem->memory);
+                         mem->page_count, mem->pages);
 }
 
 static int 
index 19b8632..6f80ca4 100644 (file)
@@ -13,6 +13,5 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
 struct task_struct init_task = INIT_TASK(init_task);
 EXPORT_SYMBOL(init_task);
 
-union thread_union init_thread_union
-       __attribute__((section(".data.init_thread")))
-       = { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+       { INIT_THREAD_INFO(init_task) };
index 00edd04..85457b2 100644 (file)
@@ -198,7 +198,7 @@ extern unsigned long size_for_memory(unsigned long max);
 
 extern int iommu_reserve(struct pci_iommu_arena *, long, long);
 extern int iommu_release(struct pci_iommu_arena *, long, long);
-extern int iommu_bind(struct pci_iommu_arena *, long, long, unsigned long *);
+extern int iommu_bind(struct pci_iommu_arena *, long, long, struct page **);
 extern int iommu_unbind(struct pci_iommu_arena *, long, long);
 
 
index d15aedf..8449504 100644 (file)
@@ -876,7 +876,7 @@ iommu_release(struct pci_iommu_arena *arena, long pg_start, long pg_count)
 
 int
 iommu_bind(struct pci_iommu_arena *arena, long pg_start, long pg_count, 
-          unsigned long *physaddrs)
+          struct page **pages)
 {
        unsigned long flags;
        unsigned long *ptes;
@@ -896,7 +896,7 @@ iommu_bind(struct pci_iommu_arena *arena, long pg_start, long pg_count,
        }
                
        for(i = 0, j = pg_start; i < pg_count; i++, j++)
-               ptes[j] = mk_iommu_pte(physaddrs[i]);
+               ptes[j] = mk_iommu_pte(page_to_phys(pages[i]));
 
        spin_unlock_irqrestore(&arena->lock, flags);
 
index 6dc03c3..2906665 100644 (file)
@@ -1,5 +1,6 @@
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/page.h>
+#include <asm/thread_info.h>
 
 OUTPUT_FORMAT("elf64-alpha")
 OUTPUT_ARCH(alpha)
@@ -31,88 +32,21 @@ SECTIONS
        } :kernel
 
        RODATA
-
-       /* Exception table */
-       . = ALIGN(16);
-       __ex_table : {
-               __start___ex_table = .;
-               *(__ex_table)
-               __stop___ex_table = .;
-       }
+       EXCEPTION_TABLE(16)
 
        /* Will be freed after init */
-       . = ALIGN(PAGE_SIZE);
-       /* Init code and data */
-       __init_begin = .;
-       .init.text : {
-               _sinittext = .;
-               INIT_TEXT
-               _einittext = .;
-       }
-       .init.data : {
-               INIT_DATA
-       }
-
-       . = ALIGN(16);
-       .init.setup : {
-               __setup_start = .;
-               *(.init.setup)
-               __setup_end = .;
-       }
-
-       . = ALIGN(8);
-       .initcall.init : {
-               __initcall_start = .;
-               INITCALLS
-               __initcall_end = .;
-       }
-
-#ifdef CONFIG_BLK_DEV_INITRD
-       . = ALIGN(PAGE_SIZE);
-       .init.ramfs : {
-               __initramfs_start = .;
-               *(.init.ramfs)
-               __initramfs_end = .;
-       }
-#endif
-
-       . = ALIGN(8);
-       .con_initcall.init : {
-               __con_initcall_start = .;
-               *(.con_initcall.init)
-               __con_initcall_end = .;
-       }
-
-       . = ALIGN(8);
-       SECURITY_INIT
-
+       __init_begin = ALIGN(PAGE_SIZE);
+       INIT_TEXT_SECTION(PAGE_SIZE)
+       INIT_DATA_SECTION(16)
        PERCPU(PAGE_SIZE)
-
-       . = ALIGN(2 * PAGE_SIZE);
+       /* Align to THREAD_SIZE rather than PAGE_SIZE here so any padding page
+          needed for the THREAD_SIZE aligned init_task gets freed after init */
+       . = ALIGN(THREAD_SIZE);
        __init_end = .;
        /* Freed after init ends here */
 
-       /* Note 2 page alignment above.  */
-       .data.init_thread : {
-               *(.data.init_thread)
-       }
-
-       . = ALIGN(PAGE_SIZE);
-       .data.page_aligned : {
-               *(.data.page_aligned)
-       }
-
-       . = ALIGN(64);
-       .data.cacheline_aligned : {
-               *(.data.cacheline_aligned)
-       }
-
        _data = .;
-       /* Data */
-       .data : {
-               DATA_DATA
-               CONSTRUCTORS
-       }
+       RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE)
 
        .got : {
                *(.got)
@@ -122,16 +56,7 @@ SECTIONS
        }
        _edata = .;     /* End of data section */
 
-       __bss_start = .;
-       .sbss : {
-               *(.sbss)
-               *(.scommon)
-       }
-       .bss : {
-               *(.bss)
-               *(COMMON)
-       }
-       __bss_stop = .;
+       BSS_SECTION(0, 0, 0)
        _end = .;
 
        .mdebug 0 : {
index d778a69..1c4119c 100644 (file)
@@ -46,6 +46,10 @@ config GENERIC_CLOCKEVENTS_BROADCAST
        depends on GENERIC_CLOCKEVENTS
        default y if SMP && !LOCAL_TIMERS
 
+config HAVE_TCM
+       bool
+       select GENERIC_ALLOCATOR
+
 config NO_IOPORT
        bool
 
@@ -649,6 +653,7 @@ config ARCH_U300
        bool "ST-Ericsson U300 Series"
        depends on MMU
        select CPU_ARM926T
+       select HAVE_TCM
        select ARM_AMBA
        select ARM_VIC
        select GENERIC_TIME
index 2293f0c..bd36c77 100644 (file)
@@ -865,6 +865,7 @@ void locomo_gpio_set_dir(struct device *dev, unsigned int bits, unsigned int dir
 
        spin_unlock_irqrestore(&lchip->lock, flags);
 }
+EXPORT_SYMBOL(locomo_gpio_set_dir);
 
 int locomo_gpio_read_level(struct device *dev, unsigned int bits)
 {
@@ -882,6 +883,7 @@ int locomo_gpio_read_level(struct device *dev, unsigned int bits)
        ret &= bits;
        return ret;
 }
+EXPORT_SYMBOL(locomo_gpio_read_level);
 
 int locomo_gpio_read_output(struct device *dev, unsigned int bits)
 {
@@ -899,6 +901,7 @@ int locomo_gpio_read_output(struct device *dev, unsigned int bits)
        ret &= bits;
        return ret;
 }
+EXPORT_SYMBOL(locomo_gpio_read_output);
 
 void locomo_gpio_write(struct device *dev, unsigned int bits, unsigned int set)
 {
@@ -920,6 +923,7 @@ void locomo_gpio_write(struct device *dev, unsigned int bits, unsigned int set)
 
        spin_unlock_irqrestore(&lchip->lock, flags);
 }
+EXPORT_SYMBOL(locomo_gpio_write);
 
 static void locomo_m62332_sendbit(void *mapbase, int bit)
 {
@@ -1084,13 +1088,12 @@ void locomo_m62332_senddata(struct locomo_dev *ldev, unsigned int dac_data, int
 
        spin_unlock_irqrestore(&lchip->lock, flags);
 }
+EXPORT_SYMBOL(locomo_m62332_senddata);
 
 /*
  *     Frontlight control
  */
 
-static struct locomo *locomo_chip_driver(struct locomo_dev *ldev);
-
 void locomo_frontlight_set(struct locomo_dev *dev, int duty, int vr, int bpwf)
 {
        unsigned long flags;
@@ -1182,11 +1185,13 @@ int locomo_driver_register(struct locomo_driver *driver)
        driver->drv.bus = &locomo_bus_type;
        return driver_register(&driver->drv);
 }
+EXPORT_SYMBOL(locomo_driver_register);
 
 void locomo_driver_unregister(struct locomo_driver *driver)
 {
        driver_unregister(&driver->drv);
 }
+EXPORT_SYMBOL(locomo_driver_unregister);
 
 static int __init locomo_init(void)
 {
@@ -1208,11 +1213,3 @@ module_exit(locomo_exit);
 MODULE_DESCRIPTION("Sharp LoCoMo core driver");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("John Lenz <lenz@cs.wisc.edu>");
-
-EXPORT_SYMBOL(locomo_driver_register);
-EXPORT_SYMBOL(locomo_driver_unregister);
-EXPORT_SYMBOL(locomo_gpio_set_dir);
-EXPORT_SYMBOL(locomo_gpio_read_level);
-EXPORT_SYMBOL(locomo_gpio_read_output);
-EXPORT_SYMBOL(locomo_gpio_write);
-EXPORT_SYMBOL(locomo_m62332_senddata);
index 920ced0..f232941 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/list.h>
 #include <linux/io.h>
 #include <linux/sysdev.h>
+#include <linux/device.h>
 #include <linux/amba/bus.h>
 
 #include <asm/mach/irq.h>
diff --git a/arch/arm/configs/littleton_defconfig b/arch/arm/configs/littleton_defconfig
deleted file mode 100644 (file)
index 1db4969..0000000
+++ /dev/null
@@ -1,783 +0,0 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.24-rc5
-# Fri Dec 21 11:06:19 2007
-#
-CONFIG_ARM=y
-CONFIG_SYS_SUPPORTS_APM_EMULATION=y
-CONFIG_GENERIC_GPIO=y
-CONFIG_GENERIC_TIME=y
-CONFIG_GENERIC_CLOCKEVENTS=y
-CONFIG_MMU=y
-# CONFIG_NO_IOPORT is not set
-CONFIG_GENERIC_HARDIRQS=y
-CONFIG_STACKTRACE_SUPPORT=y
-CONFIG_LOCKDEP_SUPPORT=y
-CONFIG_TRACE_IRQFLAGS_SUPPORT=y
-CONFIG_HARDIRQS_SW_RESEND=y
-CONFIG_GENERIC_IRQ_PROBE=y
-CONFIG_RWSEM_GENERIC_SPINLOCK=y
-# CONFIG_ARCH_HAS_ILOG2_U32 is not set
-# CONFIG_ARCH_HAS_ILOG2_U64 is not set
-CONFIG_GENERIC_HWEIGHT=y
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_ZONE_DMA=y
-CONFIG_ARCH_MTD_XIP=y
-CONFIG_VECTORS_BASE=0xffff0000
-CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
-
-#
-# General setup
-#
-CONFIG_EXPERIMENTAL=y
-CONFIG_BROKEN_ON_SMP=y
-CONFIG_LOCK_KERNEL=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-CONFIG_LOCALVERSION=""
-CONFIG_LOCALVERSION_AUTO=y
-CONFIG_SWAP=y
-CONFIG_SYSVIPC=y
-CONFIG_SYSVIPC_SYSCTL=y
-# CONFIG_POSIX_MQUEUE is not set
-# CONFIG_BSD_PROCESS_ACCT is not set
-# CONFIG_TASKSTATS is not set
-# CONFIG_USER_NS is not set
-# CONFIG_PID_NS is not set
-# CONFIG_AUDIT is not set
-# CONFIG_IKCONFIG is not set
-CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_CGROUPS is not set
-CONFIG_FAIR_GROUP_SCHED=y
-CONFIG_FAIR_USER_SCHED=y
-# CONFIG_FAIR_CGROUP_SCHED is not set
-CONFIG_SYSFS_DEPRECATED=y
-# CONFIG_RELAY is not set
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE=""
-CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_SYSCTL=y
-# CONFIG_EMBEDDED is not set
-CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
-CONFIG_KALLSYMS=y
-# CONFIG_KALLSYMS_ALL is not set
-# CONFIG_KALLSYMS_EXTRA_PASS is not set
-CONFIG_HOTPLUG=y
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-CONFIG_ELF_CORE=y
-CONFIG_BASE_FULL=y
-CONFIG_FUTEX=y
-CONFIG_ANON_INODES=y
-CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_EVENTFD=y
-CONFIG_SHMEM=y
-CONFIG_VM_EVENT_COUNTERS=y
-CONFIG_SLAB=y
-# CONFIG_SLUB is not set
-# CONFIG_SLOB is not set
-CONFIG_RT_MUTEXES=y
-# CONFIG_TINY_SHMEM is not set
-CONFIG_BASE_SMALL=0
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULE_FORCE_UNLOAD=y
-# CONFIG_MODVERSIONS is not set
-# CONFIG_MODULE_SRCVERSION_ALL is not set
-# CONFIG_KMOD is not set
-CONFIG_BLOCK=y
-# CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
-# CONFIG_LSF is not set
-# CONFIG_BLK_DEV_BSG is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
-# CONFIG_DEFAULT_AS is not set
-# CONFIG_DEFAULT_DEADLINE is not set
-CONFIG_DEFAULT_CFQ=y
-# CONFIG_DEFAULT_NOOP is not set
-CONFIG_DEFAULT_IOSCHED="cfq"
-
-#
-# System Type
-#
-# CONFIG_ARCH_AAEC2000 is not set
-# CONFIG_ARCH_INTEGRATOR is not set
-# CONFIG_ARCH_REALVIEW is not set
-# CONFIG_ARCH_VERSATILE is not set
-# CONFIG_ARCH_AT91 is not set
-# CONFIG_ARCH_CLPS7500 is not set
-# CONFIG_ARCH_CLPS711X is not set
-# CONFIG_ARCH_CO285 is not set
-# CONFIG_ARCH_EBSA110 is not set
-# CONFIG_ARCH_EP93XX is not set
-# CONFIG_ARCH_FOOTBRIDGE is not set
-# CONFIG_ARCH_NETX is not set
-# CONFIG_ARCH_H720X is not set
-# CONFIG_ARCH_IMX is not set
-# CONFIG_ARCH_IOP13XX is not set
-# CONFIG_ARCH_IOP32X is not set
-# CONFIG_ARCH_IOP33X is not set
-# CONFIG_ARCH_IXP23XX is not set
-# CONFIG_ARCH_IXP2000 is not set
-# CONFIG_ARCH_IXP4XX is not set
-# CONFIG_ARCH_L7200 is not set
-# CONFIG_ARCH_KS8695 is not set
-# CONFIG_ARCH_NS9XXX is not set
-# CONFIG_ARCH_MXC is not set
-# CONFIG_ARCH_PNX4008 is not set
-CONFIG_ARCH_PXA=y
-# CONFIG_ARCH_RPC is not set
-# CONFIG_ARCH_SA1100 is not set
-# CONFIG_ARCH_S3C2410 is not set
-# CONFIG_ARCH_SHARK is not set
-# CONFIG_ARCH_LH7A40X is not set
-# CONFIG_ARCH_DAVINCI is not set
-# CONFIG_ARCH_OMAP is not set
-
-#
-# Intel PXA2xx/PXA3xx Implementations
-#
-
-#
-# Supported PXA3xx Processor Variants
-#
-CONFIG_CPU_PXA300=y
-CONFIG_CPU_PXA310=y
-# CONFIG_CPU_PXA320 is not set
-# CONFIG_ARCH_LUBBOCK is not set
-# CONFIG_MACH_LOGICPD_PXA270 is not set
-# CONFIG_MACH_MAINSTONE is not set
-# CONFIG_ARCH_PXA_IDP is not set
-# CONFIG_PXA_SHARPSL is not set
-# CONFIG_MACH_TRIZEPS4 is not set
-# CONFIG_MACH_EM_X270 is not set
-# CONFIG_MACH_ZYLONITE is not set
-CONFIG_MACH_LITTLETON=y
-# CONFIG_MACH_ARMCORE is not set
-CONFIG_PXA3xx=y
-CONFIG_PXA_SSP=y
-
-#
-# Boot options
-#
-
-#
-# Power management
-#
-
-#
-# Processor Type
-#
-CONFIG_CPU_32=y
-CONFIG_CPU_XSC3=y
-CONFIG_CPU_32v5=y
-CONFIG_CPU_ABRT_EV5T=y
-CONFIG_CPU_CACHE_VIVT=y
-CONFIG_CPU_TLB_V4WBI=y
-CONFIG_CPU_CP15=y
-CONFIG_CPU_CP15_MMU=y
-CONFIG_IO_36=y
-
-#
-# Processor Features
-#
-# CONFIG_ARM_THUMB is not set
-# CONFIG_CPU_DCACHE_DISABLE is not set
-# CONFIG_CPU_BPREDICT_DISABLE is not set
-# CONFIG_OUTER_CACHE is not set
-CONFIG_IWMMXT=y
-
-#
-# Bus support
-#
-# CONFIG_PCI_SYSCALL is not set
-# CONFIG_ARCH_SUPPORTS_MSI is not set
-# CONFIG_PCCARD is not set
-
-#
-# Kernel Features
-#
-CONFIG_TICK_ONESHOT=y
-# CONFIG_NO_HZ is not set
-# CONFIG_HIGH_RES_TIMERS is not set
-CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
-CONFIG_PREEMPT=y
-CONFIG_HZ=100
-CONFIG_AEABI=y
-CONFIG_OABI_COMPAT=y
-# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set
-CONFIG_SELECT_MEMORY_MODEL=y
-CONFIG_FLATMEM_MANUAL=y
-# CONFIG_DISCONTIGMEM_MANUAL is not set
-# CONFIG_SPARSEMEM_MANUAL is not set
-CONFIG_FLATMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
-# CONFIG_SPARSEMEM_STATIC is not set
-# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
-CONFIG_SPLIT_PTLOCK_CPUS=4096
-# CONFIG_RESOURCES_64BIT is not set
-CONFIG_ZONE_DMA_FLAG=1
-CONFIG_BOUNCE=y
-CONFIG_VIRT_TO_BUS=y
-CONFIG_ALIGNMENT_TRAP=y
-
-#
-# Boot options
-#
-CONFIG_ZBOOT_ROM_TEXT=0x0
-CONFIG_ZBOOT_ROM_BSS=0x0
-CONFIG_CMDLINE="root=/dev/nfs rootfstype=nfs nfsroot=192.168.1.100:/nfsroot/ ip=192.168.1.101:192.168.1.100::255.255.255.0::eth0:on console=ttyS2,38400 mem=64M"
-# CONFIG_XIP_KERNEL is not set
-# CONFIG_KEXEC is not set
-
-#
-# CPU Frequency scaling
-#
-# CONFIG_CPU_FREQ is not set
-
-#
-# Floating point emulation
-#
-
-#
-# At least one emulation must be selected
-#
-CONFIG_FPE_NWFPE=y
-# CONFIG_FPE_NWFPE_XP is not set
-# CONFIG_FPE_FASTFPE is not set
-
-#
-# Userspace binary formats
-#
-CONFIG_BINFMT_ELF=y
-# CONFIG_BINFMT_AOUT is not set
-# CONFIG_BINFMT_MISC is not set
-
-#
-# Power management options
-#
-# CONFIG_PM is not set
-CONFIG_SUSPEND_UP_POSSIBLE=y
-
-#
-# Networking
-#
-CONFIG_NET=y
-
-#
-# Networking options
-#
-CONFIG_PACKET=y
-# CONFIG_PACKET_MMAP is not set
-CONFIG_UNIX=y
-CONFIG_XFRM=y
-# CONFIG_XFRM_USER is not set
-# CONFIG_XFRM_SUB_POLICY is not set
-# CONFIG_XFRM_MIGRATE is not set
-# CONFIG_NET_KEY is not set
-CONFIG_INET=y
-# CONFIG_IP_MULTICAST is not set
-# CONFIG_IP_ADVANCED_ROUTER is not set
-CONFIG_IP_FIB_HASH=y
-CONFIG_IP_PNP=y
-# CONFIG_IP_PNP_DHCP is not set
-# CONFIG_IP_PNP_BOOTP is not set
-# CONFIG_IP_PNP_RARP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_ARPD is not set
-# CONFIG_SYN_COOKIES is not set
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_XFRM_TUNNEL is not set
-# CONFIG_INET_TUNNEL is not set
-CONFIG_INET_XFRM_MODE_TRANSPORT=y
-CONFIG_INET_XFRM_MODE_TUNNEL=y
-CONFIG_INET_XFRM_MODE_BEET=y
-# CONFIG_INET_LRO is not set
-CONFIG_INET_DIAG=y
-CONFIG_INET_TCP_DIAG=y
-# CONFIG_TCP_CONG_ADVANCED is not set
-CONFIG_TCP_CONG_CUBIC=y
-CONFIG_DEFAULT_TCP_CONG="cubic"
-# CONFIG_TCP_MD5SIG is not set
-# CONFIG_IPV6 is not set
-# CONFIG_INET6_XFRM_TUNNEL is not set
-# CONFIG_INET6_TUNNEL is not set
-# CONFIG_NETWORK_SECMARK is not set
-# CONFIG_NETFILTER is not set
-# CONFIG_IP_DCCP is not set
-# CONFIG_IP_SCTP is not set
-# CONFIG_TIPC is not set
-# CONFIG_ATM is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-# CONFIG_NET_SCHED is not set
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-# CONFIG_AF_RXRPC is not set
-
-#
-# Wireless
-#
-# CONFIG_CFG80211 is not set
-# CONFIG_WIRELESS_EXT is not set
-# CONFIG_MAC80211 is not set
-# CONFIG_IEEE80211 is not set
-# CONFIG_RFKILL is not set
-# CONFIG_NET_9P is not set
-
-#
-# Device Drivers
-#
-
-#
-# Generic Driver Options
-#
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_STANDALONE is not set
-# CONFIG_PREVENT_FIRMWARE_BUILD is not set
-CONFIG_FW_LOADER=y
-# CONFIG_DEBUG_DRIVER is not set
-# CONFIG_DEBUG_DEVRES is not set
-# CONFIG_SYS_HYPERVISOR is not set
-# CONFIG_CONNECTOR is not set
-# CONFIG_MTD is not set
-# CONFIG_PARPORT is not set
-# CONFIG_BLK_DEV is not set
-# CONFIG_MISC_DEVICES is not set
-# CONFIG_IDE is not set
-
-#
-# SCSI device support
-#
-# CONFIG_RAID_ATTRS is not set
-# CONFIG_SCSI is not set
-# CONFIG_SCSI_DMA is not set
-# CONFIG_SCSI_NETLINK is not set
-# CONFIG_ATA is not set
-# CONFIG_MD is not set
-CONFIG_NETDEVICES=y
-# CONFIG_NETDEVICES_MULTIQUEUE is not set
-# CONFIG_DUMMY is not set
-# CONFIG_BONDING is not set
-# CONFIG_MACVLAN is not set
-# CONFIG_EQUALIZER is not set
-# CONFIG_TUN is not set
-# CONFIG_VETH is not set
-# CONFIG_PHYLIB is not set
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-# CONFIG_AX88796 is not set
-CONFIG_SMC91X=y
-# CONFIG_DM9000 is not set
-# CONFIG_SMC911X is not set
-# CONFIG_IBM_NEW_EMAC_ZMII is not set
-# CONFIG_IBM_NEW_EMAC_RGMII is not set
-# CONFIG_IBM_NEW_EMAC_TAH is not set
-# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
-# CONFIG_B44 is not set
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
-
-#
-# Wireless LAN
-#
-# CONFIG_WLAN_PRE80211 is not set
-# CONFIG_WLAN_80211 is not set
-# CONFIG_WAN is not set
-# CONFIG_PPP is not set
-# CONFIG_SLIP is not set
-# CONFIG_SHAPER is not set
-# CONFIG_NETCONSOLE is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-# CONFIG_ISDN is not set
-
-#
-# Input device support
-#
-CONFIG_INPUT=y
-# CONFIG_INPUT_FF_MEMLESS is not set
-# CONFIG_INPUT_POLLDEV is not set
-
-#
-# Userland interfaces
-#
-CONFIG_INPUT_MOUSEDEV=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
-# CONFIG_INPUT_JOYDEV is not set
-# CONFIG_INPUT_EVDEV is not set
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TABLET is not set
-# CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
-
-#
-# Hardware I/O ports
-#
-# CONFIG_SERIO is not set
-# CONFIG_GAMEPORT is not set
-
-#
-# Character devices
-#
-CONFIG_VT=y
-CONFIG_VT_CONSOLE=y
-CONFIG_HW_CONSOLE=y
-# CONFIG_VT_HW_CONSOLE_BINDING is not set
-# CONFIG_SERIAL_NONSTANDARD is not set
-
-#
-# Serial drivers
-#
-# CONFIG_SERIAL_8250 is not set
-
-#
-# Non-8250 serial port support
-#
-CONFIG_SERIAL_PXA=y
-CONFIG_SERIAL_PXA_CONSOLE=y
-CONFIG_SERIAL_CORE=y
-CONFIG_SERIAL_CORE_CONSOLE=y
-CONFIG_UNIX98_PTYS=y
-# CONFIG_LEGACY_PTYS is not set
-# CONFIG_IPMI_HANDLER is not set
-# CONFIG_HW_RANDOM is not set
-# CONFIG_NVRAM is not set
-# CONFIG_R3964 is not set
-# CONFIG_RAW_DRIVER is not set
-# CONFIG_TCG_TPM is not set
-# CONFIG_I2C is not set
-
-#
-# SPI support
-#
-# CONFIG_SPI is not set
-# CONFIG_SPI_MASTER is not set
-# CONFIG_W1 is not set
-# CONFIG_POWER_SUPPLY is not set
-# CONFIG_HWMON is not set
-# CONFIG_WATCHDOG is not set
-
-#
-# Sonics Silicon Backplane
-#
-CONFIG_SSB_POSSIBLE=y
-# CONFIG_SSB is not set
-
-#
-# Multifunction device drivers
-#
-# CONFIG_MFD_SM501 is not set
-
-#
-# Multimedia devices
-#
-# CONFIG_VIDEO_DEV is not set
-# CONFIG_DVB_CORE is not set
-# CONFIG_DAB is not set
-
-#
-# Graphics support
-#
-# CONFIG_VGASTATE is not set
-# CONFIG_VIDEO_OUTPUT_CONTROL is not set
-CONFIG_FB=y
-# CONFIG_FIRMWARE_EDID is not set
-# CONFIG_FB_DDC is not set
-CONFIG_FB_CFB_FILLRECT=y
-CONFIG_FB_CFB_COPYAREA=y
-CONFIG_FB_CFB_IMAGEBLIT=y
-# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
-# CONFIG_FB_SYS_FILLRECT is not set
-# CONFIG_FB_SYS_COPYAREA is not set
-# CONFIG_FB_SYS_IMAGEBLIT is not set
-# CONFIG_FB_SYS_FOPS is not set
-CONFIG_FB_DEFERRED_IO=y
-# CONFIG_FB_SVGALIB is not set
-# CONFIG_FB_MACMODES is not set
-# CONFIG_FB_BACKLIGHT is not set
-# CONFIG_FB_MODE_HELPERS is not set
-# CONFIG_FB_TILEBLITTING is not set
-
-#
-# Frame buffer hardware drivers
-#
-# CONFIG_FB_S1D13XXX is not set
-CONFIG_FB_PXA=y
-# CONFIG_FB_PXA_PARAMETERS is not set
-# CONFIG_FB_MBX is not set
-# CONFIG_FB_VIRTUAL is not set
-# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
-
-#
-# Display device support
-#
-# CONFIG_DISPLAY_SUPPORT is not set
-
-#
-# Console display driver support
-#
-# CONFIG_VGA_CONSOLE is not set
-CONFIG_DUMMY_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-# CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY is not set
-# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
-CONFIG_FONTS=y
-# CONFIG_FONT_8x8 is not set
-CONFIG_FONT_8x16=y
-# CONFIG_FONT_6x11 is not set
-# CONFIG_FONT_7x14 is not set
-# CONFIG_FONT_PEARL_8x8 is not set
-# CONFIG_FONT_ACORN_8x8 is not set
-# CONFIG_FONT_MINI_4x6 is not set
-# CONFIG_FONT_SUN8x16 is not set
-# CONFIG_FONT_SUN12x22 is not set
-# CONFIG_FONT_10x18 is not set
-CONFIG_LOGO=y
-CONFIG_LOGO_LINUX_MONO=y
-CONFIG_LOGO_LINUX_VGA16=y
-CONFIG_LOGO_LINUX_CLUT224=y
-
-#
-# Sound
-#
-# CONFIG_SOUND is not set
-# CONFIG_HID_SUPPORT is not set
-# CONFIG_USB_SUPPORT is not set
-# CONFIG_MMC is not set
-# CONFIG_NEW_LEDS is not set
-CONFIG_RTC_LIB=y
-# CONFIG_RTC_CLASS is not set
-
-#
-# File systems
-#
-# CONFIG_EXT2_FS is not set
-# CONFIG_EXT3_FS is not set
-# CONFIG_EXT4DEV_FS is not set
-# CONFIG_REISERFS_FS is not set
-# CONFIG_JFS_FS is not set
-CONFIG_FS_POSIX_ACL=y
-# CONFIG_XFS_FS is not set
-# CONFIG_GFS2_FS is not set
-# CONFIG_OCFS2_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_ROMFS_FS is not set
-# CONFIG_INOTIFY is not set
-# CONFIG_QUOTA is not set
-# CONFIG_DNOTIFY is not set
-# CONFIG_AUTOFS_FS is not set
-# CONFIG_AUTOFS4_FS is not set
-# CONFIG_FUSE_FS is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
-# CONFIG_ISO9660_FS is not set
-# CONFIG_UDF_FS is not set
-
-#
-# DOS/FAT/NT Filesystems
-#
-# CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
-CONFIG_PROC_SYSCTL=y
-CONFIG_SYSFS=y
-# CONFIG_TMPFS is not set
-# CONFIG_HUGETLB_PAGE is not set
-# CONFIG_CONFIGFS_FS is not set
-
-#
-# Miscellaneous filesystems
-#
-# CONFIG_ADFS_FS is not set
-# CONFIG_AFFS_FS is not set
-# CONFIG_HFS_FS is not set
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_BEFS_FS is not set
-# CONFIG_BFS_FS is not set
-# CONFIG_EFS_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-CONFIG_NETWORK_FILESYSTEMS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_NFS_V3_ACL=y
-CONFIG_NFS_V4=y
-CONFIG_NFS_DIRECTIO=y
-# CONFIG_NFSD is not set
-CONFIG_ROOT_NFS=y
-CONFIG_LOCKD=y
-CONFIG_LOCKD_V4=y
-CONFIG_NFS_ACL_SUPPORT=y
-CONFIG_NFS_COMMON=y
-CONFIG_SUNRPC=y
-CONFIG_SUNRPC_GSS=y
-# CONFIG_SUNRPC_BIND34 is not set
-CONFIG_RPCSEC_GSS_KRB5=y
-# CONFIG_RPCSEC_GSS_SPKM3 is not set
-# CONFIG_SMB_FS is not set
-# CONFIG_CIFS is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-
-#
-# Partition Types
-#
-# CONFIG_PARTITION_ADVANCED is not set
-CONFIG_MSDOS_PARTITION=y
-# CONFIG_NLS is not set
-# CONFIG_DLM is not set
-# CONFIG_INSTRUMENTATION is not set
-
-#
-# Kernel hacking
-#
-CONFIG_PRINTK_TIME=y
-CONFIG_ENABLE_WARN_DEPRECATED=y
-CONFIG_ENABLE_MUST_CHECK=y
-CONFIG_MAGIC_SYSRQ=y
-# CONFIG_UNUSED_SYMBOLS is not set
-# CONFIG_DEBUG_FS is not set
-# CONFIG_HEADERS_CHECK is not set
-CONFIG_DEBUG_KERNEL=y
-# CONFIG_DEBUG_SHIRQ is not set
-CONFIG_DETECT_SOFTLOCKUP=y
-CONFIG_SCHED_DEBUG=y
-# CONFIG_SCHEDSTATS is not set
-# CONFIG_TIMER_STATS is not set
-# CONFIG_DEBUG_SLAB is not set
-# CONFIG_DEBUG_PREEMPT is not set
-# CONFIG_DEBUG_RT_MUTEXES is not set
-# CONFIG_RT_MUTEX_TESTER is not set
-# CONFIG_DEBUG_SPINLOCK is not set
-# CONFIG_DEBUG_MUTEXES is not set
-# CONFIG_DEBUG_LOCK_ALLOC is not set
-# CONFIG_PROVE_LOCKING is not set
-# CONFIG_LOCK_STAT is not set
-# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
-# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
-# CONFIG_DEBUG_KOBJECT is not set
-CONFIG_DEBUG_BUGVERBOSE=y
-CONFIG_DEBUG_INFO=y
-# CONFIG_DEBUG_VM is not set
-# CONFIG_DEBUG_LIST is not set
-# CONFIG_DEBUG_SG is not set
-CONFIG_FRAME_POINTER=y
-CONFIG_FORCED_INLINING=y
-# CONFIG_BOOT_PRINTK_DELAY is not set
-# CONFIG_RCU_TORTURE_TEST is not set
-# CONFIG_FAULT_INJECTION is not set
-# CONFIG_SAMPLES is not set
-CONFIG_DEBUG_USER=y
-CONFIG_DEBUG_ERRORS=y
-CONFIG_DEBUG_LL=y
-# CONFIG_DEBUG_ICEDCC is not set
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY is not set
-# CONFIG_SECURITY_FILE_CAPABILITIES is not set
-CONFIG_CRYPTO=y
-CONFIG_CRYPTO_ALGAPI=y
-CONFIG_CRYPTO_BLKCIPHER=y
-CONFIG_CRYPTO_MANAGER=y
-# CONFIG_CRYPTO_HMAC is not set
-# CONFIG_CRYPTO_XCBC is not set
-# CONFIG_CRYPTO_NULL is not set
-# CONFIG_CRYPTO_MD4 is not set
-CONFIG_CRYPTO_MD5=y
-# CONFIG_CRYPTO_SHA1 is not set
-# CONFIG_CRYPTO_SHA256 is not set
-# CONFIG_CRYPTO_SHA512 is not set
-# CONFIG_CRYPTO_WP512 is not set
-# CONFIG_CRYPTO_TGR192 is not set
-# CONFIG_CRYPTO_GF128MUL is not set
-# CONFIG_CRYPTO_ECB is not set
-CONFIG_CRYPTO_CBC=y
-# CONFIG_CRYPTO_PCBC is not set
-# CONFIG_CRYPTO_LRW is not set
-# CONFIG_CRYPTO_XTS is not set
-# CONFIG_CRYPTO_CRYPTD is not set
-CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_FCRYPT is not set
-# CONFIG_CRYPTO_BLOWFISH is not set
-# CONFIG_CRYPTO_TWOFISH is not set
-# CONFIG_CRYPTO_SERPENT is not set
-# CONFIG_CRYPTO_AES is not set
-# CONFIG_CRYPTO_CAST5 is not set
-# CONFIG_CRYPTO_CAST6 is not set
-# CONFIG_CRYPTO_TEA is not set
-# CONFIG_CRYPTO_ARC4 is not set
-# CONFIG_CRYPTO_KHAZAD is not set
-# CONFIG_CRYPTO_ANUBIS is not set
-# CONFIG_CRYPTO_SEED is not set
-# CONFIG_CRYPTO_DEFLATE is not set
-# CONFIG_CRYPTO_MICHAEL_MIC is not set
-# CONFIG_CRYPTO_CRC32C is not set
-# CONFIG_CRYPTO_CAMELLIA is not set
-# CONFIG_CRYPTO_TEST is not set
-# CONFIG_CRYPTO_AUTHENC is not set
-CONFIG_CRYPTO_HW=y
-
-#
-# Library routines
-#
-CONFIG_BITREVERSE=y
-CONFIG_CRC_CCITT=y
-# CONFIG_CRC16 is not set
-# CONFIG_CRC_ITU_T is not set
-CONFIG_CRC32=y
-# CONFIG_CRC7 is not set
-# CONFIG_LIBCRC32C is not set
-CONFIG_PLIST=y
-CONFIG_HAS_IOMEM=y
-CONFIG_HAS_IOPORT=y
-CONFIG_HAS_DMA=y
diff --git a/arch/arm/configs/pxa3xx_defconfig b/arch/arm/configs/pxa3xx_defconfig
new file mode 100644 (file)
index 0000000..733b851
--- /dev/null
@@ -0,0 +1,1332 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.31-rc1
+# Mon Jul 13 22:48:49 2009
+#
+CONFIG_ARM=y
+CONFIG_HAVE_PWM=y
+CONFIG_SYS_SUPPORTS_APM_EMULATION=y
+CONFIG_GENERIC_GPIO=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_MMU=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+CONFIG_HARDIRQS_SW_RESEND=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_ARCH_MTD_XIP=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_VECTORS_BASE=0xffff0000
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+CONFIG_CONSTRUCTORS=y
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+# CONFIG_POSIX_MQUEUE is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+
+#
+# RCU Subsystem
+#
+# CONFIG_CLASSIC_RCU is not set
+CONFIG_TREE_RCU=y
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_RCU_TRACE is not set
+CONFIG_RCU_FANOUT=32
+# CONFIG_RCU_FANOUT_EXACT is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=18
+CONFIG_GROUP_SCHED=y
+CONFIG_FAIR_GROUP_SCHED=y
+# CONFIG_RT_GROUP_SCHED is not set
+CONFIG_USER_SCHED=y
+# CONFIG_CGROUP_SCHED is not set
+# CONFIG_CGROUPS is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+# CONFIG_RELAY is not set
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_IPC_NS is not set
+# CONFIG_USER_NS is not set
+# CONFIG_PID_NS is not set
+# CONFIG_NET_NS is not set
+# CONFIG_BLK_DEV_INITRD is not set
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
+# CONFIG_EMBEDDED is not set
+CONFIG_UID16=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_AIO=y
+
+#
+# Performance Counters
+#
+CONFIG_VM_EVENT_COUNTERS=y
+# CONFIG_STRIP_ASM_SYMS is not set
+CONFIG_COMPAT_BRK=y
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
+# CONFIG_SLOB is not set
+# CONFIG_PROFILING is not set
+# CONFIG_MARKERS is not set
+CONFIG_HAVE_OPROFILE=y
+# CONFIG_KPROBES is not set
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HAVE_CLK=y
+
+#
+# GCOV-based kernel profiling
+#
+# CONFIG_SLOW_WORK is not set
+CONFIG_HAVE_GENERIC_DMA_COHERENT=y
+CONFIG_SLABINFO=y
+CONFIG_RT_MUTEXES=y
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+# CONFIG_MODULE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_BLOCK=y
+CONFIG_LBDAF=y
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_DEFAULT_AS is not set
+# CONFIG_DEFAULT_DEADLINE is not set
+CONFIG_DEFAULT_CFQ=y
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="cfq"
+# CONFIG_FREEZER is not set
+
+#
+# System Type
+#
+# CONFIG_ARCH_AAEC2000 is not set
+# CONFIG_ARCH_INTEGRATOR is not set
+# CONFIG_ARCH_REALVIEW is not set
+# CONFIG_ARCH_VERSATILE is not set
+# CONFIG_ARCH_AT91 is not set
+# CONFIG_ARCH_CLPS711X is not set
+# CONFIG_ARCH_GEMINI is not set
+# CONFIG_ARCH_EBSA110 is not set
+# CONFIG_ARCH_EP93XX is not set
+# CONFIG_ARCH_FOOTBRIDGE is not set
+# CONFIG_ARCH_MXC is not set
+# CONFIG_ARCH_STMP3XXX is not set
+# CONFIG_ARCH_NETX is not set
+# CONFIG_ARCH_H720X is not set
+# CONFIG_ARCH_IOP13XX is not set
+# CONFIG_ARCH_IOP32X is not set
+# CONFIG_ARCH_IOP33X is not set
+# CONFIG_ARCH_IXP23XX is not set
+# CONFIG_ARCH_IXP2000 is not set
+# CONFIG_ARCH_IXP4XX is not set
+# CONFIG_ARCH_L7200 is not set
+# CONFIG_ARCH_KIRKWOOD is not set
+# CONFIG_ARCH_LOKI is not set
+# CONFIG_ARCH_MV78XX0 is not set
+# CONFIG_ARCH_ORION5X is not set
+# CONFIG_ARCH_MMP is not set
+# CONFIG_ARCH_KS8695 is not set
+# CONFIG_ARCH_NS9XXX is not set
+# CONFIG_ARCH_W90X900 is not set
+# CONFIG_ARCH_PNX4008 is not set
+CONFIG_ARCH_PXA=y
+# CONFIG_ARCH_MSM is not set
+# CONFIG_ARCH_RPC is not set
+# CONFIG_ARCH_SA1100 is not set
+# CONFIG_ARCH_S3C2410 is not set
+# CONFIG_ARCH_S3C64XX is not set
+# CONFIG_ARCH_SHARK is not set
+# CONFIG_ARCH_LH7A40X is not set
+# CONFIG_ARCH_U300 is not set
+# CONFIG_ARCH_DAVINCI is not set
+# CONFIG_ARCH_OMAP is not set
+
+#
+# Intel PXA2xx/PXA3xx Implementations
+#
+
+#
+# Supported PXA3xx Processor Variants
+#
+CONFIG_CPU_PXA300=y
+CONFIG_CPU_PXA310=y
+CONFIG_CPU_PXA320=y
+CONFIG_CPU_PXA930=y
+CONFIG_CPU_PXA935=y
+# CONFIG_ARCH_GUMSTIX is not set
+# CONFIG_MACH_INTELMOTE2 is not set
+# CONFIG_MACH_STARGATE2 is not set
+# CONFIG_ARCH_LUBBOCK is not set
+# CONFIG_MACH_LOGICPD_PXA270 is not set
+# CONFIG_MACH_MAINSTONE is not set
+# CONFIG_MACH_MP900C is not set
+# CONFIG_ARCH_PXA_IDP is not set
+# CONFIG_PXA_SHARPSL is not set
+# CONFIG_ARCH_VIPER is not set
+# CONFIG_ARCH_PXA_ESERIES is not set
+# CONFIG_TRIZEPS_PXA is not set
+# CONFIG_MACH_H5000 is not set
+# CONFIG_MACH_EM_X270 is not set
+# CONFIG_MACH_EXEDA is not set
+# CONFIG_MACH_COLIBRI is not set
+# CONFIG_MACH_COLIBRI300 is not set
+# CONFIG_MACH_COLIBRI320 is not set
+CONFIG_MACH_ZYLONITE=y
+CONFIG_MACH_LITTLETON=y
+CONFIG_MACH_TAVOREVB=y
+CONFIG_MACH_SAAR=y
+# CONFIG_MACH_ARMCORE is not set
+# CONFIG_MACH_CM_X300 is not set
+# CONFIG_MACH_H4700 is not set
+# CONFIG_MACH_MAGICIAN is not set
+# CONFIG_MACH_HIMALAYA is not set
+# CONFIG_MACH_MIOA701 is not set
+# CONFIG_MACH_PCM027 is not set
+# CONFIG_ARCH_PXA_PALM is not set
+# CONFIG_MACH_CSB726 is not set
+# CONFIG_PXA_EZX is not set
+CONFIG_PXA3xx=y
+CONFIG_PXA_SSP=y
+CONFIG_PXA_HAVE_BOARD_IRQS=y
+CONFIG_PLAT_PXA=y
+
+#
+# Processor Type
+#
+CONFIG_CPU_32=y
+CONFIG_CPU_XSC3=y
+CONFIG_CPU_32v5=y
+CONFIG_CPU_ABRT_EV5T=y
+CONFIG_CPU_PABRT_NOIFAR=y
+CONFIG_CPU_CACHE_VIVT=y
+CONFIG_CPU_TLB_V4WBI=y
+CONFIG_CPU_CP15=y
+CONFIG_CPU_CP15_MMU=y
+CONFIG_IO_36=y
+
+#
+# Processor Features
+#
+CONFIG_ARM_THUMB=y
+# CONFIG_CPU_DCACHE_DISABLE is not set
+# CONFIG_CPU_BPREDICT_DISABLE is not set
+CONFIG_OUTER_CACHE=y
+CONFIG_CACHE_XSC3L2=y
+CONFIG_IWMMXT=y
+CONFIG_COMMON_CLKDEV=y
+
+#
+# Bus support
+#
+# CONFIG_PCI_SYSCALL is not set
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+# CONFIG_PCCARD is not set
+
+#
+# Kernel Features
+#
+CONFIG_TICK_ONESHOT=y
+# CONFIG_NO_HZ is not set
+# CONFIG_HIGH_RES_TIMERS is not set
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+CONFIG_VMSPLIT_3G=y
+# CONFIG_VMSPLIT_2G is not set
+# CONFIG_VMSPLIT_1G is not set
+CONFIG_PAGE_OFFSET=0xC0000000
+CONFIG_PREEMPT=y
+CONFIG_HZ=100
+CONFIG_AEABI=y
+CONFIG_OABI_COMPAT=y
+# CONFIG_ARCH_SPARSEMEM_DEFAULT is not set
+# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set
+# CONFIG_HIGHMEM is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4096
+# CONFIG_PHYS_ADDR_T_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_VIRT_TO_BUS=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
+CONFIG_DEFAULT_MMAP_MIN_ADDR=4096
+CONFIG_ALIGNMENT_TRAP=y
+# CONFIG_UACCESS_WITH_MEMCPY is not set
+
+#
+# Boot options
+#
+CONFIG_ZBOOT_ROM_TEXT=0x0
+CONFIG_ZBOOT_ROM_BSS=0x0
+CONFIG_CMDLINE="root=/dev/nfs rootfstype=nfs nfsroot=192.168.1.100:/nfsroot/ ip=192.168.1.101:192.168.1.100::255.255.255.0::eth0:on console=ttyS0,115200 mem=64M debug"
+# CONFIG_XIP_KERNEL is not set
+# CONFIG_KEXEC is not set
+
+#
+# CPU Power Management
+#
+# CONFIG_CPU_FREQ is not set
+# CONFIG_CPU_IDLE is not set
+
+#
+# Floating point emulation
+#
+
+#
+# At least one emulation must be selected
+#
+CONFIG_FPE_NWFPE=y
+# CONFIG_FPE_NWFPE_XP is not set
+# CONFIG_FPE_FASTFPE is not set
+
+#
+# Userspace binary formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_HAVE_AOUT=y
+# CONFIG_BINFMT_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Power management options
+#
+# CONFIG_PM is not set
+CONFIG_ARCH_SUSPEND_POSSIBLE=y
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_PNP=y
+# CONFIG_IP_PNP_DHCP is not set
+# CONFIG_IP_PNP_BOOTP is not set
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+# CONFIG_INET_DIAG is not set
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
+# CONFIG_IEEE802154 is not set
+# CONFIG_NET_SCHED is not set
+# CONFIG_DCB is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+# CONFIG_WIRELESS is not set
+# CONFIG_WIMAX is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+CONFIG_FW_LOADER=y
+CONFIG_FIRMWARE_IN_KERNEL=y
+CONFIG_EXTRA_FIRMWARE=""
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_DEBUG_DEVRES is not set
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_CONNECTOR is not set
+CONFIG_MTD=y
+# CONFIG_MTD_DEBUG is not set
+CONFIG_MTD_CONCAT=y
+CONFIG_MTD_PARTITIONS=y
+# CONFIG_MTD_TESTS is not set
+# CONFIG_MTD_REDBOOT_PARTS is not set
+# CONFIG_MTD_CMDLINE_PARTS is not set
+# CONFIG_MTD_AFS_PARTS is not set
+# CONFIG_MTD_AR7_PARTS is not set
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=y
+CONFIG_MTD_BLKDEVS=y
+CONFIG_MTD_BLOCK=y
+# CONFIG_FTL is not set
+# CONFIG_NFTL is not set
+# CONFIG_INFTL is not set
+# CONFIG_RFD_FTL is not set
+# CONFIG_SSFDC is not set
+# CONFIG_MTD_OOPS is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+# CONFIG_MTD_CFI is not set
+# CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_MAP_BANK_WIDTH_1=y
+CONFIG_MTD_MAP_BANK_WIDTH_2=y
+CONFIG_MTD_MAP_BANK_WIDTH_4=y
+# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
+CONFIG_MTD_CFI_I1=y
+CONFIG_MTD_CFI_I2=y
+# CONFIG_MTD_CFI_I4 is not set
+# CONFIG_MTD_CFI_I8 is not set
+# CONFIG_MTD_RAM is not set
+# CONFIG_MTD_ROM is not set
+# CONFIG_MTD_ABSENT is not set
+
+#
+# Mapping drivers for chip access
+#
+# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+# CONFIG_MTD_PLATRAM is not set
+
+#
+# Self-contained MTD device drivers
+#
+# CONFIG_MTD_DATAFLASH is not set
+# CONFIG_MTD_M25P80 is not set
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
+# CONFIG_MTD_MTDRAM is not set
+# CONFIG_MTD_BLOCK2MTD is not set
+
+#
+# Disk-On-Chip Device Drivers
+#
+# CONFIG_MTD_DOC2000 is not set
+# CONFIG_MTD_DOC2001 is not set
+# CONFIG_MTD_DOC2001PLUS is not set
+CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_VERIFY_WRITE=y
+# CONFIG_MTD_NAND_ECC_SMC is not set
+# CONFIG_MTD_NAND_MUSEUM_IDS is not set
+# CONFIG_MTD_NAND_H1900 is not set
+# CONFIG_MTD_NAND_GPIO is not set
+CONFIG_MTD_NAND_IDS=y
+# CONFIG_MTD_NAND_DISKONCHIP is not set
+# CONFIG_MTD_NAND_SHARPSL is not set
+CONFIG_MTD_NAND_PXA3xx=y
+CONFIG_MTD_NAND_PXA3xx_BUILTIN=y
+# CONFIG_MTD_NAND_NANDSIM is not set
+# CONFIG_MTD_NAND_PLATFORM is not set
+CONFIG_MTD_ONENAND=y
+CONFIG_MTD_ONENAND_VERIFY_WRITE=y
+CONFIG_MTD_ONENAND_GENERIC=y
+# CONFIG_MTD_ONENAND_OTP is not set
+# CONFIG_MTD_ONENAND_2X_PROGRAM is not set
+# CONFIG_MTD_ONENAND_SIM is not set
+
+#
+# LPDDR flash memory drivers
+#
+# CONFIG_MTD_LPDDR is not set
+
+#
+# UBI - Unsorted block images
+#
+# CONFIG_MTD_UBI is not set
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_COW_COMMON is not set
+CONFIG_BLK_DEV_LOOP=y
+# CONFIG_BLK_DEV_CRYPTOLOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=4096
+# CONFIG_BLK_DEV_XIP is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+# CONFIG_MG_DISK is not set
+# CONFIG_MISC_DEVICES is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+CONFIG_NETDEVICES=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_VETH is not set
+# CONFIG_PHYLIB is not set
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+# CONFIG_AX88796 is not set
+CONFIG_SMC91X=y
+# CONFIG_DM9000 is not set
+# CONFIG_ENC28J60 is not set
+# CONFIG_ETHOC is not set
+# CONFIG_SMC911X is not set
+# CONFIG_SMSC911X is not set
+# CONFIG_DNET is not set
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
+# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
+# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
+# CONFIG_B44 is not set
+# CONFIG_KS8842 is not set
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
+
+#
+# Wireless LAN
+#
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
+
+#
+# Enable WiMAX (Networking options) to see the WiMAX drivers
+#
+# CONFIG_WAN is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_ISDN is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
+# CONFIG_INPUT_POLLDEV is not set
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+# CONFIG_KEYBOARD_ATKBD is not set
+# CONFIG_KEYBOARD_SUNKBD is not set
+# CONFIG_KEYBOARD_LKKBD is not set
+# CONFIG_KEYBOARD_XTKBD is not set
+# CONFIG_KEYBOARD_NEWTON is not set
+# CONFIG_KEYBOARD_STOWAWAY is not set
+# CONFIG_KEYBOARD_LM8323 is not set
+CONFIG_KEYBOARD_PXA27x=y
+CONFIG_KEYBOARD_PXA930_ROTARY=y
+CONFIG_KEYBOARD_GPIO=y
+CONFIG_INPUT_MOUSE=y
+CONFIG_MOUSE_PS2=y
+CONFIG_MOUSE_PS2_ALPS=y
+CONFIG_MOUSE_PS2_LOGIPS2PP=y
+CONFIG_MOUSE_PS2_SYNAPTICS=y
+CONFIG_MOUSE_PS2_TRACKPOINT=y
+# CONFIG_MOUSE_PS2_ELANTECH is not set
+# CONFIG_MOUSE_PS2_TOUCHKIT is not set
+# CONFIG_MOUSE_SERIAL is not set
+# CONFIG_MOUSE_VSXXXAA is not set
+# CONFIG_MOUSE_GPIO is not set
+CONFIG_MOUSE_PXA930_TRKBALL=y
+# CONFIG_MOUSE_SYNAPTICS_I2C is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TABLET is not set
+CONFIG_INPUT_TOUCHSCREEN=y
+# CONFIG_TOUCHSCREEN_ADS7846 is not set
+# CONFIG_TOUCHSCREEN_AD7877 is not set
+# CONFIG_TOUCHSCREEN_AD7879_I2C is not set
+# CONFIG_TOUCHSCREEN_AD7879_SPI is not set
+# CONFIG_TOUCHSCREEN_AD7879 is not set
+CONFIG_TOUCHSCREEN_DA9034=y
+# CONFIG_TOUCHSCREEN_EETI is not set
+# CONFIG_TOUCHSCREEN_FUJITSU is not set
+# CONFIG_TOUCHSCREEN_GUNZE is not set
+# CONFIG_TOUCHSCREEN_ELO is not set
+# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set
+# CONFIG_TOUCHSCREEN_MTOUCH is not set
+# CONFIG_TOUCHSCREEN_INEXIO is not set
+# CONFIG_TOUCHSCREEN_MK712 is not set
+# CONFIG_TOUCHSCREEN_PENMOUNT is not set
+# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set
+# CONFIG_TOUCHSCREEN_TOUCHWIN is not set
+# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set
+# CONFIG_TOUCHSCREEN_TSC2007 is not set
+# CONFIG_TOUCHSCREEN_W90X900 is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+CONFIG_SERIO=y
+CONFIG_SERIO_SERPORT=y
+CONFIG_SERIO_LIBPS2=y
+# CONFIG_SERIO_RAW is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_CONSOLE_TRANSLATIONS=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+# CONFIG_VT_HW_CONSOLE_BINDING is not set
+CONFIG_DEVKMEM=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+# CONFIG_SERIAL_MAX3100 is not set
+CONFIG_SERIAL_PXA=y
+CONFIG_SERIAL_PXA_CONSOLE=y
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+CONFIG_I2C=y
+CONFIG_I2C_BOARDINFO=y
+# CONFIG_I2C_CHARDEV is not set
+# CONFIG_I2C_HELPER_AUTO is not set
+
+#
+# I2C Algorithms
+#
+# CONFIG_I2C_ALGOBIT is not set
+# CONFIG_I2C_ALGOPCF is not set
+# CONFIG_I2C_ALGOPCA is not set
+
+#
+# I2C Hardware Bus support
+#
+
+#
+# I2C system bus drivers (mostly embedded / system-on-chip)
+#
+# CONFIG_I2C_DESIGNWARE is not set
+# CONFIG_I2C_GPIO is not set
+# CONFIG_I2C_OCORES is not set
+CONFIG_I2C_PXA=y
+# CONFIG_I2C_PXA_SLAVE is not set
+# CONFIG_I2C_SIMTEC is not set
+
+#
+# External I2C/SMBus adapter drivers
+#
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_TAOS_EVM is not set
+
+#
+# Other I2C/SMBus bus drivers
+#
+# CONFIG_I2C_PCA_PLATFORM is not set
+# CONFIG_I2C_STUB is not set
+
+#
+# Miscellaneous I2C Chip support
+#
+# CONFIG_DS1682 is not set
+# CONFIG_SENSORS_TSL2550 is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+CONFIG_SPI=y
+# CONFIG_SPI_DEBUG is not set
+CONFIG_SPI_MASTER=y
+
+#
+# SPI Master Controller Drivers
+#
+# CONFIG_SPI_BITBANG is not set
+# CONFIG_SPI_GPIO is not set
+CONFIG_SPI_PXA2XX=y
+
+#
+# SPI Protocol Masters
+#
+# CONFIG_SPI_SPIDEV is not set
+# CONFIG_SPI_TLE62X0 is not set
+CONFIG_ARCH_REQUIRE_GPIOLIB=y
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+# CONFIG_GPIO_SYSFS is not set
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+CONFIG_GPIO_MAX732X=y
+CONFIG_GPIO_PCA953X=y
+CONFIG_GPIO_PCF857X=y
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+CONFIG_GPIO_MAX7301=y
+# CONFIG_GPIO_MCP23S08 is not set
+# CONFIG_W1 is not set
+CONFIG_POWER_SUPPLY=y
+CONFIG_POWER_SUPPLY_DEBUG=y
+CONFIG_PDA_POWER=y
+# CONFIG_BATTERY_DS2760 is not set
+# CONFIG_BATTERY_BQ27x00 is not set
+CONFIG_BATTERY_DA9030=y
+# CONFIG_BATTERY_MAX17040 is not set
+# CONFIG_HWMON is not set
+# CONFIG_THERMAL is not set
+# CONFIG_THERMAL_HWMON is not set
+# CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_MFD_ASIC3 is not set
+# CONFIG_HTC_EGPIO is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_TPS65010 is not set
+# CONFIG_TWL4030_CORE is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_MFD_T7L66XB is not set
+# CONFIG_MFD_TC6387XB is not set
+# CONFIG_MFD_TC6393XB is not set
+CONFIG_PMIC_DA903X=y
+# CONFIG_MFD_WM8400 is not set
+# CONFIG_MFD_WM8350_I2C is not set
+# CONFIG_MFD_PCF50633 is not set
+# CONFIG_AB3100_CORE is not set
+# CONFIG_EZX_PCAP is not set
+# CONFIG_MEDIA_SUPPORT is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+CONFIG_FB=y
+# CONFIG_FIRMWARE_EDID is not set
+# CONFIG_FB_DDC is not set
+# CONFIG_FB_BOOT_VESA_SUPPORT is not set
+CONFIG_FB_CFB_FILLRECT=y
+CONFIG_FB_CFB_COPYAREA=y
+CONFIG_FB_CFB_IMAGEBLIT=y
+# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
+# CONFIG_FB_SYS_FILLRECT is not set
+# CONFIG_FB_SYS_COPYAREA is not set
+# CONFIG_FB_SYS_IMAGEBLIT is not set
+# CONFIG_FB_FOREIGN_ENDIAN is not set
+# CONFIG_FB_SYS_FOPS is not set
+# CONFIG_FB_SVGALIB is not set
+# CONFIG_FB_MACMODES is not set
+# CONFIG_FB_BACKLIGHT is not set
+# CONFIG_FB_MODE_HELPERS is not set
+# CONFIG_FB_TILEBLITTING is not set
+
+#
+# Frame buffer hardware drivers
+#
+# CONFIG_FB_S1D13XXX is not set
+CONFIG_FB_PXA=y
+# CONFIG_FB_PXA_OVERLAY is not set
+# CONFIG_FB_PXA_SMARTPANEL is not set
+# CONFIG_FB_PXA_PARAMETERS is not set
+# CONFIG_FB_MBX is not set
+# CONFIG_FB_W100 is not set
+# CONFIG_FB_VIRTUAL is not set
+# CONFIG_FB_METRONOME is not set
+# CONFIG_FB_MB862XX is not set
+# CONFIG_FB_BROADSHEET is not set
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+CONFIG_LCD_CLASS_DEVICE=y
+# CONFIG_LCD_LTV350QV is not set
+# CONFIG_LCD_ILI9320 is not set
+CONFIG_LCD_TDO24M=y
+# CONFIG_LCD_VGG2432A4 is not set
+# CONFIG_LCD_PLATFORM is not set
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
+# CONFIG_BACKLIGHT_GENERIC is not set
+CONFIG_BACKLIGHT_PWM=y
+CONFIG_BACKLIGHT_DA903X=y
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+
+#
+# Console display driver support
+#
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_DUMMY_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
+CONFIG_FONTS=y
+# CONFIG_FONT_8x8 is not set
+# CONFIG_FONT_8x16 is not set
+CONFIG_FONT_6x11=y
+# CONFIG_FONT_7x14 is not set
+# CONFIG_FONT_PEARL_8x8 is not set
+# CONFIG_FONT_ACORN_8x8 is not set
+# CONFIG_FONT_MINI_4x6 is not set
+# CONFIG_FONT_SUN8x16 is not set
+# CONFIG_FONT_SUN12x22 is not set
+# CONFIG_FONT_10x18 is not set
+CONFIG_LOGO=y
+CONFIG_LOGO_LINUX_MONO=y
+CONFIG_LOGO_LINUX_VGA16=y
+CONFIG_LOGO_LINUX_CLUT224=y
+# CONFIG_SOUND is not set
+# CONFIG_HID_SUPPORT is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_MMC=y
+# CONFIG_MMC_DEBUG is not set
+# CONFIG_MMC_UNSAFE_RESUME is not set
+
+#
+# MMC/SD/SDIO Card Drivers
+#
+CONFIG_MMC_BLOCK=y
+CONFIG_MMC_BLOCK_BOUNCE=y
+# CONFIG_SDIO_UART is not set
+# CONFIG_MMC_TEST is not set
+
+#
+# MMC/SD/SDIO Host Controller Drivers
+#
+CONFIG_MMC_PXA=y
+# CONFIG_MMC_SDHCI is not set
+# CONFIG_MMC_SPI is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_ACCESSIBILITY is not set
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=m
+
+#
+# LED drivers
+#
+# CONFIG_LEDS_PCA9532 is not set
+CONFIG_LEDS_GPIO=m
+CONFIG_LEDS_GPIO_PLATFORM=y
+# CONFIG_LEDS_LP5521 is not set
+# CONFIG_LEDS_PCA955X is not set
+CONFIG_LEDS_DA903X=m
+# CONFIG_LEDS_DAC124S085 is not set
+# CONFIG_LEDS_PWM is not set
+# CONFIG_LEDS_BD2802 is not set
+
+#
+# LED Triggers
+#
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_TIMER=m
+CONFIG_LEDS_TRIGGER_HEARTBEAT=m
+CONFIG_LEDS_TRIGGER_BACKLIGHT=m
+CONFIG_LEDS_TRIGGER_GPIO=m
+CONFIG_LEDS_TRIGGER_DEFAULT_ON=m
+
+#
+# iptables trigger is under Netfilter config (LED target)
+#
+CONFIG_RTC_LIB=y
+# CONFIG_RTC_CLASS is not set
+# CONFIG_DMADEVICES is not set
+# CONFIG_AUXDISPLAY is not set
+CONFIG_REGULATOR=y
+CONFIG_REGULATOR_DEBUG=y
+# CONFIG_REGULATOR_FIXED_VOLTAGE is not set
+CONFIG_REGULATOR_VIRTUAL_CONSUMER=y
+# CONFIG_REGULATOR_USERSPACE_CONSUMER is not set
+# CONFIG_REGULATOR_BQ24022 is not set
+# CONFIG_REGULATOR_MAX1586 is not set
+CONFIG_REGULATOR_DA903X=y
+# CONFIG_REGULATOR_LP3971 is not set
+# CONFIG_UIO is not set
+# CONFIG_STAGING is not set
+
+#
+# File systems
+#
+# CONFIG_EXT2_FS is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_EXT4_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+CONFIG_FS_POSIX_ACL=y
+# CONFIG_XFS_FS is not set
+# CONFIG_GFS2_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_BTRFS_FS is not set
+CONFIG_FILE_LOCKING=y
+CONFIG_FSNOTIFY=y
+CONFIG_DNOTIFY=y
+# CONFIG_INOTIFY is not set
+CONFIG_INOTIFY_USER=y
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_SYSFS=y
+# CONFIG_TMPFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+CONFIG_MISC_FILESYSTEMS=y
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+CONFIG_JFFS2_FS=y
+CONFIG_JFFS2_FS_DEBUG=0
+CONFIG_JFFS2_FS_WRITEBUFFER=y
+CONFIG_JFFS2_FS_WBUF_VERIFY=y
+# CONFIG_JFFS2_SUMMARY is not set
+# CONFIG_JFFS2_FS_XATTR is not set
+CONFIG_JFFS2_COMPRESSION_OPTIONS=y
+CONFIG_JFFS2_ZLIB=y
+CONFIG_JFFS2_LZO=y
+CONFIG_JFFS2_RTIME=y
+CONFIG_JFFS2_RUBIN=y
+# CONFIG_JFFS2_CMODE_NONE is not set
+CONFIG_JFFS2_CMODE_PRIORITY=y
+# CONFIG_JFFS2_CMODE_SIZE is not set
+# CONFIG_JFFS2_CMODE_FAVOURLZO is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_SQUASHFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_OMFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+# CONFIG_NFS_V4_1 is not set
+CONFIG_ROOT_NFS=y
+# CONFIG_NFSD is not set
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_ACL_SUPPORT=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+CONFIG_SUNRPC_GSS=y
+CONFIG_RPCSEC_GSS_KRB5=y
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="iso8859-1"
+# CONFIG_NLS_CODEPAGE_437 is not set
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
+# CONFIG_NLS_ISO8859_1 is not set
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+# CONFIG_NLS_UTF8 is not set
+# CONFIG_DLM is not set
+
+#
+# Kernel hacking
+#
+CONFIG_PRINTK_TIME=y
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=1024
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_UNUSED_SYMBOLS is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_SHIRQ=y
+CONFIG_DETECT_SOFTLOCKUP=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=1
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_TIMER_STATS is not set
+# CONFIG_DEBUG_OBJECTS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_KMEMLEAK is not set
+CONFIG_DEBUG_PREEMPT=y
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_RT_MUTEX_TESTER is not set
+CONFIG_DEBUG_SPINLOCK=y
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_LOCK_ALLOC is not set
+# CONFIG_PROVE_LOCKING is not set
+# CONFIG_LOCK_STAT is not set
+CONFIG_DEBUG_SPINLOCK_SLEEP=y
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+# CONFIG_DEBUG_KOBJECT is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_WRITECOUNT is not set
+CONFIG_DEBUG_MEMORY_INIT=y
+# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
+# CONFIG_BOOT_PRINTK_DELAY is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_FAULT_INJECTION is not set
+# CONFIG_LATENCYTOP is not set
+# CONFIG_SYSCTL_SYSCALL_CHECK is not set
+# CONFIG_PAGE_POISONING is not set
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_TRACING_SUPPORT=y
+# CONFIG_FTRACE is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+# CONFIG_KGDB is not set
+# CONFIG_KMEMCHECK is not set
+CONFIG_ARM_UNWIND=y
+CONFIG_DEBUG_USER=y
+# CONFIG_DEBUG_ERRORS is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_LL is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITYFS is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+# CONFIG_CRYPTO_FIPS is not set
+CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_ALGAPI2=y
+CONFIG_CRYPTO_AEAD2=y
+CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_BLKCIPHER2=y
+CONFIG_CRYPTO_HASH=y
+CONFIG_CRYPTO_HASH2=y
+CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_PCOMP=y
+CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_MANAGER2=y
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+CONFIG_CRYPTO_WORKQUEUE=y
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+CONFIG_CRYPTO_CBC=y
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+# CONFIG_CRYPTO_ECB is not set
+# CONFIG_CRYPTO_LRW is not set
+# CONFIG_CRYPTO_PCBC is not set
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=y
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_ZLIB is not set
+# CONFIG_CRYPTO_LZO is not set
+
+#
+# Random Number Generation
+#
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+# CONFIG_CRYPTO_HW is not set
+# CONFIG_BINARY_PRINTF is not set
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+CONFIG_GENERIC_FIND_LAST_BIT=y
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_T10DIF is not set
+# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=y
+CONFIG_LZO_COMPRESS=y
+CONFIG_LZO_DECOMPRESS=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
+CONFIG_NLATTR=y
diff --git a/arch/arm/configs/xcep_defconfig b/arch/arm/configs/xcep_defconfig
new file mode 100644
index 0000000..33bb725
--- /dev/null
+++ b/arch/arm/configs/xcep_defconfig
@@ -0,0 +1,1129 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.31-rc6
+# Thu Aug 20 09:02:37 2009
+#
+CONFIG_ARM=y
+CONFIG_SYS_SUPPORTS_APM_EMULATION=y
+CONFIG_GENERIC_GPIO=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_MMU=y
+CONFIG_GENERIC_HARDIRQS=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
+CONFIG_HARDIRQS_SW_RESEND=y
+CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_ARCH_MTD_XIP=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
+CONFIG_VECTORS_BASE=0xffff0000
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+CONFIG_CONSTRUCTORS=y
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_BROKEN_ON_SMP=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=".xcep-itech"
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+# CONFIG_POSIX_MQUEUE is not set
+CONFIG_BSD_PROCESS_ACCT=y
+# CONFIG_BSD_PROCESS_ACCT_V3 is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+
+#
+# RCU Subsystem
+#
+CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=16
+# CONFIG_GROUP_SCHED is not set
+# CONFIG_CGROUPS is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+# CONFIG_RELAY is not set
+# CONFIG_NAMESPACES is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+CONFIG_RD_GZIP=y
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
+CONFIG_EMBEDDED=y
+# CONFIG_UID16 is not set
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS=y
+# CONFIG_KALLSYMS_ALL is not set
+# CONFIG_KALLSYMS_EXTRA_PASS is not set
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+# CONFIG_SHMEM is not set
+CONFIG_AIO=y
+
+#
+# Performance Counters
+#
+# CONFIG_VM_EVENT_COUNTERS is not set
+CONFIG_STRIP_ASM_SYMS=y
+# CONFIG_COMPAT_BRK is not set
+# CONFIG_SLAB is not set
+# CONFIG_SLUB is not set
+CONFIG_SLOB=y
+# CONFIG_PROFILING is not set
+CONFIG_TRACEPOINTS=y
+CONFIG_MARKERS=y
+CONFIG_HAVE_OPROFILE=y
+CONFIG_KPROBES=y
+CONFIG_KRETPROBES=y
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HAVE_CLK=y
+
+#
+# GCOV-based kernel profiling
+#
+# CONFIG_SLOW_WORK is not set
+CONFIG_HAVE_GENERIC_DMA_COHERENT=y
+CONFIG_RT_MUTEXES=y
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+# CONFIG_BLOCK is not set
+# CONFIG_FREEZER is not set
+
+#
+# System Type
+#
+# CONFIG_ARCH_AAEC2000 is not set
+# CONFIG_ARCH_INTEGRATOR is not set
+# CONFIG_ARCH_REALVIEW is not set
+# CONFIG_ARCH_VERSATILE is not set
+# CONFIG_ARCH_AT91 is not set
+# CONFIG_ARCH_CLPS711X is not set
+# CONFIG_ARCH_GEMINI is not set
+# CONFIG_ARCH_EBSA110 is not set
+# CONFIG_ARCH_EP93XX is not set
+# CONFIG_ARCH_FOOTBRIDGE is not set
+# CONFIG_ARCH_MXC is not set
+# CONFIG_ARCH_STMP3XXX is not set
+# CONFIG_ARCH_NETX is not set
+# CONFIG_ARCH_H720X is not set
+# CONFIG_ARCH_IOP13XX is not set
+# CONFIG_ARCH_IOP32X is not set
+# CONFIG_ARCH_IOP33X is not set
+# CONFIG_ARCH_IXP23XX is not set
+# CONFIG_ARCH_IXP2000 is not set
+# CONFIG_ARCH_IXP4XX is not set
+# CONFIG_ARCH_L7200 is not set
+# CONFIG_ARCH_KIRKWOOD is not set
+# CONFIG_ARCH_LOKI is not set
+# CONFIG_ARCH_MV78XX0 is not set
+# CONFIG_ARCH_ORION5X is not set
+# CONFIG_ARCH_MMP is not set
+# CONFIG_ARCH_KS8695 is not set
+# CONFIG_ARCH_NS9XXX is not set
+# CONFIG_ARCH_W90X900 is not set
+# CONFIG_ARCH_PNX4008 is not set
+CONFIG_ARCH_PXA=y
+# CONFIG_ARCH_MSM is not set
+# CONFIG_ARCH_RPC is not set
+# CONFIG_ARCH_SA1100 is not set
+# CONFIG_ARCH_S3C2410 is not set
+# CONFIG_ARCH_S3C64XX is not set
+# CONFIG_ARCH_SHARK is not set
+# CONFIG_ARCH_LH7A40X is not set
+# CONFIG_ARCH_U300 is not set
+# CONFIG_ARCH_DAVINCI is not set
+# CONFIG_ARCH_OMAP is not set
+
+#
+# Intel PXA2xx/PXA3xx Implementations
+#
+# CONFIG_ARCH_GUMSTIX is not set
+# CONFIG_MACH_INTELMOTE2 is not set
+# CONFIG_MACH_STARGATE2 is not set
+# CONFIG_ARCH_LUBBOCK is not set
+# CONFIG_MACH_LOGICPD_PXA270 is not set
+# CONFIG_MACH_MAINSTONE is not set
+# CONFIG_MACH_MP900C is not set
+# CONFIG_ARCH_PXA_IDP is not set
+# CONFIG_PXA_SHARPSL is not set
+# CONFIG_ARCH_VIPER is not set
+# CONFIG_ARCH_PXA_ESERIES is not set
+# CONFIG_TRIZEPS_PXA is not set
+# CONFIG_MACH_H5000 is not set
+# CONFIG_MACH_EM_X270 is not set
+# CONFIG_MACH_EXEDA is not set
+# CONFIG_MACH_COLIBRI is not set
+# CONFIG_MACH_COLIBRI300 is not set
+# CONFIG_MACH_COLIBRI320 is not set
+# CONFIG_MACH_ZYLONITE is not set
+# CONFIG_MACH_LITTLETON is not set
+# CONFIG_MACH_TAVOREVB is not set
+# CONFIG_MACH_SAAR is not set
+# CONFIG_MACH_ARMCORE is not set
+# CONFIG_MACH_CM_X300 is not set
+# CONFIG_MACH_H4700 is not set
+# CONFIG_MACH_MAGICIAN is not set
+# CONFIG_MACH_HIMALAYA is not set
+# CONFIG_MACH_MIOA701 is not set
+# CONFIG_MACH_PCM027 is not set
+# CONFIG_ARCH_PXA_PALM is not set
+# CONFIG_MACH_CSB726 is not set
+# CONFIG_PXA_EZX is not set
+CONFIG_MACH_XCEP=y
+CONFIG_PXA25x=y
+CONFIG_PXA_SSP=y
+CONFIG_PLAT_PXA=y
+
+#
+# Processor Type
+#
+CONFIG_CPU_32=y
+CONFIG_CPU_XSCALE=y
+CONFIG_CPU_32v5=y
+CONFIG_CPU_ABRT_EV5T=y
+CONFIG_CPU_PABRT_NOIFAR=y
+CONFIG_CPU_CACHE_VIVT=y
+CONFIG_CPU_TLB_V4WBI=y
+CONFIG_CPU_CP15=y
+CONFIG_CPU_CP15_MMU=y
+
+#
+# Processor Features
+#
+CONFIG_ARM_THUMB=y
+# CONFIG_CPU_DCACHE_DISABLE is not set
+CONFIG_IWMMXT=y
+CONFIG_XSCALE_PMU=y
+CONFIG_COMMON_CLKDEV=y
+
+#
+# Bus support
+#
+# CONFIG_PCI_SYSCALL is not set
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+# CONFIG_PCCARD is not set
+
+#
+# Kernel Features
+#
+CONFIG_TICK_ONESHOT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+CONFIG_VMSPLIT_3G=y
+# CONFIG_VMSPLIT_2G is not set
+# CONFIG_VMSPLIT_1G is not set
+CONFIG_PAGE_OFFSET=0xC0000000
+# CONFIG_PREEMPT is not set
+CONFIG_HZ=100
+CONFIG_AEABI=y
+CONFIG_OABI_COMPAT=y
+# CONFIG_ARCH_SPARSEMEM_DEFAULT is not set
+# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set
+# CONFIG_HIGHMEM is not set
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4096
+# CONFIG_PHYS_ADDR_T_64BIT is not set
+CONFIG_ZONE_DMA_FLAG=0
+CONFIG_VIRT_TO_BUS=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
+CONFIG_DEFAULT_MMAP_MIN_ADDR=4096
+CONFIG_ALIGNMENT_TRAP=y
+# CONFIG_UACCESS_WITH_MEMCPY is not set
+
+#
+# Boot options
+#
+CONFIG_ZBOOT_ROM_TEXT=0x0
+CONFIG_ZBOOT_ROM_BSS=0x0
+CONFIG_CMDLINE="root=mtd4 rootfstype=jffs2 ro console=ttyS0,115200"
+# CONFIG_XIP_KERNEL is not set
+# CONFIG_KEXEC is not set
+
+#
+# CPU Power Management
+#
+# CONFIG_CPU_FREQ is not set
+# CONFIG_CPU_IDLE is not set
+
+#
+# Floating point emulation
+#
+
+#
+# At least one emulation must be selected
+#
+CONFIG_FPE_NWFPE=y
+# CONFIG_FPE_NWFPE_XP is not set
+# CONFIG_FPE_FASTFPE is not set
+
+#
+# Userspace binary formats
+#
+CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_HAVE_AOUT=y
+# CONFIG_BINFMT_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+
+#
+# Power management options
+#
+# CONFIG_PM is not set
+CONFIG_ARCH_SUSPEND_POSSIBLE=y
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=m
+CONFIG_PACKET_MMAP=y
+CONFIG_UNIX=y
+CONFIG_XFRM=y
+# CONFIG_XFRM_USER is not set
+# CONFIG_XFRM_SUB_POLICY is not set
+# CONFIG_XFRM_MIGRATE is not set
+# CONFIG_XFRM_STATISTICS is not set
+CONFIG_NET_KEY=y
+# CONFIG_NET_KEY_MIGRATE is not set
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_IP_MROUTE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+# CONFIG_INET_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+CONFIG_INET_LRO=y
+# CONFIG_INET_DIAG is not set
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
+# CONFIG_IEEE802154 is not set
+# CONFIG_NET_SCHED is not set
+# CONFIG_DCB is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_NET_TCPPROBE is not set
+# CONFIG_NET_DROP_MONITOR is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+# CONFIG_WIRELESS is not set
+# CONFIG_WIMAX is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_STANDALONE=y
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_DEBUG_DEVRES is not set
+# CONFIG_SYS_HYPERVISOR is not set
+# CONFIG_CONNECTOR is not set
+CONFIG_MTD=y
+# CONFIG_MTD_DEBUG is not set
+CONFIG_MTD_CONCAT=y
+CONFIG_MTD_PARTITIONS=y
+# CONFIG_MTD_TESTS is not set
+# CONFIG_MTD_REDBOOT_PARTS is not set
+# CONFIG_MTD_CMDLINE_PARTS is not set
+# CONFIG_MTD_AFS_PARTS is not set
+# CONFIG_MTD_AR7_PARTS is not set
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=y
+# CONFIG_MTD_OOPS is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+CONFIG_MTD_CFI=y
+# CONFIG_MTD_JEDECPROBE is not set
+CONFIG_MTD_GEN_PROBE=y
+# CONFIG_MTD_CFI_ADV_OPTIONS is not set
+CONFIG_MTD_MAP_BANK_WIDTH_1=y
+CONFIG_MTD_MAP_BANK_WIDTH_2=y
+CONFIG_MTD_MAP_BANK_WIDTH_4=y
+# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set
+# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set
+CONFIG_MTD_CFI_I1=y
+CONFIG_MTD_CFI_I2=y
+# CONFIG_MTD_CFI_I4 is not set
+# CONFIG_MTD_CFI_I8 is not set
+CONFIG_MTD_CFI_INTELEXT=y
+# CONFIG_MTD_CFI_AMDSTD is not set
+# CONFIG_MTD_CFI_STAA is not set
+CONFIG_MTD_CFI_UTIL=y
+# CONFIG_MTD_RAM is not set
+# CONFIG_MTD_ROM is not set
+# CONFIG_MTD_ABSENT is not set
+# CONFIG_MTD_XIP is not set
+
+#
+# Mapping drivers for chip access
+#
+CONFIG_MTD_COMPLEX_MAPPINGS=y
+CONFIG_MTD_PHYSMAP=y
+# CONFIG_MTD_PHYSMAP_COMPAT is not set
+CONFIG_MTD_PXA2XX=y
+# CONFIG_MTD_ARM_INTEGRATOR is not set
+# CONFIG_MTD_PLATRAM is not set
+
+#
+# Self-contained MTD device drivers
+#
+# CONFIG_MTD_SLRAM is not set
+# CONFIG_MTD_PHRAM is not set
+# CONFIG_MTD_MTDRAM is not set
+
+#
+# Disk-On-Chip Device Drivers
+#
+# CONFIG_MTD_DOC2000 is not set
+# CONFIG_MTD_DOC2001 is not set
+# CONFIG_MTD_DOC2001PLUS is not set
+# CONFIG_MTD_NAND is not set
+# CONFIG_MTD_ONENAND is not set
+
+#
+# LPDDR flash memory drivers
+#
+# CONFIG_MTD_LPDDR is not set
+
+#
+# UBI - Unsorted block images
+#
+# CONFIG_MTD_UBI is not set
+# CONFIG_PARPORT is not set
+# CONFIG_MISC_DEVICES is not set
+CONFIG_HAVE_IDE=y
+
+#
+# SCSI device support
+#
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+CONFIG_NETDEVICES=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_VETH is not set
+# CONFIG_PHYLIB is not set
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=y
+# CONFIG_AX88796 is not set
+CONFIG_SMC91X=y
+# CONFIG_DM9000 is not set
+# CONFIG_ETHOC is not set
+# CONFIG_SMC911X is not set
+# CONFIG_SMSC911X is not set
+# CONFIG_DNET is not set
+# CONFIG_IBM_NEW_EMAC_ZMII is not set
+# CONFIG_IBM_NEW_EMAC_RGMII is not set
+# CONFIG_IBM_NEW_EMAC_TAH is not set
+# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
+# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set
+# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
+# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
+# CONFIG_B44 is not set
+# CONFIG_KS8842 is not set
+# CONFIG_NETDEV_1000 is not set
+# CONFIG_NETDEV_10000 is not set
+
+#
+# Wireless LAN
+#
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
+
+#
+# Enable WiMAX (Networking options) to see the WiMAX drivers
+#
+# CONFIG_WAN is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+# CONFIG_ISDN is not set
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+# CONFIG_INPUT_FF_MEMLESS is not set
+# CONFIG_INPUT_POLLDEV is not set
+
+#
+# Userland interfaces
+#
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input Device Drivers
+#
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT_JOYSTICK is not set
+# CONFIG_INPUT_TABLET is not set
+# CONFIG_INPUT_TOUCHSCREEN is not set
+# CONFIG_INPUT_MISC is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_CONSOLE_TRANSLATIONS=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+# CONFIG_VT_HW_CONSOLE_BINDING is not set
+# CONFIG_DEVKMEM is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_PXA=y
+CONFIG_SERIAL_PXA_CONSOLE=y
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_R3964 is not set
+# CONFIG_TCG_TPM is not set
+CONFIG_I2C=m
+CONFIG_I2C_BOARDINFO=y
+CONFIG_I2C_CHARDEV=m
+CONFIG_I2C_HELPER_AUTO=y
+
+#
+# I2C Hardware Bus support
+#
+
+#
+# I2C system bus drivers (mostly embedded / system-on-chip)
+#
+# CONFIG_I2C_DESIGNWARE is not set
+# CONFIG_I2C_GPIO is not set
+# CONFIG_I2C_OCORES is not set
+CONFIG_I2C_PXA=m
+# CONFIG_I2C_PXA_SLAVE is not set
+# CONFIG_I2C_SIMTEC is not set
+
+#
+# External I2C/SMBus adapter drivers
+#
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_TAOS_EVM is not set
+
+#
+# Other I2C/SMBus bus drivers
+#
+# CONFIG_I2C_PCA_PLATFORM is not set
+# CONFIG_I2C_STUB is not set
+
+#
+# Miscellaneous I2C Chip support
+#
+# CONFIG_DS1682 is not set
+# CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_PCF8575 is not set
+# CONFIG_SENSORS_PCA9539 is not set
+# CONFIG_SENSORS_TSL2550 is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+# CONFIG_SPI is not set
+CONFIG_ARCH_REQUIRE_GPIOLIB=y
+CONFIG_GPIOLIB=y
+# CONFIG_DEBUG_GPIO is not set
+# CONFIG_GPIO_SYSFS is not set
+
+#
+# Memory mapped GPIO expanders:
+#
+
+#
+# I2C GPIO expanders:
+#
+# CONFIG_GPIO_MAX732X is not set
+# CONFIG_GPIO_PCA953X is not set
+# CONFIG_GPIO_PCF857X is not set
+
+#
+# PCI GPIO expanders:
+#
+
+#
+# SPI GPIO expanders:
+#
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+CONFIG_HWMON=m
+# CONFIG_HWMON_VID is not set
+# CONFIG_SENSORS_AD7414 is not set
+# CONFIG_SENSORS_AD7418 is not set
+CONFIG_SENSORS_ADM1021=m
+# CONFIG_SENSORS_ADM1025 is not set
+# CONFIG_SENSORS_ADM1026 is not set
+# CONFIG_SENSORS_ADM1029 is not set
+# CONFIG_SENSORS_ADM1031 is not set
+# CONFIG_SENSORS_ADM9240 is not set
+# CONFIG_SENSORS_ADT7462 is not set
+# CONFIG_SENSORS_ADT7470 is not set
+# CONFIG_SENSORS_ADT7473 is not set
+# CONFIG_SENSORS_ADT7475 is not set
+# CONFIG_SENSORS_ATXP1 is not set
+# CONFIG_SENSORS_DS1621 is not set
+# CONFIG_SENSORS_F71805F is not set
+# CONFIG_SENSORS_F71882FG is not set
+# CONFIG_SENSORS_F75375S is not set
+# CONFIG_SENSORS_G760A is not set
+# CONFIG_SENSORS_GL518SM is not set
+# CONFIG_SENSORS_GL520SM is not set
+# CONFIG_SENSORS_IT87 is not set
+# CONFIG_SENSORS_LM63 is not set
+# CONFIG_SENSORS_LM75 is not set
+# CONFIG_SENSORS_LM77 is not set
+# CONFIG_SENSORS_LM78 is not set
+# CONFIG_SENSORS_LM80 is not set
+# CONFIG_SENSORS_LM83 is not set
+# CONFIG_SENSORS_LM85 is not set
+# CONFIG_SENSORS_LM87 is not set
+# CONFIG_SENSORS_LM90 is not set
+# CONFIG_SENSORS_LM92 is not set
+# CONFIG_SENSORS_LM93 is not set
+# CONFIG_SENSORS_LTC4215 is not set
+# CONFIG_SENSORS_LTC4245 is not set
+# CONFIG_SENSORS_LM95241 is not set
+# CONFIG_SENSORS_MAX1619 is not set
+CONFIG_SENSORS_MAX6650=m
+# CONFIG_SENSORS_PC87360 is not set
+# CONFIG_SENSORS_PC87427 is not set
+# CONFIG_SENSORS_PCF8591 is not set
+# CONFIG_SENSORS_SHT15 is not set
+# CONFIG_SENSORS_DME1737 is not set
+# CONFIG_SENSORS_SMSC47M1 is not set
+# CONFIG_SENSORS_SMSC47M192 is not set
+# CONFIG_SENSORS_SMSC47B397 is not set
+# CONFIG_SENSORS_ADS7828 is not set
+# CONFIG_SENSORS_THMC50 is not set
+# CONFIG_SENSORS_TMP401 is not set
+# CONFIG_SENSORS_VT1211 is not set
+# CONFIG_SENSORS_W83781D is not set
+# CONFIG_SENSORS_W83791D is not set
+# CONFIG_SENSORS_W83792D is not set
+# CONFIG_SENSORS_W83793 is not set
+# CONFIG_SENSORS_W83L785TS is not set
+# CONFIG_SENSORS_W83L786NG is not set
+# CONFIG_SENSORS_W83627HF is not set
+# CONFIG_SENSORS_W83627EHF is not set
+# CONFIG_HWMON_DEBUG_CHIP is not set
+# CONFIG_THERMAL is not set
+# CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_MFD_ASIC3 is not set
+# CONFIG_HTC_EGPIO is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_TPS65010 is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_MFD_T7L66XB is not set
+# CONFIG_MFD_TC6387XB is not set
+# CONFIG_MFD_TC6393XB is not set
+# CONFIG_MFD_WM8400 is not set
+# CONFIG_MFD_WM8350_I2C is not set
+# CONFIG_MFD_PCF50633 is not set
+# CONFIG_AB3100_CORE is not set
+# CONFIG_MEDIA_SUPPORT is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+# CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+
+#
+# Console display driver support
+#
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_DUMMY_CONSOLE=y
+# CONFIG_SOUND is not set
+# CONFIG_HID_SUPPORT is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_ACCESSIBILITY is not set
+# CONFIG_NEW_LEDS is not set
+CONFIG_RTC_LIB=y
+CONFIG_RTC_CLASS=m
+
+#
+# RTC interfaces
+#
+CONFIG_RTC_INTF_SYSFS=y
+CONFIG_RTC_INTF_PROC=y
+CONFIG_RTC_INTF_DEV=y
+# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set
+# CONFIG_RTC_DRV_TEST is not set
+
+#
+# I2C RTC drivers
+#
+# CONFIG_RTC_DRV_DS1307 is not set
+# CONFIG_RTC_DRV_DS1374 is not set
+# CONFIG_RTC_DRV_DS1672 is not set
+# CONFIG_RTC_DRV_MAX6900 is not set
+# CONFIG_RTC_DRV_RS5C372 is not set
+# CONFIG_RTC_DRV_ISL1208 is not set
+# CONFIG_RTC_DRV_X1205 is not set
+# CONFIG_RTC_DRV_PCF8563 is not set
+# CONFIG_RTC_DRV_PCF8583 is not set
+# CONFIG_RTC_DRV_M41T80 is not set
+# CONFIG_RTC_DRV_S35390A is not set
+# CONFIG_RTC_DRV_FM3130 is not set
+# CONFIG_RTC_DRV_RX8581 is not set
+# CONFIG_RTC_DRV_RX8025 is not set
+
+#
+# SPI RTC drivers
+#
+
+#
+# Platform RTC drivers
+#
+# CONFIG_RTC_DRV_CMOS is not set
+# CONFIG_RTC_DRV_DS1286 is not set
+# CONFIG_RTC_DRV_DS1511 is not set
+# CONFIG_RTC_DRV_DS1553 is not set
+# CONFIG_RTC_DRV_DS1742 is not set
+# CONFIG_RTC_DRV_STK17TA8 is not set
+# CONFIG_RTC_DRV_M48T86 is not set
+# CONFIG_RTC_DRV_M48T35 is not set
+# CONFIG_RTC_DRV_M48T59 is not set
+# CONFIG_RTC_DRV_BQ4802 is not set
+# CONFIG_RTC_DRV_V3020 is not set
+
+#
+# on-CPU RTC drivers
+#
+CONFIG_RTC_DRV_SA1100=m
+# CONFIG_RTC_DRV_PXA is not set
+CONFIG_DMADEVICES=y
+
+#
+# DMA Devices
+#
+# CONFIG_AUXDISPLAY is not set
+# CONFIG_REGULATOR is not set
+# CONFIG_UIO is not set
+# CONFIG_STAGING is not set
+
+#
+# File systems
+#
+CONFIG_FILE_LOCKING=y
+# CONFIG_FSNOTIFY is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY is not set
+# CONFIG_INOTIFY_USER is not set
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_FUSE_FS is not set
+
+#
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+CONFIG_MISC_FILESYSTEMS=y
+CONFIG_JFFS2_FS=y
+CONFIG_JFFS2_FS_DEBUG=0
+CONFIG_JFFS2_FS_WRITEBUFFER=y
+CONFIG_JFFS2_FS_WBUF_VERIFY=y
+# CONFIG_JFFS2_SUMMARY is not set
+# CONFIG_JFFS2_FS_XATTR is not set
+# CONFIG_JFFS2_COMPRESSION_OPTIONS is not set
+CONFIG_JFFS2_ZLIB=y
+# CONFIG_JFFS2_LZO is not set
+CONFIG_JFFS2_RTIME=y
+# CONFIG_JFFS2_RUBIN is not set
+# CONFIG_ROMFS_FS is not set
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFSD is not set
+CONFIG_LOCKD=m
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=m
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+CONFIG_NLS=m
+CONFIG_NLS_DEFAULT="utf8"
+# CONFIG_NLS_CODEPAGE_437 is not set
+# CONFIG_NLS_CODEPAGE_737 is not set
+# CONFIG_NLS_CODEPAGE_775 is not set
+# CONFIG_NLS_CODEPAGE_850 is not set
+# CONFIG_NLS_CODEPAGE_852 is not set
+# CONFIG_NLS_CODEPAGE_855 is not set
+# CONFIG_NLS_CODEPAGE_857 is not set
+# CONFIG_NLS_CODEPAGE_860 is not set
+# CONFIG_NLS_CODEPAGE_861 is not set
+# CONFIG_NLS_CODEPAGE_862 is not set
+# CONFIG_NLS_CODEPAGE_863 is not set
+# CONFIG_NLS_CODEPAGE_864 is not set
+# CONFIG_NLS_CODEPAGE_865 is not set
+# CONFIG_NLS_CODEPAGE_866 is not set
+# CONFIG_NLS_CODEPAGE_869 is not set
+# CONFIG_NLS_CODEPAGE_936 is not set
+# CONFIG_NLS_CODEPAGE_950 is not set
+# CONFIG_NLS_CODEPAGE_932 is not set
+# CONFIG_NLS_CODEPAGE_949 is not set
+# CONFIG_NLS_CODEPAGE_874 is not set
+# CONFIG_NLS_ISO8859_8 is not set
+# CONFIG_NLS_CODEPAGE_1250 is not set
+# CONFIG_NLS_CODEPAGE_1251 is not set
+# CONFIG_NLS_ASCII is not set
+# CONFIG_NLS_ISO8859_1 is not set
+# CONFIG_NLS_ISO8859_2 is not set
+# CONFIG_NLS_ISO8859_3 is not set
+# CONFIG_NLS_ISO8859_4 is not set
+# CONFIG_NLS_ISO8859_5 is not set
+# CONFIG_NLS_ISO8859_6 is not set
+# CONFIG_NLS_ISO8859_7 is not set
+# CONFIG_NLS_ISO8859_9 is not set
+# CONFIG_NLS_ISO8859_13 is not set
+# CONFIG_NLS_ISO8859_14 is not set
+# CONFIG_NLS_ISO8859_15 is not set
+# CONFIG_NLS_KOI8_R is not set
+# CONFIG_NLS_KOI8_U is not set
+CONFIG_NLS_UTF8=m
+# CONFIG_DLM is not set
+
+#
+# Kernel hacking
+#
+CONFIG_PRINTK_TIME=y
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=1024
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_UNUSED_SYMBOLS is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_TIMER_STATS is not set
+# CONFIG_DEBUG_OBJECTS is not set
+# CONFIG_DEBUG_KMEMLEAK is not set
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_RT_MUTEX_TESTER is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_LOCK_ALLOC is not set
+# CONFIG_PROVE_LOCKING is not set
+# CONFIG_LOCK_STAT is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_WRITECOUNT is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
+CONFIG_FRAME_POINTER=y
+# CONFIG_BOOT_PRINTK_DELAY is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_KPROBES_SANITY_TEST is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_FAULT_INJECTION is not set
+# CONFIG_LATENCYTOP is not set
+CONFIG_SYSCTL_SYSCALL_CHECK=y
+# CONFIG_PAGE_POISONING is not set
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_TRACING_SUPPORT=y
+# CONFIG_FTRACE is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+# CONFIG_KGDB is not set
+# CONFIG_ARM_UNWIND is not set
+# CONFIG_DEBUG_USER is not set
+# CONFIG_DEBUG_ERRORS is not set
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_LL is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITYFS is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+# CONFIG_CRYPTO_FIPS is not set
+CONFIG_CRYPTO_ALGAPI=m
+CONFIG_CRYPTO_ALGAPI2=m
+CONFIG_CRYPTO_HASH=m
+CONFIG_CRYPTO_HASH2=m
+# CONFIG_CRYPTO_MANAGER is not set
+# CONFIG_CRYPTO_MANAGER2 is not set
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+# CONFIG_CRYPTO_CBC is not set
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+# CONFIG_CRYPTO_ECB is not set
+# CONFIG_CRYPTO_LRW is not set
+# CONFIG_CRYPTO_PCBC is not set
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+CONFIG_CRYPTO_CRC32C=m
+# CONFIG_CRYPTO_MD4 is not set
+# CONFIG_CRYPTO_MD5 is not set
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+# CONFIG_CRYPTO_DES is not set
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_ZLIB is not set
+# CONFIG_CRYPTO_LZO is not set
+
+#
+# Random Number Generation
+#
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+# CONFIG_CRYPTO_HW is not set
+# CONFIG_BINARY_PRINTF is not set
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+CONFIG_GENERIC_FIND_LAST_BIT=y
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_T10DIF is not set
+# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+CONFIG_LIBCRC32C=m
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=y
+CONFIG_DECOMPRESS_GZIP=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
+CONFIG_NLATTR=y
diff --git a/arch/arm/configs/zylonite_defconfig b/arch/arm/configs/zylonite_defconfig
deleted file mode 100644
index 7949d04..0000000
--- a/arch/arm/configs/zylonite_defconfig
+++ /dev/null
@@ -1,736 +0,0 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.23
-# Tue Oct 23 13:33:20 2007
-#
-CONFIG_ARM=y
-CONFIG_SYS_SUPPORTS_APM_EMULATION=y
-CONFIG_GENERIC_GPIO=y
-CONFIG_GENERIC_TIME=y
-CONFIG_GENERIC_CLOCKEVENTS=y
-CONFIG_MMU=y
-# CONFIG_NO_IOPORT is not set
-CONFIG_GENERIC_HARDIRQS=y
-CONFIG_STACKTRACE_SUPPORT=y
-CONFIG_LOCKDEP_SUPPORT=y
-CONFIG_TRACE_IRQFLAGS_SUPPORT=y
-CONFIG_HARDIRQS_SW_RESEND=y
-CONFIG_GENERIC_IRQ_PROBE=y
-CONFIG_RWSEM_GENERIC_SPINLOCK=y
-# CONFIG_ARCH_HAS_ILOG2_U32 is not set
-# CONFIG_ARCH_HAS_ILOG2_U64 is not set
-CONFIG_GENERIC_HWEIGHT=y
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_ZONE_DMA=y
-CONFIG_ARCH_MTD_XIP=y
-CONFIG_VECTORS_BASE=0xffff0000
-CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
-
-#
-# General setup
-#
-CONFIG_EXPERIMENTAL=y
-CONFIG_BROKEN_ON_SMP=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-CONFIG_LOCALVERSION=""
-CONFIG_LOCALVERSION_AUTO=y
-CONFIG_SWAP=y
-CONFIG_SYSVIPC=y
-CONFIG_SYSVIPC_SYSCTL=y
-# CONFIG_POSIX_MQUEUE is not set
-# CONFIG_BSD_PROCESS_ACCT is not set
-# CONFIG_TASKSTATS is not set
-# CONFIG_USER_NS is not set
-# CONFIG_AUDIT is not set
-# CONFIG_IKCONFIG is not set
-CONFIG_LOG_BUF_SHIFT=18
-# CONFIG_CGROUPS is not set
-CONFIG_FAIR_GROUP_SCHED=y
-CONFIG_FAIR_USER_SCHED=y
-# CONFIG_FAIR_CGROUP_SCHED is not set
-CONFIG_SYSFS_DEPRECATED=y
-# CONFIG_RELAY is not set
-# CONFIG_BLK_DEV_INITRD is not set
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_SYSCTL=y
-# CONFIG_EMBEDDED is not set
-CONFIG_UID16=y
-CONFIG_SYSCTL_SYSCALL=y
-CONFIG_KALLSYMS=y
-# CONFIG_KALLSYMS_EXTRA_PASS is not set
-CONFIG_HOTPLUG=y
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-CONFIG_ELF_CORE=y
-CONFIG_BASE_FULL=y
-CONFIG_FUTEX=y
-CONFIG_ANON_INODES=y
-CONFIG_EPOLL=y
-CONFIG_SIGNALFD=y
-CONFIG_EVENTFD=y
-CONFIG_SHMEM=y
-CONFIG_VM_EVENT_COUNTERS=y
-CONFIG_SLUB_DEBUG=y
-# CONFIG_SLAB is not set
-CONFIG_SLUB=y
-# CONFIG_SLOB is not set
-CONFIG_RT_MUTEXES=y
-# CONFIG_TINY_SHMEM is not set
-CONFIG_BASE_SMALL=0
-CONFIG_MODULES=y
-# CONFIG_MODULE_UNLOAD is not set
-# CONFIG_MODVERSIONS is not set
-# CONFIG_MODULE_SRCVERSION_ALL is not set
-# CONFIG_KMOD is not set
-CONFIG_BLOCK=y
-# CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
-# CONFIG_LSF is not set
-# CONFIG_BLK_DEV_BSG is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
-# CONFIG_DEFAULT_AS is not set
-# CONFIG_DEFAULT_DEADLINE is not set
-CONFIG_DEFAULT_CFQ=y
-# CONFIG_DEFAULT_NOOP is not set
-CONFIG_DEFAULT_IOSCHED="cfq"
-
-#
-# System Type
-#
-# CONFIG_ARCH_AAEC2000 is not set
-# CONFIG_ARCH_INTEGRATOR is not set
-# CONFIG_ARCH_REALVIEW is not set
-# CONFIG_ARCH_VERSATILE is not set
-# CONFIG_ARCH_AT91 is not set
-# CONFIG_ARCH_CLPS7500 is not set
-# CONFIG_ARCH_CLPS711X is not set
-# CONFIG_ARCH_CO285 is not set
-# CONFIG_ARCH_EBSA110 is not set
-# CONFIG_ARCH_EP93XX is not set
-# CONFIG_ARCH_FOOTBRIDGE is not set
-# CONFIG_ARCH_NETX is not set
-# CONFIG_ARCH_H720X is not set
-# CONFIG_ARCH_IMX is not set
-# CONFIG_ARCH_IOP13XX is not set
-# CONFIG_ARCH_IOP32X is not set
-# CONFIG_ARCH_IOP33X is not set
-# CONFIG_ARCH_IXP23XX is not set
-# CONFIG_ARCH_IXP2000 is not set
-# CONFIG_ARCH_IXP4XX is not set
-# CONFIG_ARCH_L7200 is not set
-# CONFIG_ARCH_KS8695 is not set
-# CONFIG_ARCH_NS9XXX is not set
-# CONFIG_ARCH_MXC is not set
-# CONFIG_ARCH_PNX4008 is not set
-CONFIG_ARCH_PXA=y
-# CONFIG_ARCH_RPC is not set
-# CONFIG_ARCH_SA1100 is not set
-# CONFIG_ARCH_S3C2410 is not set
-# CONFIG_ARCH_SHARK is not set
-# CONFIG_ARCH_LH7A40X is not set
-# CONFIG_ARCH_DAVINCI is not set
-# CONFIG_ARCH_OMAP is not set
-
-#
-# Intel PXA2xx/PXA3xx Implementations
-#
-
-#
-# Supported PXA3xx Processor Variants
-#
-CONFIG_CPU_PXA300=y
-CONFIG_CPU_PXA310=y
-CONFIG_CPU_PXA320=y
-# CONFIG_ARCH_LUBBOCK is not set
-# CONFIG_MACH_LOGICPD_PXA270 is not set
-# CONFIG_MACH_MAINSTONE is not set
-# CONFIG_ARCH_PXA_IDP is not set
-# CONFIG_PXA_SHARPSL is not set
-# CONFIG_MACH_TRIZEPS4 is not set
-# CONFIG_MACH_EM_X270 is not set
-CONFIG_MACH_ZYLONITE=y
-# CONFIG_MACH_ARMCORE is not set
-CONFIG_PXA3xx=y
-
-#
-# Boot options
-#
-
-#
-# Power management
-#
-
-#
-# Processor Type
-#
-CONFIG_CPU_32=y
-CONFIG_CPU_XSC3=y
-CONFIG_CPU_32v5=y
-CONFIG_CPU_ABRT_EV5T=y
-CONFIG_CPU_CACHE_VIVT=y
-CONFIG_CPU_TLB_V4WBI=y
-CONFIG_CPU_CP15=y
-CONFIG_CPU_CP15_MMU=y
-CONFIG_IO_36=y
-
-#
-# Processor Features
-#
-# CONFIG_ARM_THUMB is not set
-# CONFIG_CPU_DCACHE_DISABLE is not set
-# CONFIG_CPU_BPREDICT_DISABLE is not set
-# CONFIG_OUTER_CACHE is not set
-CONFIG_IWMMXT=y
-
-#
-# Bus support
-#
-# CONFIG_PCI_SYSCALL is not set
-# CONFIG_ARCH_SUPPORTS_MSI is not set
-# CONFIG_PCCARD is not set
-
-#
-# Kernel Features
-#
-# CONFIG_TICK_ONESHOT is not set
-# CONFIG_NO_HZ is not set
-# CONFIG_HIGH_RES_TIMERS is not set
-CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
-# CONFIG_PREEMPT is not set
-CONFIG_HZ=100
-CONFIG_AEABI=y
-CONFIG_OABI_COMPAT=y
-# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set
-CONFIG_SELECT_MEMORY_MODEL=y
-CONFIG_FLATMEM_MANUAL=y
-# CONFIG_DISCONTIGMEM_MANUAL is not set
-# CONFIG_SPARSEMEM_MANUAL is not set
-CONFIG_FLATMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
-# CONFIG_SPARSEMEM_STATIC is not set
-# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set
-CONFIG_SPLIT_PTLOCK_CPUS=4096
-# CONFIG_RESOURCES_64BIT is not set
-CONFIG_ZONE_DMA_FLAG=1
-CONFIG_BOUNCE=y
-CONFIG_VIRT_TO_BUS=y
-CONFIG_ALIGNMENT_TRAP=y
-
-#
-# Boot options
-#
-CONFIG_ZBOOT_ROM_TEXT=0x0
-CONFIG_ZBOOT_ROM_BSS=0x0
-CONFIG_CMDLINE="root=/dev/nfs rootfstype=nfs nfsroot=192.168.1.100:/nfs/rootfs/ ip=192.168.1.101:192.168.1.100::255.255.255.0::eth0:on console=ttyS0,38400 mem=64M debug"
-# CONFIG_XIP_KERNEL is not set
-# CONFIG_KEXEC is not set
-
-#
-# Floating point emulation
-#
-
-#
-# At least one emulation must be selected
-#
-CONFIG_FPE_NWFPE=y
-# CONFIG_FPE_NWFPE_XP is not set
-# CONFIG_FPE_FASTFPE is not set
-
-#
-# Userspace binary formats
-#
-CONFIG_BINFMT_ELF=y
-# CONFIG_BINFMT_AOUT is not set
-# CONFIG_BINFMT_MISC is not set
-
-#
-# Power management options
-#
-# CONFIG_PM is not set
-CONFIG_SUSPEND_UP_POSSIBLE=y
-
-#
-# Networking
-#
-CONFIG_NET=y
-
-#
-# Networking options
-#
-CONFIG_PACKET=y
-# CONFIG_PACKET_MMAP is not set
-CONFIG_UNIX=y
-# CONFIG_NET_KEY is not set
-CONFIG_INET=y
-# CONFIG_IP_MULTICAST is not set
-# CONFIG_IP_ADVANCED_ROUTER is not set
-CONFIG_IP_FIB_HASH=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_IP_PNP_RARP=y
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_ARPD is not set
-# CONFIG_SYN_COOKIES is not set
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_XFRM_TUNNEL is not set
-# CONFIG_INET_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
-# CONFIG_INET_DIAG is not set
-# CONFIG_TCP_CONG_ADVANCED is not set
-CONFIG_TCP_CONG_CUBIC=y
-CONFIG_DEFAULT_TCP_CONG="cubic"
-# CONFIG_TCP_MD5SIG is not set
-# CONFIG_IPV6 is not set
-# CONFIG_INET6_XFRM_TUNNEL is not set
-# CONFIG_INET6_TUNNEL is not set
-# CONFIG_NETWORK_SECMARK is not set
-# CONFIG_NETFILTER is not set
-# CONFIG_IP_DCCP is not set
-# CONFIG_IP_SCTP is not set
-# CONFIG_TIPC is not set
-# CONFIG_ATM is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-# CONFIG_NET_SCHED is not set
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-# CONFIG_AF_RXRPC is not set
-
-#
-# Wireless
-#
-# CONFIG_CFG80211 is not set
-# CONFIG_WIRELESS_EXT is not set
-# CONFIG_MAC80211 is not set
-# CONFIG_IEEE80211 is not set
-# CONFIG_RFKILL is not set
-# CONFIG_NET_9P is not set
-
-#
-# Device Drivers
-#
-
-#
-# Generic Driver Options
-#
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-CONFIG_FW_LOADER=y
-# CONFIG_SYS_HYPERVISOR is not set
-# CONFIG_CONNECTOR is not set
-# CONFIG_MTD is not set
-# CONFIG_PARPORT is not set
-# CONFIG_BLK_DEV is not set
-# CONFIG_MISC_DEVICES is not set
-# CONFIG_IDE is not set
-
-#
-# SCSI device support
-#
-# CONFIG_RAID_ATTRS is not set
-# CONFIG_SCSI is not set
-# CONFIG_SCSI_DMA is not set
-# CONFIG_SCSI_NETLINK is not set
-# CONFIG_ATA is not set
-# CONFIG_MD is not set
-CONFIG_NETDEVICES=y
-# CONFIG_NETDEVICES_MULTIQUEUE is not set
-# CONFIG_DUMMY is not set
-# CONFIG_BONDING is not set
-# CONFIG_MACVLAN is not set
-# CONFIG_EQUALIZER is not set
-# CONFIG_TUN is not set
-# CONFIG_VETH is not set
-# CONFIG_PHYLIB is not set
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-# CONFIG_AX88796 is not set
-CONFIG_SMC91X=y
-# CONFIG_DM9000 is not set
-# CONFIG_SMC911X is not set
-# CONFIG_IBM_NEW_EMAC_ZMII is not set
-# CONFIG_IBM_NEW_EMAC_RGMII is not set
-# CONFIG_IBM_NEW_EMAC_TAH is not set
-# CONFIG_IBM_NEW_EMAC_EMAC4 is not set
-# CONFIG_B44 is not set
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
-
-#
-# Wireless LAN
-#
-# CONFIG_WLAN_PRE80211 is not set
-# CONFIG_WLAN_80211 is not set
-# CONFIG_WAN is not set
-# CONFIG_PPP is not set
-# CONFIG_SLIP is not set
-# CONFIG_SHAPER is not set
-# CONFIG_NETCONSOLE is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-# CONFIG_ISDN is not set
-
-#
-# Input device support
-#
-CONFIG_INPUT=y
-# CONFIG_INPUT_FF_MEMLESS is not set
-# CONFIG_INPUT_POLLDEV is not set
-
-#
-# Userland interfaces
-#
-CONFIG_INPUT_MOUSEDEV=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
-# CONFIG_INPUT_JOYDEV is not set
-# CONFIG_INPUT_EVDEV is not set
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TABLET is not set
-# CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
-
-#
-# Hardware I/O ports
-#
-# CONFIG_SERIO is not set
-# CONFIG_GAMEPORT is not set
-
-#
-# Character devices
-#
-CONFIG_VT=y
-CONFIG_VT_CONSOLE=y
-CONFIG_HW_CONSOLE=y
-# CONFIG_VT_HW_CONSOLE_BINDING is not set
-# CONFIG_SERIAL_NONSTANDARD is not set
-
-#
-# Serial drivers
-#
-# CONFIG_SERIAL_8250 is not set
-
-#
-# Non-8250 serial port support
-#
-CONFIG_SERIAL_PXA=y
-CONFIG_SERIAL_PXA_CONSOLE=y
-CONFIG_SERIAL_CORE=y
-CONFIG_SERIAL_CORE_CONSOLE=y
-CONFIG_UNIX98_PTYS=y
-# CONFIG_LEGACY_PTYS is not set
-# CONFIG_IPMI_HANDLER is not set
-# CONFIG_HW_RANDOM is not set
-# CONFIG_NVRAM is not set
-# CONFIG_R3964 is not set
-# CONFIG_RAW_DRIVER is not set
-# CONFIG_TCG_TPM is not set
-# CONFIG_I2C is not set
-
-#
-# SPI support
-#
-# CONFIG_SPI is not set
-# CONFIG_SPI_MASTER is not set
-# CONFIG_W1 is not set
-# CONFIG_POWER_SUPPLY is not set
-# CONFIG_HWMON is not set
-
-#
-# Sonics Silicon Backplane
-#
-CONFIG_SSB_POSSIBLE=y
-# CONFIG_SSB is not set
-
-#
-# Multifunction device drivers
-#
-# CONFIG_MFD_SM501 is not set
-
-#
-# Multimedia devices
-#
-# CONFIG_VIDEO_DEV is not set
-# CONFIG_DVB_CORE is not set
-# CONFIG_DAB is not set
-
-#
-# Graphics support
-#
-# CONFIG_VGASTATE is not set
-# CONFIG_VIDEO_OUTPUT_CONTROL is not set
-CONFIG_FB=y
-# CONFIG_FIRMWARE_EDID is not set
-# CONFIG_FB_DDC is not set
-CONFIG_FB_CFB_FILLRECT=y
-CONFIG_FB_CFB_COPYAREA=y
-CONFIG_FB_CFB_IMAGEBLIT=y
-# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set
-# CONFIG_FB_SYS_FILLRECT is not set
-# CONFIG_FB_SYS_COPYAREA is not set
-# CONFIG_FB_SYS_IMAGEBLIT is not set
-# CONFIG_FB_SYS_FOPS is not set
-CONFIG_FB_DEFERRED_IO=y
-# CONFIG_FB_SVGALIB is not set
-# CONFIG_FB_MACMODES is not set
-# CONFIG_FB_BACKLIGHT is not set
-# CONFIG_FB_MODE_HELPERS is not set
-# CONFIG_FB_TILEBLITTING is not set
-
-#
-# Frame buffer hardware drivers
-#
-# CONFIG_FB_S1D13XXX is not set
-CONFIG_FB_PXA=y
-# CONFIG_FB_PXA_PARAMETERS is not set
-# CONFIG_FB_MBX is not set
-# CONFIG_FB_VIRTUAL is not set
-# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
-
-#
-# Display device support
-#
-# CONFIG_DISPLAY_SUPPORT is not set
-
-#
-# Console display driver support
-#
-# CONFIG_VGA_CONSOLE is not set
-CONFIG_DUMMY_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
-# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
-CONFIG_FONTS=y
-# CONFIG_FONT_8x8 is not set
-# CONFIG_FONT_8x16 is not set
-CONFIG_FONT_6x11=y
-# CONFIG_FONT_7x14 is not set
-# CONFIG_FONT_PEARL_8x8 is not set
-# CONFIG_FONT_ACORN_8x8 is not set
-# CONFIG_FONT_MINI_4x6 is not set
-# CONFIG_FONT_SUN8x16 is not set
-# CONFIG_FONT_SUN12x22 is not set
-# CONFIG_FONT_10x18 is not set
-CONFIG_LOGO=y
-CONFIG_LOGO_LINUX_MONO=y
-CONFIG_LOGO_LINUX_VGA16=y
-CONFIG_LOGO_LINUX_CLUT224=y
-
-#
-# Sound
-#
-# CONFIG_SOUND is not set
-# CONFIG_HID_SUPPORT is not set
-# CONFIG_USB_SUPPORT is not set
-# CONFIG_MMC is not set
-# CONFIG_NEW_LEDS is not set
-CONFIG_RTC_LIB=y
-# CONFIG_RTC_CLASS is not set
-
-#
-# File systems
-#
-# CONFIG_EXT2_FS is not set
-# CONFIG_EXT3_FS is not set
-# CONFIG_EXT4DEV_FS is not set
-# CONFIG_REISERFS_FS is not set
-# CONFIG_JFS_FS is not set
-CONFIG_FS_POSIX_ACL=y
-# CONFIG_XFS_FS is not set
-# CONFIG_GFS2_FS is not set
-# CONFIG_OCFS2_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_ROMFS_FS is not set
-# CONFIG_INOTIFY is not set
-# CONFIG_QUOTA is not set
-CONFIG_DNOTIFY=y
-# CONFIG_AUTOFS_FS is not set
-# CONFIG_AUTOFS4_FS is not set
-# CONFIG_FUSE_FS is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
-# CONFIG_ISO9660_FS is not set
-# CONFIG_UDF_FS is not set
-
-#
-# DOS/FAT/NT Filesystems
-#
-# CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
-CONFIG_PROC_SYSCTL=y
-CONFIG_SYSFS=y
-# CONFIG_TMPFS is not set
-# CONFIG_HUGETLB_PAGE is not set
-# CONFIG_CONFIGFS_FS is not set
-
-#
-# Miscellaneous filesystems
-#
-# CONFIG_ADFS_FS is not set
-# CONFIG_AFFS_FS is not set
-# CONFIG_HFS_FS is not set
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_BEFS_FS is not set
-# CONFIG_BFS_FS is not set
-# CONFIG_EFS_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-CONFIG_NETWORK_FILESYSTEMS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_NFS_V3_ACL=y
-CONFIG_NFS_V4=y
-CONFIG_NFS_DIRECTIO=y
-# CONFIG_NFSD is not set
-CONFIG_ROOT_NFS=y
-CONFIG_LOCKD=y
-CONFIG_LOCKD_V4=y
-CONFIG_NFS_ACL_SUPPORT=y
-CONFIG_NFS_COMMON=y
-CONFIG_SUNRPC=y
-CONFIG_SUNRPC_GSS=y
-# CONFIG_SUNRPC_BIND34 is not set
-CONFIG_RPCSEC_GSS_KRB5=y
-# CONFIG_RPCSEC_GSS_SPKM3 is not set
-# CONFIG_SMB_FS is not set
-# CONFIG_CIFS is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-
-#
-# Partition Types
-#
-# CONFIG_PARTITION_ADVANCED is not set
-CONFIG_MSDOS_PARTITION=y
-# CONFIG_NLS is not set
-# CONFIG_DLM is not set
-# CONFIG_INSTRUMENTATION is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
-CONFIG_ENABLE_MUST_CHECK=y
-# CONFIG_MAGIC_SYSRQ is not set
-# CONFIG_UNUSED_SYMBOLS is not set
-# CONFIG_DEBUG_FS is not set
-# CONFIG_HEADERS_CHECK is not set
-# CONFIG_DEBUG_KERNEL is not set
-# CONFIG_SLUB_DEBUG_ON is not set
-CONFIG_DEBUG_BUGVERBOSE=y
-CONFIG_FRAME_POINTER=y
-# CONFIG_SAMPLES is not set
-CONFIG_DEBUG_USER=y
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY is not set
-# CONFIG_SECURITY_FILE_CAPABILITIES is not set
-CONFIG_CRYPTO=y
-CONFIG_CRYPTO_ALGAPI=y
-CONFIG_CRYPTO_BLKCIPHER=y
-CONFIG_CRYPTO_MANAGER=y
-# CONFIG_CRYPTO_HMAC is not set
-# CONFIG_CRYPTO_XCBC is not set
-# CONFIG_CRYPTO_NULL is not set
-# CONFIG_CRYPTO_MD4 is not set
-CONFIG_CRYPTO_MD5=y
-# CONFIG_CRYPTO_SHA1 is not set
-# CONFIG_CRYPTO_SHA256 is not set
-# CONFIG_CRYPTO_SHA512 is not set
-# CONFIG_CRYPTO_WP512 is not set
-# CONFIG_CRYPTO_TGR192 is not set
-# CONFIG_CRYPTO_GF128MUL is not set
-# CONFIG_CRYPTO_ECB is not set
-CONFIG_CRYPTO_CBC=y
-# CONFIG_CRYPTO_PCBC is not set
-# CONFIG_CRYPTO_LRW is not set
-# CONFIG_CRYPTO_XTS is not set
-# CONFIG_CRYPTO_CRYPTD is not set
-CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_FCRYPT is not set
-# CONFIG_CRYPTO_BLOWFISH is not set
-# CONFIG_CRYPTO_TWOFISH is not set
-# CONFIG_CRYPTO_SERPENT is not set
-# CONFIG_CRYPTO_AES is not set
-# CONFIG_CRYPTO_CAST5 is not set
-# CONFIG_CRYPTO_CAST6 is not set
-# CONFIG_CRYPTO_TEA is not set
-# CONFIG_CRYPTO_ARC4 is not set
-# CONFIG_CRYPTO_KHAZAD is not set
-# CONFIG_CRYPTO_ANUBIS is not set
-# CONFIG_CRYPTO_SEED is not set
-# CONFIG_CRYPTO_DEFLATE is not set
-# CONFIG_CRYPTO_MICHAEL_MIC is not set
-# CONFIG_CRYPTO_CRC32C is not set
-# CONFIG_CRYPTO_CAMELLIA is not set
-# CONFIG_CRYPTO_TEST is not set
-# CONFIG_CRYPTO_AUTHENC is not set
-# CONFIG_CRYPTO_HW is not set
-
-#
-# Library routines
-#
-CONFIG_BITREVERSE=y
-# CONFIG_CRC_CCITT is not set
-# CONFIG_CRC16 is not set
-# CONFIG_CRC_ITU_T is not set
-CONFIG_CRC32=y
-# CONFIG_CRC7 is not set
-# CONFIG_LIBCRC32C is not set
-CONFIG_PLIST=y
-CONFIG_HAS_IOMEM=y
-CONFIG_HAS_IOPORT=y
-CONFIG_HAS_DMA=y
index 9ed2377..d0daeab 100644 (file)
 
 #ifdef __KERNEL__
 
+/*
+ * On ARM, ordinary assignment (str instruction) doesn't clear the local
+ * strex/ldrex monitor on some implementations. The reason we can use it for
+ * atomic_set() is the clrex or dummy strex done on every exception return.
+ */
 #define atomic_read(v) ((v)->counter)
+#define atomic_set(v,i)        (((v)->counter) = (i))
 
 #if __LINUX_ARM_ARCH__ >= 6
 
 /*
  * ARMv6 UP and SMP safe atomic ops.  We use load exclusive and
  * store exclusive to ensure that these are atomic.  We may loop
- * to ensure that the update happens.  Writing to 'v->counter'
- * without using the following operations WILL break the atomic
- * nature of these ops.
+ * to ensure that the update happens.
  */
-static inline void atomic_set(atomic_t *v, int i)
-{
-       unsigned long tmp;
-
-       __asm__ __volatile__("@ atomic_set\n"
-"1:    ldrex   %0, [%1]\n"
-"      strex   %0, %2, [%1]\n"
-"      teq     %0, #0\n"
-"      bne     1b"
-       : "=&r" (tmp)
-       : "r" (&v->counter), "r" (i)
-       : "cc");
-}
-
 static inline void atomic_add(int i, atomic_t *v)
 {
        unsigned long tmp;
@@ -163,8 +153,6 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
 #error SMP not supported on pre-ARMv6 CPUs
 #endif
 
-#define atomic_set(v,i)        (((v)->counter) = (i))
-
 static inline int atomic_add_return(int i, atomic_t *v)
 {
        unsigned long flags;
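The comment retained above is the crux of this change: a plain store is now
safe for atomic_set() on every ARM variant, because any exclusive monitor
left open by a concurrent ldrex/strex sequence is cleared on exception
return. A minimal sketch of the resulting usage (the names are illustrative,
not from this patch):

	static atomic_t example_count = ATOMIC_INIT(0);

	void example_reset(void)
	{
		atomic_set(&example_count, 0);	/* single str, no ldrex/strex loop */
	}

	int example_peek(void)
	{
		return atomic_read(&example_count);	/* plain load */
	}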
index feaa75f..66c160b 100644 (file)
@@ -4,7 +4,7 @@
 #ifndef __ASMARM_CACHE_H
 #define __ASMARM_CACHE_H
 
-#define L1_CACHE_SHIFT         5
+#define L1_CACHE_SHIFT         CONFIG_ARM_L1_CACHE_SHIFT
 #define L1_CACHE_BYTES         (1 << L1_CACHE_SHIFT)
 
 /*
index b3e656c..20ae96c 100644 (file)
@@ -63,6 +63,11 @@ static inline unsigned int __attribute_const__ read_cpuid_cachetype(void)
        return read_cpuid(CPUID_CACHETYPE);
 }
 
+static inline unsigned int __attribute_const__ read_cpuid_tcmstatus(void)
+{
+       return read_cpuid(CPUID_TCM);
+}
+
 /*
  * Intel's XScale3 core supports some v6 features (supersections, L2)
  * but advertises itself as v5 as it does not support the v6 ISA.  For
@@ -73,7 +78,10 @@ static inline unsigned int __attribute_const__ read_cpuid_cachetype(void)
 #else
 static inline int cpu_is_xsc3(void)
 {
-       if ((read_cpuid_id() & 0xffffe000) == 0x69056000)
+       unsigned int id;
+       id = read_cpuid_id() & 0xffffe000;
+       /* This covers both the Intel and the Marvell XScale3 IDs */
+       if ((id == 0x69056000) || (id == 0x56056000))
                return 1;
 
        return 0;
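A quick worked check of the widened test (the raw ID below is made up for
illustration; only the top 19 bits survive the 0xffffe000 mask):

	/* Sketch only -- the raw value is hypothetical */
	unsigned int raw = 0x69056417;		/* imagined CP15 main ID */
	unsigned int id  = raw & 0xffffe000;	/* variant/revision bits dropped */
	/* id == 0x69056000 (Intel XScale3); 0x56056000 would be Marvell */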
index 83e6ba3..1a8c727 100644 (file)
@@ -187,11 +187,74 @@ union iop3xx_desc {
        void *ptr;
 };
 
+/* No support for p+q operations */
+static inline int
+iop_chan_pq_slot_count(size_t len, int src_cnt, int *slots_per_op)
+{
+       BUG();
+       return 0;
+}
+
+static inline void
+iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt,
+                 unsigned long flags)
+{
+       BUG();
+}
+
+static inline void
+iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr)
+{
+       BUG();
+}
+
+static inline void
+iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
+                        dma_addr_t addr, unsigned char coef)
+{
+       BUG();
+}
+
+static inline int
+iop_chan_pq_zero_sum_slot_count(size_t len, int src_cnt, int *slots_per_op)
+{
+       BUG();
+       return 0;
+}
+
+static inline void
+iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
+                         unsigned long flags)
+{
+       BUG();
+}
+
+static inline void
+iop_desc_set_pq_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
+{
+       BUG();
+}
+
+#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr
+
+static inline void
+iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx,
+                             dma_addr_t *src)
+{
+       BUG();
+}
+
 static inline int iop_adma_get_max_xor(void)
 {
        return 32;
 }
 
+static inline int iop_adma_get_max_pq(void)
+{
+       BUG();
+       return 0;
+}
+
 static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
 {
        int id = chan->device->id;
@@ -332,6 +395,11 @@ static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt,
        return slot_cnt;
 }
 
+static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
+{
+       return 0;
+}
+
 static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
                                        struct iop_adma_chan *chan)
 {
@@ -349,6 +417,14 @@ static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
        return 0;
 }
 
+
+static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
+                                         struct iop_adma_chan *chan)
+{
+       BUG();
+       return 0;
+}
+
 static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
                                        struct iop_adma_chan *chan)
 {
@@ -756,13 +832,14 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
        hw_desc->src[0] = val;
 }
 
-static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
+static inline enum sum_check_flags
+iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
 {
        struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
        struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
 
        iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en));
-       return desc_ctrl.zero_result_err;
+       return desc_ctrl.zero_result_err << SUM_CHECK_P;
 }
 
 static inline void iop_chan_append(struct iop_adma_chan *chan)
index 385c6e8..59b8c38 100644 (file)
@@ -86,6 +86,7 @@ struct iop_adma_chan {
  * @idx: pool index
  * @unmap_src_cnt: number of xor sources
  * @unmap_len: transaction bytecount
+ * @tx_list: list of descriptors that are associated with one operation
  * @async_tx: support for the async_tx api
  * @group_list: list of slots that make up a multi-descriptor transaction
  *     for example transfer lengths larger than the supported hw max
@@ -102,10 +103,12 @@ struct iop_adma_desc_slot {
        u16 idx;
        u16 unmap_src_cnt;
        size_t unmap_len;
+       struct list_head tx_list;
        struct dma_async_tx_descriptor async_tx;
        union {
                u32 *xor_check_result;
                u32 *crc32_result;
+               u32 *pq_check_result;
        };
 };
 
diff --git a/arch/arm/include/asm/mach/mmc.h b/arch/arm/include/asm/mach/mmc.h
deleted file mode 100644 (file)
index b490ecc..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- *  arch/arm/include/asm/mach/mmc.h
- */
-#ifndef ASMARM_MACH_MMC_H
-#define ASMARM_MACH_MMC_H
-
-#include <linux/mmc/host.h>
-
-struct mmc_platform_data {
-       unsigned int ocr_mask;                  /* available voltages */
-       u32 (*translate_vdd)(struct device *, unsigned int);
-       unsigned int (*status)(struct device *);
-       int     gpio_wp;
-       int     gpio_cd;
-};
-
-#endif
diff --git a/arch/arm/include/asm/tcm.h b/arch/arm/include/asm/tcm.h
new file mode 100644 (file)
index 0000000..5929ef5
--- /dev/null
@@ -0,0 +1,31 @@
+/*
+ *
+ * Copyright (C) 2008-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ *
+ * Author: Rickard Andersson <rickard.andersson@stericsson.com>
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ *
+ */
+#ifndef __ASMARM_TCM_H
+#define __ASMARM_TCM_H
+
+#ifndef CONFIG_HAVE_TCM
+#error "You should not be including tcm.h unless you have a TCM!"
+#endif
+
+#include <linux/compiler.h>
+
+/* Tag variables with this */
+#define __tcmdata __section(.tcm.data)
+/* Tag constants with this */
+#define __tcmconst __section(.tcm.rodata)
+/* Tag functions inside TCM called from outside TCM with this */
+#define __tcmfunc __attribute__((long_call)) __section(.tcm.text) noinline
+/* Tag functions inside TCM called from inside TCM with this */
+#define __tcmlocalfunc __section(.tcm.text)
+
+void *tcm_alloc(size_t len);
+void tcm_free(void *addr, size_t len);
+
+#endif
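A minimal sketch of how the new tags are meant to be used from code built
with CONFIG_HAVE_TCM (the variable and function names are illustrative, not
part of this patch):

	#include <asm/tcm.h>

	/* Linked into DTCM; copied there from RAM by tcm_init() at boot */
	static int __tcmdata hot_counter;

	/* Callable from ordinary RAM code: the long_call and noinline in
	 * __tcmfunc keep the cross-section branch in range */
	int __tcmfunc hot_path_bump(int delta)
	{
		hot_counter += delta;
		return hot_counter;
	}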
index 073e85b..bc63116 100644 (file)
@@ -35,7 +35,9 @@
 
 #define ARM(x...)
 #define THUMB(x...)    x
+#ifdef __ASSEMBLY__
 #define W(instr)       instr.w
+#endif
 #define BSYM(sym)      sym + 1
 
 #else  /* !CONFIG_THUMB2_KERNEL */
@@ -45,7 +47,9 @@
 
 #define ARM(x...)      x
 #define THUMB(x...)
+#ifdef __ASSEMBLY__
 #define W(instr)       instr
+#endif
 #define BSYM(sym)      sym
 
 #endif /* CONFIG_THUMB2_KERNEL */
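The new __ASSEMBLY__ guards matter because unified.h is also included from C
translation units; a hedged sketch of the clash they avoid (the local helper
below is hypothetical, in the style of hash code that defines its own W()
macro):

	#include <asm/unified.h>

	/* Without the guard, unified.h's W(instr) would collide with this
	 * local function-like macro; with it, C code never sees W(). */
	#define W(n)	(msg_schedule[(n) & 15])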
index c446aef..79087dd 100644 (file)
@@ -35,6 +35,7 @@ obj-$(CONFIG_OABI_COMPAT)     += sys_oabi-compat.o
 obj-$(CONFIG_ARM_THUMBEE)      += thumbee.o
 obj-$(CONFIG_KGDB)             += kgdb.o
 obj-$(CONFIG_ARM_UNWIND)       += unwind.o
+obj-$(CONFIG_HAVE_TCM)         += tcm.o
 
 obj-$(CONFIG_CRUNCH)           += crunch.o crunch-bits.o
 AFLAGS_crunch-bits.o           := -Wa,-mcpu=ep9312
index 3d727a8..0a2ba51 100644 (file)
@@ -272,7 +272,15 @@ __und_svc:
        @
        @  r0 - instruction
        @
+#ifndef        CONFIG_THUMB2_KERNEL
        ldr     r0, [r2, #-4]
+#else
+       ldrh    r0, [r2, #-2]                   @ Thumb instruction at LR - 2
+       and     r9, r0, #0xf800
+       cmp     r9, #0xe800                     @ 32-bit instruction if r9 >= 0xe800
+       ldrhhs  r9, [r2]                        @ bottom 16 bits
+       orrhs   r0, r9, r0, lsl #16
+#endif
        adr     r9, BSYM(1f)
        bl      call_fpe
 
@@ -678,7 +686,9 @@ ENTRY(fp_enter)
        .word   no_fp
        .previous
 
-no_fp: mov     pc, lr
+ENTRY(no_fp)
+       mov     pc, lr
+ENDPROC(no_fp)
 
 __und_usr_unknown:
        enable_irq
@@ -734,13 +744,6 @@ ENTRY(__switch_to)
 #ifdef CONFIG_MMU
        ldr     r6, [r2, #TI_CPU_DOMAIN]
 #endif
-#if __LINUX_ARM_ARCH__ >= 6
-#ifdef CONFIG_CPU_32v6K
-       clrex
-#else
-       strex   r5, r4, [ip]                    @ Clear exclusive monitor
-#endif
-#endif
 #if defined(CONFIG_HAS_TLS_REG)
        mcr     p15, 0, r3, c13, c0, 3          @ set TLS register
 #elif !defined(CONFIG_TLS_REG_EMUL)
index a4eaf4f..e17e3c3 100644 (file)
 #ifndef CONFIG_THUMB2_KERNEL
        .macro  svc_exit, rpsr
        msr     spsr_cxsf, \rpsr
+#if defined(CONFIG_CPU_32v6K)
+       clrex                                   @ clear the exclusive monitor
        ldmia   sp, {r0 - pc}^                  @ load r0 - pc, cpsr
+#elif defined (CONFIG_CPU_V6)
+       ldr     r0, [sp]
+       strex   r1, r2, [sp]                    @ clear the exclusive monitor
+       ldmib   sp, {r1 - pc}^                  @ load r1 - pc, cpsr
+#else
+       ldmia   sp, {r0 - pc}^                  @ load r0 - pc, cpsr
+#endif
        .endm
 
        .macro  restore_user_regs, fast = 0, offset = 0
        ldr     r1, [sp, #\offset + S_PSR]      @ get calling cpsr
        ldr     lr, [sp, #\offset + S_PC]!      @ get pc
        msr     spsr_cxsf, r1                   @ save in spsr_svc
+#if defined(CONFIG_CPU_32v6K)
+       clrex                                   @ clear the exclusive monitor
+#elif defined (CONFIG_CPU_V6)
+       strex   r1, r2, [sp]                    @ clear the exclusive monitor
+#endif
        .if     \fast
        ldmdb   sp, {r1 - lr}^                  @ get calling r1 - lr
        .else
        .endm
 #else  /* CONFIG_THUMB2_KERNEL */
        .macro  svc_exit, rpsr
+       clrex                                   @ clear the exclusive monitor
        ldr     r0, [sp, #S_SP]                 @ top of the stack
        ldr     r1, [sp, #S_PC]                 @ return address
        tst     r0, #4                          @ orig stack 8-byte aligned?
        .endm
 
        .macro  restore_user_regs, fast = 0, offset = 0
+       clrex                                   @ clear the exclusive monitor
        mov     r2, sp
        load_user_sp_lr r2, r3, \offset + S_SP  @ calling sp, lr
        ldr     r1, [sp, #\offset + S_PSR]      @ get calling cpsr
index f692efd..60c62c3 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/kernel.h>
 #include <linux/kprobes.h>
 #include <linux/module.h>
+#include <linux/stop_machine.h>
 #include <linux/stringify.h>
 #include <asm/traps.h>
 #include <asm/cacheflush.h>
@@ -83,10 +84,24 @@ void __kprobes arch_arm_kprobe(struct kprobe *p)
        flush_insns(p->addr, 1);
 }
 
+/*
+ * The actual disarming is done here on each CPU and synchronized using
+ * stop_machine. This synchronization is necessary on SMP to avoid removing
+ * a probe between the moment the 'Undefined Instruction' exception is raised
+ * and the moment the exception handler reads the faulting instruction from
+ * memory.
+ */
+int __kprobes __arch_disarm_kprobe(void *p)
+{
+       struct kprobe *kp = p;
+       *kp->addr = kp->opcode;
+       flush_insns(kp->addr, 1);
+       return 0;
+}
+
 void __kprobes arch_disarm_kprobe(struct kprobe *p)
 {
-       *p->addr = p->opcode;
-       flush_insns(p->addr, 1);
+       stop_machine(__arch_disarm_kprobe, p, &cpu_online_map);
 }
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
index d4d4f77..c6c57b6 100644 (file)
@@ -45,6 +45,7 @@
 
 #include "compat.h"
 #include "atags.h"
+#include "tcm.h"
 
 #ifndef MEM_SIZE
 #define MEM_SIZE       (16*1024*1024)
@@ -749,6 +750,7 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
        cpu_init();
+       tcm_init();
 
        /*
         * Set up various architecture-specific pointers
diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c
new file mode 100644 (file)
index 0000000..e503038
--- /dev/null
@@ -0,0 +1,246 @@
+/*
+ * Copyright (C) 2008-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * TCM memory handling for ARM systems
+ *
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ * Author: Rickard Andersson <rickard.andersson@stericsson.com>
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/stddef.h>
+#include <linux/ioport.h>
+#include <linux/genalloc.h>
+#include <linux/string.h> /* memcpy */
+#include <asm/page.h> /* PAGE_SHIFT */
+#include <asm/cputype.h>
+#include <asm/mach/map.h>
+#include <mach/memory.h>
+#include "tcm.h"
+
+/* Scream and warn about misuse */
+#if !defined(ITCM_OFFSET) || !defined(ITCM_END) || \
+    !defined(DTCM_OFFSET) || !defined(DTCM_END)
+#error "TCM support selected but offsets not defined!"
+#endif
+
+static struct gen_pool *tcm_pool;
+
+/* TCM section definitions from the linker */
+extern char __itcm_start, __sitcm_text, __eitcm_text;
+extern char __dtcm_start, __sdtcm_data, __edtcm_data;
+
+/*
+ * TCM memory resources
+ */
+static struct resource dtcm_res = {
+       .name = "DTCM RAM",
+       .start = DTCM_OFFSET,
+       .end = DTCM_END,
+       .flags = IORESOURCE_MEM
+};
+
+static struct resource itcm_res = {
+       .name = "ITCM RAM",
+       .start = ITCM_OFFSET,
+       .end = ITCM_END,
+       .flags = IORESOURCE_MEM
+};
+
+static struct map_desc dtcm_iomap[] __initdata = {
+       {
+               .virtual        = DTCM_OFFSET,
+               .pfn            = __phys_to_pfn(DTCM_OFFSET),
+               .length         = (DTCM_END - DTCM_OFFSET + 1),
+               .type           = MT_UNCACHED
+       }
+};
+
+static struct map_desc itcm_iomap[] __initdata = {
+       {
+               .virtual        = ITCM_OFFSET,
+               .pfn            = __phys_to_pfn(ITCM_OFFSET),
+               .length         = (ITCM_END - ITCM_OFFSET + 1),
+               .type           = MT_UNCACHED
+       }
+};
+
+/*
+ * Allocate a chunk of TCM memory
+ */
+void *tcm_alloc(size_t len)
+{
+       unsigned long vaddr;
+
+       if (!tcm_pool)
+               return NULL;
+
+       vaddr = gen_pool_alloc(tcm_pool, len);
+       if (!vaddr)
+               return NULL;
+
+       return (void *) vaddr;
+}
+EXPORT_SYMBOL(tcm_alloc);
+
+/*
+ * Free a chunk of TCM memory
+ */
+void tcm_free(void *addr, size_t len)
+{
+       gen_pool_free(tcm_pool, (unsigned long) addr, len);
+}
+EXPORT_SYMBOL(tcm_free);
+
+
+static void __init setup_tcm_bank(u8 type, u32 offset, u32 expected_size)
+{
+       const int tcm_sizes[16] = { 0, -1, -1, 4, 8, 16, 32, 64, 128,
+                                   256, 512, 1024, -1, -1, -1, -1 };
+       u32 tcm_region;
+       int tcm_size;
+
+       /* Read the special TCM region register c9, 0 */
+       if (!type)
+               asm("mrc        p15, 0, %0, c9, c1, 0"
+                   : "=r" (tcm_region));
+       else
+               asm("mrc        p15, 0, %0, c9, c1, 1"
+                   : "=r" (tcm_region));
+
+       tcm_size = tcm_sizes[(tcm_region >> 2) & 0x0f];
+       if (tcm_size < 0) {
+               pr_err("CPU: %sTCM of unknown size!\n",
+                       type ? "I" : "D");
+       } else {
+               pr_info("CPU: found %sTCM %dk @ %08x, %senabled\n",
+                       type ? "I" : "D",
+                       tcm_size,
+                       (tcm_region & 0xfffff000U),
+                       (tcm_region & 1) ? "" : "not ");
+       }
+
+       if (tcm_size != expected_size) {
+               pr_crit("CPU: %sTCM was detected as %dk but expected %dk!\n",
+                      type ? "I" : "D",
+                      tcm_size,
+                      expected_size);
+               /* Adjust to the expected size? What can we do... */
+       }
+
+       /* Force move the TCM bank to where we want it, enable */
+       tcm_region = offset | (tcm_region & 0x00000ffeU) | 1;
+
+       if (!type)
+               asm("mcr        p15, 0, %0, c9, c1, 0"
+                   : /* No output operands */
+                   : "r" (tcm_region));
+       else
+               asm("mcr        p15, 0, %0, c9, c1, 1"
+                   : /* No output operands */
+                   : "r" (tcm_region));
+
+       pr_debug("CPU: moved %sTCM %dk to %08x, enabled\n",
+                type ? "I" : "D",
+                tcm_size,
+                (tcm_region & 0xfffff000U));
+}
+
+/*
+ * This initializes the TCM memory
+ */
+void __init tcm_init(void)
+{
+       u32 tcm_status = read_cpuid_tcmstatus();
+       char *start;
+       char *end;
+       char *ram;
+
+       /* Setup DTCM if present */
+       if (tcm_status & (1 << 16)) {
+               setup_tcm_bank(0, DTCM_OFFSET,
+                              (DTCM_END - DTCM_OFFSET + 1) >> 10);
+               request_resource(&iomem_resource, &dtcm_res);
+               iotable_init(dtcm_iomap, 1);
+               /* Copy data from RAM to DTCM */
+               start = &__sdtcm_data;
+               end   = &__edtcm_data;
+               ram   = &__dtcm_start;
+               memcpy(start, ram, (end-start));
+               pr_debug("CPU DTCM: copied data from %p - %p\n", start, end);
+       }
+
+       /* Setup ITCM if present */
+       if (tcm_status & 1) {
+               setup_tcm_bank(1, ITCM_OFFSET,
+                              (ITCM_END - ITCM_OFFSET + 1) >> 10);
+               request_resource(&iomem_resource, &itcm_res);
+               iotable_init(itcm_iomap, 1);
+               /* Copy code from RAM to ITCM */
+               start = &__sitcm_text;
+               end   = &__eitcm_text;
+               ram   = &__itcm_start;
+               memcpy(start, ram, (end-start));
+               pr_debug("CPU ITCM: copied code from %p - %p\n", start, end);
+       }
+}
+
+/*
+ * This creates the TCM memory pool and has to be done later,
+ * during the core_initicalls, since the allocator is not yet
+ * up and running when the first initialization runs.
+ */
+static int __init setup_tcm_pool(void)
+{
+       u32 tcm_status = read_cpuid_tcmstatus();
+       u32 dtcm_pool_start = (u32) &__edtcm_data;
+       u32 itcm_pool_start = (u32) &__eitcm_text;
+       int ret;
+
+       /*
+        * Set up the malloc pool with 2^2 = 4 byte granularity, since
+        * the TCM is sometimes just 4 KiB. NB: page and cache line
+        * alignment does not matter in TCM!
+        */
+       tcm_pool = gen_pool_create(2, -1);
+
+       pr_debug("Setting up TCM memory pool\n");
+
+       /* Add the rest of DTCM to the TCM pool */
+       if (tcm_status & (1 << 16)) {
+               if (dtcm_pool_start < DTCM_END) {
+                       ret = gen_pool_add(tcm_pool, dtcm_pool_start,
+                                          DTCM_END - dtcm_pool_start + 1, -1);
+                       if (ret) {
+                               pr_err("CPU DTCM: could not add DTCM " \
+                                      "remainder to pool!\n");
+                               return ret;
+                       }
+                       pr_debug("CPU DTCM: Added %08x bytes @ %08x to " \
+                                "the TCM memory pool\n",
+                                DTCM_END - dtcm_pool_start + 1,
+                                dtcm_pool_start);
+               }
+       }
+
+       /* Add the rest of ITCM to the TCM pool */
+       if (tcm_status & 1) {
+               if (itcm_pool_start < ITCM_END) {
+                       ret = gen_pool_add(tcm_pool, itcm_pool_start,
+                                          ITCM_END - itcm_pool_start + 1, -1);
+                       if (ret) {
+                               pr_err("CPU ITCM: could not add ITCM " \
+                                      "remainder to pool!\n");
+                               return ret;
+                       }
+                       pr_debug("CPU ITCM: Added %08x bytes @ %08x to " \
+                                "the TCM memory pool\n",
+                                ITCM_END - itcm_pool_start + 1,
+                                itcm_pool_start);
+               }
+       }
+       return 0;
+}
+
+core_initcall(setup_tcm_pool);
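Together with tcm_alloc()/tcm_free() above, the pool gives drivers a small
run-time allocator for the TCM remainder; a usage sketch (the size and the
work done in the buffer are illustrative):

	/* Hypothetical driver-side use of the TCM pool */
	void *scratch = tcm_alloc(128);	/* granule is 4 bytes, so 128 is exact */
	if (scratch) {
		/* ... latency-critical work in contention-free TCM ... */
		tcm_free(scratch, 128);
	}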
diff --git a/arch/arm/kernel/tcm.h b/arch/arm/kernel/tcm.h
new file mode 100644 (file)
index 0000000..8015ad4
--- /dev/null
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2008-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * TCM memory handling for ARM systems
+ *
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ * Author: Rickard Andersson <rickard.andersson@stericsson.com>
+ */
+
+#ifdef CONFIG_HAVE_TCM
+void __init tcm_init(void);
+#else
+/* No TCM support, just blank inlines to be optimized out */
+static inline void tcm_init(void)
+{
+}
+#endif
index 5cc4812..aecf87d 100644 (file)
@@ -199,6 +199,63 @@ SECTIONS
        }
        _edata_loc = __data_loc + SIZEOF(.data);
 
+#ifdef CONFIG_HAVE_TCM
+       /*
+        * We align everything to a page boundary so that the region
+        * can be freed after init has run and the TCM contents have
+        * been copied to their destination.
+        */
+       .tcm_start : {
+               . = ALIGN(PAGE_SIZE);
+               __tcm_start = .;
+               __itcm_start = .;
+       }
+
+       /*
+        * Link these to the ITCM RAM
+        * Put VMA to the TCM address and LMA to the common RAM
+        * and we'll upload the contents from RAM to TCM and free
+        * the used RAM after that.
+        */
+       .text_itcm ITCM_OFFSET : AT(__itcm_start)
+       {
+               __sitcm_text = .;
+               *(.tcm.text)
+               *(.tcm.rodata)
+               . = ALIGN(4);
+               __eitcm_text = .;
+       }
+
+       /*
+        * Reset the dot pointer, this is needed to create the
+        * relative __dtcm_start below (to be used as extern in code).
+        */
+       . = ADDR(.tcm_start) + SIZEOF(.tcm_start) + SIZEOF(.text_itcm);
+
+       .dtcm_start : {
+               __dtcm_start = .;
+       }
+
+       /* TODO: add remainder of ITCM as well, that can be used for data! */
+       .data_dtcm DTCM_OFFSET : AT(__dtcm_start)
+       {
+               . = ALIGN(4);
+               __sdtcm_data = .;
+               *(.tcm.data)
+               . = ALIGN(4);
+               __edtcm_data = .;
+       }
+
+       /* Reset the dot pointer or the linker gets confused */
+       . = ADDR(.dtcm_start) + SIZEOF(.data_dtcm);
+
+       /* End marker for freeing TCM copy in linked object */
+       .tcm_end : AT(ADDR(.dtcm_start) + SIZEOF(.data_dtcm)){
+               . = ALIGN(PAGE_SIZE);
+               __tcm_end = .;
+       }
+#endif
+
        .bss : {
                __bss_start = .;        /* BSS                          */
                *(.bss)
index 6ae04db..6ee2f67 100644 (file)
@@ -12,8 +12,9 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
+#include <asm/cache.h>
 
-#define COPY_COUNT (PAGE_SZ/64 PLD( -1 ))
+#define COPY_COUNT (PAGE_SZ / (2 * L1_CACHE_BYTES) PLD( -1 ))
 
                .text
                .align  5
 ENTRY(copy_page)
                stmfd   sp!, {r4, lr}                   @       2
        PLD(    pld     [r1, #0]                )
-       PLD(    pld     [r1, #32]               )
+       PLD(    pld     [r1, #L1_CACHE_BYTES]           )
                mov     r2, #COPY_COUNT                 @       1
                ldmia   r1!, {r3, r4, ip, lr}           @       4+1
-1:     PLD(    pld     [r1, #64]               )
-       PLD(    pld     [r1, #96]               )
-2:             stmia   r0!, {r3, r4, ip, lr}           @       4
-               ldmia   r1!, {r3, r4, ip, lr}           @       4+1
-               stmia   r0!, {r3, r4, ip, lr}           @       4
-               ldmia   r1!, {r3, r4, ip, lr}           @       4+1
+1:     PLD(    pld     [r1, #2 * L1_CACHE_BYTES])
+       PLD(    pld     [r1, #3 * L1_CACHE_BYTES])
+2:
+       .rept   (2 * L1_CACHE_BYTES / 16 - 1)
                stmia   r0!, {r3, r4, ip, lr}           @       4
                ldmia   r1!, {r3, r4, ip, lr}           @       4
+       .endr
                subs    r2, r2, #1                      @       1
                stmia   r0!, {r3, r4, ip, lr}           @       4
                ldmgtia r1!, {r3, r4, ip, lr}           @       4
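Plugging in representative numbers shows what the parametrisation buys (a
sketch; the real values come from PAGE_SZ and CONFIG_ARM_L1_CACHE_SHIFT):

	/* Illustration only: 4 KiB pages, L1_CACHE_SHIFT = 5 */
	#define L1_CACHE_BYTES	(1 << 5)		/* 32-byte lines */
	#define COPY_COUNT	(4096 / (2 * 32))	/* = 64 loop iterations */
	/* Each iteration moves 2 * 32 = 64 bytes: the .rept body emits 3
	 * ldmia/stmia pairs and the loop tail supplies the 4th. */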
index 412aa49..d1f775e 100644 (file)
@@ -771,9 +771,9 @@ void __init at91_add_device_pwm(u32 mask) {}
  *  AC97
  * -------------------------------------------------------------------- */
 
-#if defined(CONFIG_SND_AT91_AC97) || defined(CONFIG_SND_AT91_AC97_MODULE)
+#if defined(CONFIG_SND_ATMEL_AC97C) || defined(CONFIG_SND_ATMEL_AC97C_MODULE)
 static u64 ac97_dmamask = DMA_BIT_MASK(32);
-static struct atmel_ac97_data ac97_data;
+static struct ac97c_platform_data ac97_data;
 
 static struct resource ac97_resources[] = {
        [0] = {
@@ -789,7 +789,7 @@ static struct resource ac97_resources[] = {
 };
 
 static struct platform_device at91cap9_ac97_device = {
-       .name           = "ac97c",
+       .name           = "atmel_ac97c",
        .id             = 1,
        .dev            = {
                                .dma_mask               = &ac97_dmamask,
@@ -800,7 +800,7 @@ static struct platform_device at91cap9_ac97_device = {
        .num_resources  = ARRAY_SIZE(ac97_resources),
 };
 
-void __init at91_add_device_ac97(struct atmel_ac97_data *data)
+void __init at91_add_device_ac97(struct ac97c_platform_data *data)
 {
        if (!data)
                return;
@@ -818,7 +818,7 @@ void __init at91_add_device_ac97(struct atmel_ac97_data *data)
        platform_device_register(&at91cap9_ac97_device);
 }
 #else
-void __init at91_add_device_ac97(struct atmel_ac97_data *data) {}
+void __init at91_add_device_ac97(struct ac97c_platform_data *data) {}
 #endif
 
 
index d746e86..d581cff 100644 (file)
 #include <mach/at91sam9g45.h>
 #include <mach/at91sam9g45_matrix.h>
 #include <mach/at91sam9_smc.h>
+#include <mach/at_hdmac.h>
 
 #include "generic.h"
 
 
+/* --------------------------------------------------------------------
+ *  HDMAC - AHB DMA Controller
+ * -------------------------------------------------------------------- */
+
+#if defined(CONFIG_AT_HDMAC) || defined(CONFIG_AT_HDMAC_MODULE)
+static u64 hdmac_dmamask = DMA_BIT_MASK(32);
+
+static struct at_dma_platform_data atdma_pdata = {
+       .nr_channels    = 8,
+};
+
+static struct resource hdmac_resources[] = {
+       [0] = {
+               .start  = AT91_BASE_SYS + AT91_DMA,
+               .end    = AT91_BASE_SYS + AT91_DMA + SZ_512 - 1,
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .start  = AT91SAM9G45_ID_DMA,
+               .end    = AT91SAM9G45_ID_DMA,
+               .flags  = IORESOURCE_IRQ,
+       },
+};
+
+static struct platform_device at_hdmac_device = {
+       .name           = "at_hdmac",
+       .id             = -1,
+       .dev            = {
+                               .dma_mask               = &hdmac_dmamask,
+                               .coherent_dma_mask      = DMA_BIT_MASK(32),
+                               .platform_data          = &atdma_pdata,
+       },
+       .resource       = hdmac_resources,
+       .num_resources  = ARRAY_SIZE(hdmac_resources),
+};
+
+void __init at91_add_device_hdmac(void)
+{
+       dma_cap_set(DMA_MEMCPY, atdma_pdata.cap_mask);
+       dma_cap_set(DMA_SLAVE, atdma_pdata.cap_mask);
+       platform_device_register(&at_hdmac_device);
+}
+#else
+void __init at91_add_device_hdmac(void) {}
+#endif
+
+
 /* --------------------------------------------------------------------
  *  USB Host (OHCI)
  * -------------------------------------------------------------------- */
@@ -549,6 +597,61 @@ void __init at91_add_device_spi(struct spi_board_info *devices, int nr_devices)
 #endif
 
 
+/* --------------------------------------------------------------------
+ *  AC97
+ * -------------------------------------------------------------------- */
+
+#if defined(CONFIG_SND_ATMEL_AC97C) || defined(CONFIG_SND_ATMEL_AC97C_MODULE)
+static u64 ac97_dmamask = DMA_BIT_MASK(32);
+static struct ac97c_platform_data ac97_data;
+
+static struct resource ac97_resources[] = {
+       [0] = {
+               .start  = AT91SAM9G45_BASE_AC97C,
+               .end    = AT91SAM9G45_BASE_AC97C + SZ_16K - 1,
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .start  = AT91SAM9G45_ID_AC97C,
+               .end    = AT91SAM9G45_ID_AC97C,
+               .flags  = IORESOURCE_IRQ,
+       },
+};
+
+static struct platform_device at91sam9g45_ac97_device = {
+       .name           = "atmel_ac97c",
+       .id             = 0,
+       .dev            = {
+                               .dma_mask               = &ac97_dmamask,
+                               .coherent_dma_mask      = DMA_BIT_MASK(32),
+                               .platform_data          = &ac97_data,
+       },
+       .resource       = ac97_resources,
+       .num_resources  = ARRAY_SIZE(ac97_resources),
+};
+
+void __init at91_add_device_ac97(struct ac97c_platform_data *data)
+{
+       if (!data)
+               return;
+
+       at91_set_A_periph(AT91_PIN_PD8, 0);     /* AC97FS */
+       at91_set_A_periph(AT91_PIN_PD9, 0);     /* AC97CK */
+       at91_set_A_periph(AT91_PIN_PD7, 0);     /* AC97TX */
+       at91_set_A_periph(AT91_PIN_PD6, 0);     /* AC97RX */
+
+       /* reset */
+       if (data->reset_pin)
+               at91_set_gpio_output(data->reset_pin, 0);
+
+       ac97_data = *data;
+       platform_device_register(&at91sam9g45_ac97_device);
+}
+#else
+void __init at91_add_device_ac97(struct ac97c_platform_data *data) {}
+#endif
+
+
 /* --------------------------------------------------------------------
  *  LCD Controller
  * -------------------------------------------------------------------- */
@@ -1220,6 +1323,7 @@ void __init at91_add_device_serial(void) {}
  */
 static int __init at91_add_standard_devices(void)
 {
+       at91_add_device_hdmac();
        at91_add_device_rtc();
        at91_add_device_rtt();
        at91_add_device_watchdog();
index 7281865..d345f54 100644 (file)
 #include <mach/at91sam9rl.h>
 #include <mach/at91sam9rl_matrix.h>
 #include <mach/at91sam9_smc.h>
+#include <mach/at_hdmac.h>
 
 #include "generic.h"
 
 
+/* --------------------------------------------------------------------
+ *  HDMAC - AHB DMA Controller
+ * -------------------------------------------------------------------- */
+
+#if defined(CONFIG_AT_HDMAC) || defined(CONFIG_AT_HDMAC_MODULE)
+static u64 hdmac_dmamask = DMA_BIT_MASK(32);
+
+static struct at_dma_platform_data atdma_pdata = {
+       .nr_channels    = 2,
+};
+
+static struct resource hdmac_resources[] = {
+       [0] = {
+               .start  = AT91_BASE_SYS + AT91_DMA,
+               .end    = AT91_BASE_SYS + AT91_DMA + SZ_512 - 1,
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .start  = AT91SAM9RL_ID_DMA,
+               .end    = AT91SAM9RL_ID_DMA,
+               .flags  = IORESOURCE_IRQ,
+       },
+};
+
+static struct platform_device at_hdmac_device = {
+       .name           = "at_hdmac",
+       .id             = -1,
+       .dev            = {
+                               .dma_mask               = &hdmac_dmamask,
+                               .coherent_dma_mask      = DMA_BIT_MASK(32),
+                               .platform_data          = &atdma_pdata,
+       },
+       .resource       = hdmac_resources,
+       .num_resources  = ARRAY_SIZE(hdmac_resources),
+};
+
+void __init at91_add_device_hdmac(void)
+{
+       dma_cap_set(DMA_MEMCPY, atdma_pdata.cap_mask);
+       platform_device_register(&at_hdmac_device);
+}
+#else
+void __init at91_add_device_hdmac(void) {}
+#endif
+
 /* --------------------------------------------------------------------
  *  USB HS Device (Gadget)
  * -------------------------------------------------------------------- */
@@ -397,6 +443,61 @@ void __init at91_add_device_spi(struct spi_board_info *devices, int nr_devices)
 #endif
 
 
+/* --------------------------------------------------------------------
+ *  AC97
+ * -------------------------------------------------------------------- */
+
+#if defined(CONFIG_SND_ATMEL_AC97C) || defined(CONFIG_SND_ATMEL_AC97C_MODULE)
+static u64 ac97_dmamask = DMA_BIT_MASK(32);
+static struct ac97c_platform_data ac97_data;
+
+static struct resource ac97_resources[] = {
+       [0] = {
+               .start  = AT91SAM9RL_BASE_AC97C,
+               .end    = AT91SAM9RL_BASE_AC97C + SZ_16K - 1,
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .start  = AT91SAM9RL_ID_AC97C,
+               .end    = AT91SAM9RL_ID_AC97C,
+               .flags  = IORESOURCE_IRQ,
+       },
+};
+
+static struct platform_device at91sam9rl_ac97_device = {
+       .name           = "atmel_ac97c",
+       .id             = 0,
+       .dev            = {
+                               .dma_mask               = &ac97_dmamask,
+                               .coherent_dma_mask      = DMA_BIT_MASK(32),
+                               .platform_data          = &ac97_data,
+       },
+       .resource       = ac97_resources,
+       .num_resources  = ARRAY_SIZE(ac97_resources),
+};
+
+void __init at91_add_device_ac97(struct ac97c_platform_data *data)
+{
+       if (!data)
+               return;
+
+       at91_set_A_periph(AT91_PIN_PD1, 0);     /* AC97FS */
+       at91_set_A_periph(AT91_PIN_PD2, 0);     /* AC97CK */
+       at91_set_A_periph(AT91_PIN_PD3, 0);     /* AC97TX */
+       at91_set_A_periph(AT91_PIN_PD4, 0);     /* AC97RX */
+
+       /* reset */
+       if (data->reset_pin)
+               at91_set_gpio_output(data->reset_pin, 0);
+
+       ac97_data = *data;
+       platform_device_register(&at91sam9rl_ac97_device);
+}
+#else
+void __init at91_add_device_ac97(struct ac97c_platform_data *data) {}
+#endif
+
+
 /* --------------------------------------------------------------------
  *  LCD Controller
  * -------------------------------------------------------------------- */
@@ -1103,6 +1204,7 @@ void __init at91_add_device_serial(void) {}
  */
 static int __init at91_add_standard_devices(void)
 {
+       at91_add_device_hdmac();
        at91_add_device_rtc();
        at91_add_device_rtt();
        at91_add_device_watchdog();
index 83a1a0f..d694087 100644 (file)
@@ -364,7 +364,7 @@ static struct atmel_lcdfb_info __initdata cap9adk_lcdc_data;
 /*
  * AC97
  */
-static struct atmel_ac97_data cap9adk_ac97_data = {
+static struct ac97c_platform_data cap9adk_ac97_data = {
 //     .reset_pin      = ... not connected
 };
 
index 8c0b71c..7c1e382 100644 (file)
@@ -340,7 +340,7 @@ static void __init neocore926_add_device_buttons(void) {}
 /*
  * AC97
  */
-static struct atmel_ac97_data neocore926_ac97_data = {
+static struct ac97c_platform_data neocore926_ac97_data = {
        .reset_pin      = AT91_PIN_PA13,
 };
 
index b8558ea..64c3843 100644 (file)
@@ -310,6 +310,14 @@ static void __init ek_add_device_buttons(void) {}
 #endif
 
 
+/*
+ * AC97
+ * reset_pin is not connected: NRST
+ */
+static struct ac97c_platform_data ek_ac97_data = {
+};
+
+
 /*
  * LEDs ... these could all be PWM-driven, for variable brightness
  */
@@ -372,6 +380,8 @@ static void __init ek_board_init(void)
        at91_add_device_lcdc(&ek_lcdc_data);
        /* Push Buttons */
        ek_add_device_buttons();
+       /* AC97 */
+       at91_add_device_ac97(&ek_ac97_data);
        /* LEDs */
        at91_gpio_leds(ek_leds, ARRAY_SIZE(ek_leds));
        at91_pwm_leds(ek_pwm_led, ARRAY_SIZE(ek_pwm_led));
index 94ffb5c..bd28e98 100644 (file)
@@ -210,6 +210,14 @@ static struct atmel_lcdfb_info __initdata ek_lcdc_data;
 #endif
 
 
+/*
+ * AC97
+ * reset_pin is not connected: NRST
+ */
+static struct ac97c_platform_data ek_ac97_data = {
+};
+
+
 /*
  * LEDs
  */
@@ -299,6 +307,8 @@ static void __init ek_board_init(void)
        at91_add_device_mmc(0, &ek_mmc_data);
        /* LCD Controller */
        at91_add_device_lcdc(&ek_lcdc_data);
+       /* AC97 */
+       at91_add_device_ac97(&ek_ac97_data);
        /* Touch Screen Controller */
        at91_add_device_tsadcc();
        /* LEDs */
index 2a318eb..3f35293 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/amba/bus.h>
 #include <linux/amba/kmi.h>
 #include <linux/amba/clcd.h>
+#include <linux/amba/mmci.h>
 #include <linux/io.h>
 
 #include <asm/clkdev.h>
@@ -35,7 +36,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/irq.h>
-#include <asm/mach/mmc.h>
 #include <asm/mach/map.h>
 #include <asm/mach/time.h>
 
@@ -400,7 +400,7 @@ static unsigned int mmc_status(struct device *dev)
        return status & 8;
 }
 
-static struct mmc_platform_data mmc_data = {
+static struct mmci_platform_data mmc_data = {
        .ocr_mask       = MMC_VDD_32_33|MMC_VDD_33_34,
        .status         = mmc_status,
        .gpio_wp        = -1,
index 5722e86..6d3782d 100644 (file)
@@ -150,6 +150,8 @@ static inline int iop_adma_get_max_xor(void)
        return 16;
 }
 
+#define iop_adma_get_max_pq iop_adma_get_max_xor
+
 static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
 {
        return __raw_readl(ADMA_ADAR(chan));
@@ -211,7 +213,10 @@ iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op)
 #define IOP_ADMA_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
 #define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
 #define IOP_ADMA_XOR_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
+#define IOP_ADMA_PQ_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
 #define iop_chan_zero_sum_slot_count(l, s, o) iop_chan_xor_slot_count(l, s, o)
+#define iop_chan_pq_slot_count iop_chan_xor_slot_count
+#define iop_chan_pq_zero_sum_slot_count iop_chan_xor_slot_count
 
 static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
                                        struct iop_adma_chan *chan)
@@ -220,6 +225,13 @@ static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
        return hw_desc->dest_addr;
 }
 
+static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
+                                         struct iop_adma_chan *chan)
+{
+       struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+       return hw_desc->q_dest_addr;
+}
+
 static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
                                        struct iop_adma_chan *chan)
 {
@@ -319,6 +331,58 @@ iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
        return 1;
 }
 
+static inline void
+iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt,
+                 unsigned long flags)
+{
+       struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+       union {
+               u32 value;
+               struct iop13xx_adma_desc_ctrl field;
+       } u_desc_ctrl;
+
+       u_desc_ctrl.value = 0;
+       u_desc_ctrl.field.src_select = src_cnt - 1;
+       u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
+       u_desc_ctrl.field.pq_xfer_en = 1;
+       u_desc_ctrl.field.p_xfer_dis = !!(flags & DMA_PREP_PQ_DISABLE_P);
+       u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
+       hw_desc->desc_ctrl = u_desc_ctrl.value;
+}
+
+static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
+{
+       struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+       union {
+               u32 value;
+               struct iop13xx_adma_desc_ctrl field;
+       } u_desc_ctrl;
+
+       u_desc_ctrl.value = hw_desc->desc_ctrl;
+       return u_desc_ctrl.field.pq_xfer_en;
+}
+
+static inline void
+iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
+                         unsigned long flags)
+{
+       struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+       union {
+               u32 value;
+               struct iop13xx_adma_desc_ctrl field;
+       } u_desc_ctrl;
+
+       u_desc_ctrl.value = 0;
+       u_desc_ctrl.field.src_select = src_cnt - 1;
+       u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
+       u_desc_ctrl.field.zero_result = 1;
+       u_desc_ctrl.field.status_write_back_en = 1;
+       u_desc_ctrl.field.pq_xfer_en = 1;
+       u_desc_ctrl.field.p_xfer_dis = !!(flags & DMA_PREP_PQ_DISABLE_P);
+       u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
+       hw_desc->desc_ctrl = u_desc_ctrl.value;
+}
+
 static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc,
                                        struct iop_adma_chan *chan,
                                        u32 byte_count)
@@ -351,6 +415,7 @@ iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
        }
 }
 
+#define iop_desc_set_pq_zero_sum_byte_count iop_desc_set_zero_sum_byte_count
 
 static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
                                        struct iop_adma_chan *chan,
@@ -361,6 +426,16 @@ static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
        hw_desc->upper_dest_addr = 0;
 }
 
+static inline void
+iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr)
+{
+       struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
+
+       hw_desc->dest_addr = addr[0];
+       hw_desc->q_dest_addr = addr[1];
+       hw_desc->upper_dest_addr = 0;
+}
+
 static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc,
                                        dma_addr_t addr)
 {
@@ -388,6 +463,29 @@ static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc,
        } while (slot_cnt);
 }
 
+static inline void
+iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
+                        dma_addr_t addr, unsigned char coef)
+{
+       int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
+       struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter;
+       struct iop13xx_adma_src *src;
+       int i = 0;
+
+       do {
+               iter = iop_hw_desc_slot_idx(hw_desc, i);
+               src = &iter->src[src_idx];
+               src->src_addr = addr;
+               src->pq_upper_src_addr = 0;
+               src->pq_dmlt = coef;
+               slot_cnt -= slots_per_op;
+               if (slot_cnt) {
+                       i += slots_per_op;
+                       addr += IOP_ADMA_PQ_MAX_BYTE_COUNT;
+               }
+       } while (slot_cnt);
+}
+
 static inline void
 iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
        struct iop_adma_chan *chan)
@@ -399,6 +497,15 @@ iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
 }
 
 #define iop_desc_set_zero_sum_src_addr iop_desc_set_xor_src_addr
+#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr
+
+static inline void
+iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx,
+                             dma_addr_t *src)
+{
+       iop_desc_set_xor_src_addr(desc, pq_idx, src[pq_idx]);
+       iop_desc_set_xor_src_addr(desc, pq_idx+1, src[pq_idx+1]);
+}
 
 static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc,
                                        u32 next_desc_addr)
@@ -428,18 +535,20 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
        hw_desc->block_fill_data = val;
 }
 
-static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
+static inline enum sum_check_flags
+iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
 {
        struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
        struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
        struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field;
+       enum sum_check_flags flags;
 
        BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result));
 
-       if (desc_ctrl.pq_xfer_en)
-               return byte_count.zero_result_err_q;
-       else
-               return byte_count.zero_result_err;
+       flags = byte_count.zero_result_err_q << SUM_CHECK_Q;
+       flags |= byte_count.zero_result_err << SUM_CHECK_P;
+
+       return flags;
 }
 
 static inline void iop_chan_append(struct iop_adma_chan *chan)
index bee42c6..5c147fb 100644 (file)
@@ -477,10 +477,8 @@ void __init iop13xx_platform_init(void)
                        plat_data = &iop13xx_adma_0_data;
                        dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
                        dma_cap_set(DMA_XOR, plat_data->cap_mask);
-                       dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
-                       dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
+                       dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
                        dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
-                       dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
                        dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
                        break;
                case IOP13XX_INIT_ADMA_1:
@@ -489,10 +487,8 @@ void __init iop13xx_platform_init(void)
                        plat_data = &iop13xx_adma_1_data;
                        dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
                        dma_cap_set(DMA_XOR, plat_data->cap_mask);
-                       dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
-                       dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
+                       dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
                        dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
-                       dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
                        dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
                        break;
                case IOP13XX_INIT_ADMA_2:
@@ -501,14 +497,11 @@ void __init iop13xx_platform_init(void)
                        plat_data = &iop13xx_adma_2_data;
                        dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
                        dma_cap_set(DMA_XOR, plat_data->cap_mask);
-                       dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
-                       dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
+                       dma_cap_set(DMA_XOR_VAL, plat_data->cap_mask);
                        dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
-                       dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
                        dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
-                       dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask);
-                       dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask);
-                       dma_cap_set(DMA_PQ_ZERO_SUM, plat_data->cap_mask);
+                       dma_cap_set(DMA_PQ, plat_data->cap_mask);
+                       dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask);
                        break;
                }
        }
index 89c992b..a6f8eab 100644 (file)
@@ -21,6 +21,11 @@ config CPU_PXA930
 
 config CPU_PXA935
        bool "PXA935 (codename Tavor-P65)"
+       select CPU_PXA930
+
+config CPU_PXA950
+       bool "PXA950 (codename Tavor-PV2)"
+       select CPU_PXA930
 
 endmenu
 
@@ -79,6 +84,12 @@ config MACH_MP900C
        bool "Nec Mobilepro 900/c"
        select PXA25x
 
+config MACH_BALLOON3
+       bool "Balloon 3 board"
+       select PXA27x
+       select IWMMXT
+       select PXA_HAVE_BOARD_IRQS
+
 config ARCH_PXA_IDP
        bool "Accelent Xscale IDP"
        select PXA25x
@@ -371,6 +382,15 @@ config MACH_PALMTE2
          Say Y here if you intend to run this kernel on a Palm Tungsten|E2
          handheld computer.
 
+config MACH_PALMTC
+       bool "Palm Tungsten|C"
+       default y
+       depends on ARCH_PXA_PALM
+       select PXA25x
+       help
+         Say Y here if you intend to run this kernel on a Palm Tungsten|C
+         handheld computer.
+
 config MACH_PALMT5
        bool "Palm Tungsten|T5"
        default y
@@ -458,6 +478,7 @@ config PXA_EZX
        select PXA27x
        select IWMMXT
        select HAVE_PWM
+       select PXA_HAVE_BOARD_IRQS
 
 config MACH_EZX_A780
        bool "Motorola EZX A780"
@@ -489,6 +510,21 @@ config MACH_EZX_E2
        default y
        depends on PXA_EZX
 
+config MACH_XCEP
+       bool "Iskratel Electronics XCEP"
+       select PXA25x
+       select MTD
+       select MTD_PARTITIONS
+       select MTD_PHYSMAP
+       select MTD_CFI_INTELEXT
+       select MTD_CFI
+       select MTD_CHAR
+       select SMC91X
+       select PXA_SSP
+       help
+         PXA255-based single board computer with an SMC 91C111 Ethernet chip and 64 MB of flash.
+         Tuned for use in Libera instruments for particle accelerators.
+
 endmenu
 
 config PXA25x
index d4c6122..f10e152 100644 (file)
@@ -31,6 +31,7 @@ obj-$(CONFIG_GUMSTIX_AM300EPD)        += am300epd.o
 obj-$(CONFIG_ARCH_LUBBOCK)     += lubbock.o
 obj-$(CONFIG_MACH_LOGICPD_PXA270) += lpd270.o
 obj-$(CONFIG_MACH_MAINSTONE)   += mainstone.o
+obj-$(CONFIG_MACH_BALLOON3)    += balloon3.o
 obj-$(CONFIG_MACH_MP900C)      += mp900.o
 obj-$(CONFIG_ARCH_PXA_IDP)     += idp.o
 obj-$(CONFIG_MACH_TRIZEPS4)    += trizeps4.o
@@ -58,6 +59,7 @@ obj-$(CONFIG_MACH_E750)               += e750.o
 obj-$(CONFIG_MACH_E400)                += e400.o
 obj-$(CONFIG_MACH_E800)                += e800.o
 obj-$(CONFIG_MACH_PALMTE2)     += palmte2.o
+obj-$(CONFIG_MACH_PALMTC)      += palmtc.o
 obj-$(CONFIG_MACH_PALMT5)      += palmt5.o
 obj-$(CONFIG_MACH_PALMTX)      += palmtx.o
 obj-$(CONFIG_MACH_PALMLD)      += palmld.o
@@ -78,6 +80,8 @@ obj-$(CONFIG_MACH_ARMCORE)      += cm-x2xx.o cm-x255.o cm-x270.o
 obj-$(CONFIG_MACH_CM_X300)      += cm-x300.o
 obj-$(CONFIG_PXA_EZX)           += ezx.o
 
+obj-$(CONFIG_MACH_XCEP)         += xcep.o
+
 obj-$(CONFIG_MACH_INTELMOTE2)   += imote2.o
 obj-$(CONFIG_MACH_STARGATE2)   += stargate2.o
 obj-$(CONFIG_MACH_CSB726)      += csb726.o
diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c
new file mode 100644 (file)
index 0000000..f23138b
--- /dev/null
@@ -0,0 +1,361 @@
+/*
+ *  linux/arch/arm/mach-pxa/balloon3.c
+ *
+ *  Support for Balloonboard.org Balloon3 board.
+ *
+ *  Author:    Nick Bane, Wookey, Jonathan McDowell
+ *  Created:   June, 2006
+ *  Copyright: Toby Churchill Ltd
+ *  Derived from mainstone.c, by Nico Pitre
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/sysdev.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/bitops.h>
+#include <linux/fb.h>
+#include <linux/gpio.h>
+#include <linux/ioport.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/types.h>
+
+#include <asm/setup.h>
+#include <asm/mach-types.h>
+#include <asm/irq.h>
+#include <asm/sizes.h>
+
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+#include <asm/mach/irq.h>
+#include <asm/mach/flash.h>
+
+#include <mach/pxa27x.h>
+#include <mach/balloon3.h>
+#include <mach/audio.h>
+#include <mach/pxafb.h>
+#include <mach/mmc.h>
+#include <mach/udc.h>
+#include <mach/pxa27x-udc.h>
+#include <mach/irda.h>
+#include <mach/ohci.h>
+
+#include <plat/i2c.h>
+
+#include "generic.h"
+#include "devices.h"
+
+static unsigned long balloon3_irq_enabled;
+
+static unsigned long balloon3_features_present =
+               (1 << BALLOON3_FEATURE_OHCI) | (1 << BALLOON3_FEATURE_CF) |
+               (1 << BALLOON3_FEATURE_AUDIO) |
+               (1 << BALLOON3_FEATURE_TOPPOLY);
+
+int balloon3_has(enum balloon3_features feature)
+{
+       return (balloon3_features_present & (1 << feature)) ? 1 : 0;
+}
+EXPORT_SYMBOL_GPL(balloon3_has);
+
+int __init parse_balloon3_features(char *arg)
+{
+       if (!arg)
+               return 0;
+
+       return strict_strtoul(arg, 0, &balloon3_features_present);
+}
+early_param("balloon3_features", parse_balloon3_features);
+
+static void balloon3_mask_irq(unsigned int irq)
+{
+       int balloon3_irq = (irq - BALLOON3_IRQ(0));
+       balloon3_irq_enabled &= ~(1 << balloon3_irq);
+       __raw_writel(~balloon3_irq_enabled, BALLOON3_INT_CONTROL_REG);
+}
+
+static void balloon3_unmask_irq(unsigned int irq)
+{
+       int balloon3_irq = (irq - BALLOON3_IRQ(0));
+       balloon3_irq_enabled |= (1 << balloon3_irq);
+       __raw_writel(~balloon3_irq_enabled, BALLOON3_INT_CONTROL_REG);
+}
+
+static struct irq_chip balloon3_irq_chip = {
+       .name           = "FPGA",
+       .ack            = balloon3_mask_irq,
+       .mask           = balloon3_mask_irq,
+       .unmask         = balloon3_unmask_irq,
+};
+
+static void balloon3_irq_handler(unsigned int irq, struct irq_desc *desc)
+{
+       unsigned long pending = __raw_readl(BALLOON3_INT_CONTROL_REG) &
+                                       balloon3_irq_enabled;
+
+       do {
+               /* clear useless edge notification */
+               if (desc->chip->ack)
+                       desc->chip->ack(BALLOON3_AUX_NIRQ);
+               while (pending) {
+                       irq = BALLOON3_IRQ(0) + __ffs(pending);
+                       generic_handle_irq(irq);
+                       pending &= pending - 1;
+               }
+               pending = __raw_readl(BALLOON3_INT_CONTROL_REG) &
+                               balloon3_irq_enabled;
+       } while (pending);
+}
+
+static void __init balloon3_init_irq(void)
+{
+       int irq;
+
+       pxa27x_init_irq();
+       /* setup extra Balloon3 irqs */
+       for (irq = BALLOON3_IRQ(0); irq <= BALLOON3_IRQ(7); irq++) {
+               set_irq_chip(irq, &balloon3_irq_chip);
+               set_irq_handler(irq, handle_level_irq);
+               set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
+       }
+
+       set_irq_chained_handler(BALLOON3_AUX_NIRQ, balloon3_irq_handler);
+       set_irq_type(BALLOON3_AUX_NIRQ, IRQ_TYPE_EDGE_FALLING);
+
+       pr_debug("%s: chained handler installed - irq %d automatically "
+               "enabled\n", __func__, BALLOON3_AUX_NIRQ);
+}
+
+static void balloon3_backlight_power(int on)
+{
+       pr_debug("%s: power is %s\n", __func__, on ? "on" : "off");
+       gpio_set_value(BALLOON3_GPIO_RUN_BACKLIGHT, on);
+}
+
+static unsigned long balloon3_lcd_pin_config[] = {
+       /* LCD - 16bpp Active TFT */
+       GPIO58_LCD_LDD_0,
+       GPIO59_LCD_LDD_1,
+       GPIO60_LCD_LDD_2,
+       GPIO61_LCD_LDD_3,
+       GPIO62_LCD_LDD_4,
+       GPIO63_LCD_LDD_5,
+       GPIO64_LCD_LDD_6,
+       GPIO65_LCD_LDD_7,
+       GPIO66_LCD_LDD_8,
+       GPIO67_LCD_LDD_9,
+       GPIO68_LCD_LDD_10,
+       GPIO69_LCD_LDD_11,
+       GPIO70_LCD_LDD_12,
+       GPIO71_LCD_LDD_13,
+       GPIO72_LCD_LDD_14,
+       GPIO73_LCD_LDD_15,
+       GPIO74_LCD_FCLK,
+       GPIO75_LCD_LCLK,
+       GPIO76_LCD_PCLK,
+       GPIO77_LCD_BIAS,
+
+       GPIO99_GPIO,            /* Backlight */
+};
+
+static struct pxafb_mode_info balloon3_lcd_modes[] = {
+       {
+               .pixclock               = 38000,
+               .xres                   = 480,
+               .yres                   = 640,
+               .bpp                    = 16,
+               .hsync_len              = 8,
+               .left_margin            = 8,
+               .right_margin           = 8,
+               .vsync_len              = 2,
+               .upper_margin           = 4,
+               .lower_margin           = 5,
+               .sync                   = 0,
+       },
+};
+
+static struct pxafb_mach_info balloon3_pxafb_info = {
+       .modes                  = balloon3_lcd_modes,
+       .num_modes              = ARRAY_SIZE(balloon3_lcd_modes),
+       .lcd_conn               = LCD_COLOR_TFT_16BPP | LCD_PCLK_EDGE_FALL,
+       .pxafb_backlight_power  = balloon3_backlight_power,
+};
+
+static unsigned long balloon3_mmc_pin_config[] = {
+       GPIO32_MMC_CLK,
+       GPIO92_MMC_DAT_0,
+       GPIO109_MMC_DAT_1,
+       GPIO110_MMC_DAT_2,
+       GPIO111_MMC_DAT_3,
+       GPIO112_MMC_CMD,
+};
+
+static void balloon3_mci_setpower(struct device *dev, unsigned int vdd)
+{
+       struct pxamci_platform_data *p_d = dev->platform_data;
+
+       if ((1 << vdd) & p_d->ocr_mask) {
+               pr_debug("%s: on\n", __func__);
+               /* FIXME something to prod here? */
+       } else {
+               pr_debug("%s: off\n", __func__);
+               /* FIXME something to prod here? */
+       }
+}
+
+static struct pxamci_platform_data balloon3_mci_platform_data = {
+       .ocr_mask               = MMC_VDD_32_33|MMC_VDD_33_34,
+       .setpower               = balloon3_mci_setpower,
+       .gpio_card_detect       = -1,
+       .gpio_card_ro           = -1,
+       .gpio_power             = -1,
+};
+
+static int balloon3_udc_is_connected(void)
+{
+       pr_debug("%s: udc connected\n", __func__);
+       return 1;
+}
+
+static void balloon3_udc_command(int cmd)
+{
+       switch (cmd) {
+       case PXA2XX_UDC_CMD_CONNECT:
+               UP2OCR |= (UP2OCR_DPPUE + UP2OCR_DPPUBE);
+               pr_debug("%s: connect\n", __func__);
+               break;
+       case PXA2XX_UDC_CMD_DISCONNECT:
+               UP2OCR &= ~UP2OCR_DPPUE;
+               pr_debug("%s: disconnect\n", __func__);
+               break;
+       }
+}
+
+static struct pxa2xx_udc_mach_info balloon3_udc_info = {
+       .udc_is_connected = balloon3_udc_is_connected,
+       .udc_command      = balloon3_udc_command,
+};
+
+static struct pxaficp_platform_data balloon3_ficp_platform_data = {
+       .transceiver_cap  = IR_SIRMODE | IR_FIRMODE | IR_OFF,
+};
+
+static unsigned long balloon3_ohci_pin_config[] = {
+       GPIO88_USBH1_PWR,
+       GPIO89_USBH1_PEN,
+};
+
+static struct pxaohci_platform_data balloon3_ohci_platform_data = {
+       .port_mode      = PMM_PERPORT_MODE,
+       .flags          = ENABLE_PORT_ALL | POWER_CONTROL_LOW | POWER_SENSE_LOW,
+};
+
+static unsigned long balloon3_pin_config[] __initdata = {
+       /* Select BTUART 'COM1/ttyS0' as IO option for pins 42/43/44/45 */
+       GPIO42_BTUART_RXD,
+       GPIO43_BTUART_TXD,
+       GPIO44_BTUART_CTS,
+       GPIO45_BTUART_RTS,
+
+       /* Wakeup GPIO */
+       GPIO1_GPIO | WAKEUP_ON_EDGE_BOTH,
+
+       /* NAND & IDLE LED GPIOs */
+       GPIO9_GPIO,
+       GPIO10_GPIO,
+};
+
+static struct gpio_led balloon3_gpio_leds[] = {
+       {
+               .name                   = "balloon3:green:idle",
+               .default_trigger        = "heartbeat",
+               .gpio                   = BALLOON3_GPIO_LED_IDLE,
+               .active_low             = 1,
+       },
+       {
+               .name                   = "balloon3:green:nand",
+               .default_trigger        = "nand-disk",
+               .gpio                   = BALLOON3_GPIO_LED_NAND,
+               .active_low             = 1,
+       },
+};
+
+static struct gpio_led_platform_data balloon3_gpio_leds_platform_data = {
+       .leds           = balloon3_gpio_leds,
+       .num_leds       = ARRAY_SIZE(balloon3_gpio_leds),
+};
+
+static struct platform_device balloon3led_device = {
+       .name   = "leds-gpio",
+       .id     = -1,
+       .dev    = {
+               .platform_data  = &balloon3_gpio_leds_platform_data,
+       },
+};
+
+static void __init balloon3_init(void)
+{
+       pr_info("Initialising Balloon3\n");
+
+       /* system bus arbiter setting
+        * - Core_Park
+        * - LCD_wt:DMA_wt:CORE_Wt = 2:3:4
+        */
+       ARB_CNTRL = ARB_CORE_PARK | 0x234;
+
+       pxa_set_i2c_info(NULL);
+       if (balloon3_has(BALLOON3_FEATURE_AUDIO))
+               pxa_set_ac97_info(NULL);
+
+       if (balloon3_has(BALLOON3_FEATURE_TOPPOLY)) {
+               pxa2xx_mfp_config(ARRAY_AND_SIZE(balloon3_lcd_pin_config));
+               gpio_request(BALLOON3_GPIO_RUN_BACKLIGHT,
+                       "LCD Backlight Power");
+               gpio_direction_output(BALLOON3_GPIO_RUN_BACKLIGHT, 1);
+               set_pxa_fb_info(&balloon3_pxafb_info);
+       }
+
+       if (balloon3_has(BALLOON3_FEATURE_MMC)) {
+               pxa2xx_mfp_config(ARRAY_AND_SIZE(balloon3_mmc_pin_config));
+               pxa_set_mci_info(&balloon3_mci_platform_data);
+       }
+       pxa_set_ficp_info(&balloon3_ficp_platform_data);
+       if (balloon3_has(BALLOON3_FEATURE_OHCI)) {
+               pxa2xx_mfp_config(ARRAY_AND_SIZE(balloon3_ohci_pin_config));
+               pxa_set_ohci_info(&balloon3_ohci_platform_data);
+       }
+       pxa_set_udc_info(&balloon3_udc_info);
+
+       pxa2xx_mfp_config(ARRAY_AND_SIZE(balloon3_pin_config));
+
+       platform_device_register(&balloon3led_device);
+}
+
+static struct map_desc balloon3_io_desc[] __initdata = {
+       {       /* CPLD/FPGA */
+               .virtual        =  BALLOON3_FPGA_VIRT,
+               .pfn            = __phys_to_pfn(BALLOON3_FPGA_PHYS),
+               .length         = BALLOON3_FPGA_LENGTH,
+               .type           = MT_DEVICE,
+       },
+};
+
+static void __init balloon3_map_io(void)
+{
+       pxa_map_io();
+       iotable_init(balloon3_io_desc, ARRAY_SIZE(balloon3_io_desc));
+}
+
+MACHINE_START(BALLOON3, "Balloon3")
+       /* Maintainer: Nick Bane. */
+       .phys_io        = 0x40000000,
+       .io_pg_offst    = (io_p2v(0x40000000) >> 18) & 0xfffc,
+       .map_io         = balloon3_map_io,
+       .init_irq       = balloon3_init_irq,
+       .timer          = &pxa_timer,
+       .init_machine   = balloon3_init,
+       .boot_params    = PHYS_OFFSET + 0x100,
+MACHINE_END
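
The feature mask wired up above is a plain bit mask over enum balloon3_features (added in mach/balloon3.h later in this patch), and strict_strtoul() is called with base 0, so hex values work on the command line. A sketch of the arithmetic, with a hypothetical boot argument:

    /* Enum order: OHCI=0, MMC=1, CF=2, AUDIO=3, TOPPOLY=4, so the
     * default mask (OHCI|CF|AUDIO|TOPPOLY) is bits 0,2,3,4 == 0x1d.
     * Booting with "balloon3_features=0x1f" would also set bit 1,
     * making balloon3_has(BALLOON3_FEATURE_MMC) return 1 and enabling
     * the MMC block in balloon3_init().
     */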
diff --git a/arch/arm/mach-pxa/clock.h b/arch/arm/mach-pxa/clock.h
index 5599bce..978a366 100644
@@ -12,7 +12,6 @@ struct clk {
        unsigned int            cken;
        unsigned int            delay;
        unsigned int            enabled;
-       struct clk              *other;
 };
 
 #define INIT_CLKREG(_clk,_devname,_conname)            \
diff --git a/arch/arm/mach-pxa/cm-x270.c b/arch/arm/mach-pxa/cm-x270.c
index 1d2cec2..eea78b6 100644
 #include <linux/sysdev.h>
 #include <linux/irq.h>
 #include <linux/gpio.h>
+#include <linux/delay.h>
 
 #include <linux/rtc-v3020.h>
 #include <video/mbxfb.h>
 
+#include <linux/spi/spi.h>
+#include <linux/spi/libertas_spi.h>
+
 #include <mach/pxa27x.h>
 #include <mach/ohci.h>
 #include <mach/mmc.h>
+#include <mach/pxa2xx_spi.h>
 
 #include "generic.h"
 
 /* MMC power enable */
 #define GPIO105_MMC_POWER      (105)
 
+/* WLAN GPIOs */
+#define GPIO19_WLAN_STRAP      (19)
+#define GPIO102_WLAN_RST       (102)
+
 static unsigned long cmx270_pin_config[] = {
        /* AC'97 */
        GPIO28_AC97_BITCLK,
@@ -94,8 +103,8 @@ static unsigned long cmx270_pin_config[] = {
        GPIO26_SSP1_RXD,
 
        /* SSP2 */
-       GPIO19_SSP2_SCLK,
-       GPIO14_SSP2_SFRM,
+       GPIO19_GPIO,    /* SSP2 clock is used as GPIO for Libertas pin-strap */
+       GPIO14_GPIO,
        GPIO87_SSP2_TXD,
        GPIO88_SSP2_RXD,
 
@@ -123,6 +132,7 @@ static unsigned long cmx270_pin_config[] = {
        GPIO0_GPIO      | WAKEUP_ON_EDGE_BOTH,
        GPIO105_GPIO    | MFP_LPM_DRIVE_HIGH,   /* MMC/SD power */
        GPIO53_GPIO,                            /* PC card reset */
+       GPIO102_GPIO,                           /* WLAN reset */
 
        /* NAND controls */
        GPIO11_GPIO     | MFP_LPM_DRIVE_HIGH,   /* NAND CE# */
@@ -131,6 +141,7 @@ static unsigned long cmx270_pin_config[] = {
        /* interrupts */
        GPIO10_GPIO,    /* DM9000 interrupt */
        GPIO83_GPIO,    /* MMC card detect */
+       GPIO95_GPIO,    /* WLAN interrupt */
 };
 
 /* V3020 RTC */
@@ -271,64 +282,114 @@ static inline void cmx270_init_ohci(void) {}
 #endif
 
 #if defined(CONFIG_MMC) || defined(CONFIG_MMC_MODULE)
-static int cmx270_mci_init(struct device *dev,
-                          irq_handler_t cmx270_detect_int,
-                          void *data)
+static struct pxamci_platform_data cmx270_mci_platform_data = {
+       .ocr_mask               = MMC_VDD_32_33|MMC_VDD_33_34,
+       .gpio_card_detect       = GPIO83_MMC_IRQ,
+       .gpio_card_ro           = -1,
+       .gpio_power             = GPIO105_MMC_POWER,
+       .gpio_power_invert      = 1,
+};
+
+static void __init cmx270_init_mmc(void)
 {
-       int err;
+       pxa_set_mci_info(&cmx270_mci_platform_data);
+}
+#else
+static inline void cmx270_init_mmc(void) {}
+#endif
+
+#if defined(CONFIG_SPI_PXA2XX) || defined(CONFIG_SPI_PXA2XX_MODULE)
+static struct pxa2xx_spi_master cm_x270_spi_info = {
+       .num_chipselect = 1,
+       .enable_dma     = 1,
+};
+
+static struct pxa2xx_spi_chip cm_x270_libertas_chip = {
+       .rx_threshold   = 1,
+       .tx_threshold   = 1,
+       .timeout        = 1000,
+       .gpio_cs        = 14,
+};
+
+static unsigned long cm_x270_libertas_pin_config[] = {
+       /* SSP2 */
+       GPIO19_SSP2_SCLK,
+       GPIO14_GPIO,
+       GPIO87_SSP2_TXD,
+       GPIO88_SSP2_RXD,
+
+};
 
-       err = gpio_request(GPIO105_MMC_POWER, "MMC/SD power");
-       if (err) {
-               dev_warn(dev, "power gpio unavailable\n");
+static int cm_x270_libertas_setup(struct spi_device *spi)
+{
+       int err = gpio_request(GPIO19_WLAN_STRAP, "WLAN STRAP");
+       if (err)
                return err;
-       }
 
-       gpio_direction_output(GPIO105_MMC_POWER, 0);
+       err = gpio_request(GPIO102_WLAN_RST, "WLAN RST");
+       if (err)
+               goto err_free_strap;
 
-       err = request_irq(CMX270_MMC_IRQ, cmx270_detect_int,
-                         IRQF_DISABLED | IRQF_TRIGGER_FALLING,
-                         "MMC card detect", data);
-       if (err) {
-               gpio_free(GPIO105_MMC_POWER);
-               dev_err(dev, "cmx270_mci_init: MMC/SD: can't"
-                       " request MMC card detect IRQ\n");
-       }
+       err = gpio_direction_output(GPIO102_WLAN_RST, 0);
+       if (err)
+               goto err_free_strap;
+       msleep(100);
+
+       err = gpio_direction_output(GPIO19_WLAN_STRAP, 1);
+       if (err)
+               goto err_free_strap;
+       msleep(100);
+
+       pxa2xx_mfp_config(ARRAY_AND_SIZE(cm_x270_libertas_pin_config));
+
+       gpio_set_value(GPIO102_WLAN_RST, 1);
+       msleep(100);
+
+       spi->bits_per_word = 16;
+       spi_setup(spi);
+
+       return 0;
+
+err_free_strap:
+       gpio_free(GPIO19_WLAN_STRAP);
 
        return err;
 }
 
-static void cmx270_mci_setpower(struct device *dev, unsigned int vdd)
+static int cm_x270_libertas_teardown(struct spi_device *spi)
 {
-       struct pxamci_platform_data *p_d = dev->platform_data;
-
-       if ((1 << vdd) & p_d->ocr_mask) {
-               dev_dbg(dev, "power on\n");
-               gpio_set_value(GPIO105_MMC_POWER, 0);
-       } else {
-               gpio_set_value(GPIO105_MMC_POWER, 1);
-               dev_dbg(dev, "power off\n");
-       }
-}
+       gpio_set_value(GPIO102_WLAN_RST, 0);
+       gpio_free(GPIO102_WLAN_RST);
+       gpio_free(GPIO19_WLAN_STRAP);
 
-static void cmx270_mci_exit(struct device *dev, void *data)
-{
-       free_irq(CMX270_MMC_IRQ, data);
-       gpio_free(GPIO105_MMC_POWER);
+       return 0;
 }
 
-static struct pxamci_platform_data cmx270_mci_platform_data = {
-       .ocr_mask       = MMC_VDD_32_33|MMC_VDD_33_34,
-       .init           = cmx270_mci_init,
-       .setpower       = cmx270_mci_setpower,
-       .exit           = cmx270_mci_exit,
+static struct libertas_spi_platform_data cm_x270_libertas_pdata = {
+       .use_dummy_writes       = 1,
+       .setup                  = cm_x270_libertas_setup,
+       .teardown               = cm_x270_libertas_teardown,
 };
 
-static void __init cmx270_init_mmc(void)
+static struct spi_board_info cm_x270_spi_devices[] __initdata = {
+       {
+               .modalias               = "libertas_spi",
+               .max_speed_hz           = 13000000,
+               .bus_num                = 2,
+               .irq                    = gpio_to_irq(95),
+               .chip_select            = 0,
+               .controller_data        = &cm_x270_libertas_chip,
+               .platform_data          = &cm_x270_libertas_pdata,
+       },
+};
+
+static void __init cmx270_init_spi(void)
 {
-       pxa_set_mci_info(&cmx270_mci_platform_data);
+       pxa2xx_set_spi_info(2, &cm_x270_spi_info);
+       spi_register_board_info(ARRAY_AND_SIZE(cm_x270_spi_devices));
 }
 #else
-static inline void cmx270_init_mmc(void) {}
+static inline void cmx270_init_spi(void) {}
 #endif
 
 void __init cmx270_init(void)
@@ -343,4 +404,5 @@ void __init cmx270_init(void)
        cmx270_init_mmc();
        cmx270_init_ohci();
        cmx270_init_2700G();
+       cmx270_init_spi();
 }
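
For readers following the Libertas conversion, cm_x270_libertas_setup() above condenses to this bring-up timeline (editorial sketch; delays and order as in the code, the strap-sampling behaviour is the assumed intent of the pin-strap comment):

    /* 1. WLAN_RST (GPIO102) low, msleep(100)   - chip held in reset
     * 2. strap pin (GPIO19, the SSP2_SCLK pad) high, msleep(100)
     * 3. pins re-muxed back to SSP2 (cm_x270_libertas_pin_config)
     * 4. WLAN_RST high, msleep(100)            - chip leaves reset with
     *    the strap latched and talks SPI at up to 13 MHz, 16 bits/word
     */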
diff --git a/arch/arm/mach-pxa/cm-x300.c b/arch/arm/mach-pxa/cm-x300.c
index 465da26..aac2cda 100644
@@ -306,68 +306,21 @@ static void cm_x300_mci_exit(struct device *dev, void *data)
 }
 
 static struct pxamci_platform_data cm_x300_mci_platform_data = {
-       .detect_delay   = 20,
-       .ocr_mask       = MMC_VDD_32_33|MMC_VDD_33_34,
-       .init           = cm_x300_mci_init,
-       .exit           = cm_x300_mci_exit,
+       .detect_delay           = 20,
+       .ocr_mask               = MMC_VDD_32_33|MMC_VDD_33_34,
+       .init                   = cm_x300_mci_init,
+       .exit                   = cm_x300_mci_exit,
+       .gpio_card_detect       = -1,
+       .gpio_card_ro           = -1,
+       .gpio_power             = -1,
 };
 
-static int cm_x300_mci2_ro(struct device *dev)
-{
-       return gpio_get_value(GPIO85_MMC2_WP);
-}
-
-static int cm_x300_mci2_init(struct device *dev,
-                            irq_handler_t cm_x300_detect_int,
-                            void *data)
-{
-       int err;
-
-       /*
-        * setup GPIO for CM-X300 MMC controller
-        */
-       err = gpio_request(GPIO82_MMC2_IRQ, "mmc card detect");
-       if (err)
-               goto err_request_cd;
-       gpio_direction_input(GPIO82_MMC2_IRQ);
-
-       err = gpio_request(GPIO85_MMC2_WP, "mmc write protect");
-       if (err)
-               goto err_request_wp;
-       gpio_direction_input(GPIO85_MMC2_WP);
-
-       err = request_irq(CM_X300_MMC2_IRQ, cm_x300_detect_int,
-                         IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
-                         "MMC card detect", data);
-       if (err) {
-               printk(KERN_ERR "%s: MMC/SD/SDIO: "
-                               "can't request card detect IRQ\n", __func__);
-               goto err_request_irq;
-       }
-
-       return 0;
-
-err_request_irq:
-       gpio_free(GPIO85_MMC2_WP);
-err_request_wp:
-       gpio_free(GPIO82_MMC2_IRQ);
-err_request_cd:
-       return err;
-}
-
-static void cm_x300_mci2_exit(struct device *dev, void *data)
-{
-       free_irq(CM_X300_MMC2_IRQ, data);
-       gpio_free(GPIO82_MMC2_IRQ);
-       gpio_free(GPIO85_MMC2_WP);
-}
-
 static struct pxamci_platform_data cm_x300_mci2_platform_data = {
-       .detect_delay   = 20,
-       .ocr_mask       = MMC_VDD_32_33|MMC_VDD_33_34,
-       .init           = cm_x300_mci2_init,
-       .exit           = cm_x300_mci2_exit,
-       .get_ro         = cm_x300_mci2_ro,
+       .detect_delay           = 20,
+       .ocr_mask               = MMC_VDD_32_33|MMC_VDD_33_34,
+       .gpio_card_detect       = GPIO82_MMC2_IRQ,
+       .gpio_card_ro           = GPIO85_MMC2_WP,
+       .gpio_power             = -1,
 };
 
 static void __init cm_x300_init_mmc(void)
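
The gpio_card_detect/gpio_card_ro/gpio_power fields recurring throughout this patch hand those pins to the pxamci driver, with -1 opting out of each. A minimal hypothetical board fragment (example_mci is illustrative, not code from this patch):

    static struct pxamci_platform_data example_mci = {
            .ocr_mask         = MMC_VDD_32_33 | MMC_VDD_33_34,
            .gpio_card_detect = -1, /* no detect line; card assumed present */
            .gpio_card_ro     = -1, /* no write-protect switch */
            .gpio_power       = -1, /* power not software-controlled */
    };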
diff --git a/arch/arm/mach-pxa/colibri-pxa300.c b/arch/arm/mach-pxa/colibri-pxa300.c
index 7c9c34c..37c239c 100644
@@ -172,6 +172,7 @@ void __init colibri_pxa300_init(void)
 {
        colibri_pxa300_init_eth();
        colibri_pxa300_init_ohci();
+       colibri_pxa3xx_init_nand();
        colibri_pxa300_init_lcd();
        colibri_pxa3xx_init_lcd(mfp_to_gpio(GPIO39_GPIO));
        colibri_pxa310_init_ac97();
diff --git a/arch/arm/mach-pxa/colibri-pxa320.c b/arch/arm/mach-pxa/colibri-pxa320.c
index a18d37b..4945728 100644
@@ -164,15 +164,48 @@ static inline void __init colibri_pxa320_init_ac97(void)
 static inline void colibri_pxa320_init_ac97(void) {}
 #endif
 
+/*
+ * The following configuration is verified to work with the Toradex Orchid
+ * carrier board
+ */
+static mfp_cfg_t colibri_pxa320_uart_pin_config[] __initdata = {
+       /* UART 1 configuration (may be set by bootloader) */
+       GPIO99_UART1_CTS,
+       GPIO104_UART1_RTS,
+       GPIO97_UART1_RXD,
+       GPIO98_UART1_TXD,
+       GPIO101_UART1_DTR,
+       GPIO103_UART1_DSR,
+       GPIO100_UART1_DCD,
+       GPIO102_UART1_RI,
+
+       /* UART 2 configuration */
+       GPIO109_UART2_CTS,
+       GPIO112_UART2_RTS,
+       GPIO110_UART2_RXD,
+       GPIO111_UART2_TXD,
+
+       /* UART 3 configuration */
+       GPIO30_UART3_RXD,
+       GPIO31_UART3_TXD,
+};
+
+static void __init colibri_pxa320_init_uart(void)
+{
+       pxa3xx_mfp_config(ARRAY_AND_SIZE(colibri_pxa320_uart_pin_config));
+}
+
 void __init colibri_pxa320_init(void)
 {
        colibri_pxa320_init_eth();
        colibri_pxa320_init_ohci();
+       colibri_pxa3xx_init_nand();
        colibri_pxa320_init_lcd();
        colibri_pxa3xx_init_lcd(mfp_to_gpio(GPIO49_GPIO));
        colibri_pxa320_init_ac97();
        colibri_pxa3xx_init_mmc(ARRAY_AND_SIZE(colibri_pxa320_mmc_pin_config),
                                mfp_to_gpio(MFP_PIN_GPIO28));
+       colibri_pxa320_init_uart();
 }
 
 MACHINE_START(COLIBRI320, "Toradex Colibri PXA320")
diff --git a/arch/arm/mach-pxa/colibri-pxa3xx.c b/arch/arm/mach-pxa/colibri-pxa3xx.c
index ea34e34..efebaf4 100644
@@ -25,6 +25,7 @@
 #include <mach/colibri.h>
 #include <mach/mmc.h>
 #include <mach/pxafb.h>
+#include <mach/pxa3xx_nand.h>
 
 #include "generic.h"
 #include "devices.h"
@@ -95,10 +96,13 @@ static void colibri_pxa3xx_mci_exit(struct device *dev, void *data)
 }
 
 static struct pxamci_platform_data colibri_pxa3xx_mci_platform_data = {
-       .detect_delay   = 20,
-       .ocr_mask       = MMC_VDD_32_33 | MMC_VDD_33_34,
-       .init           = colibri_pxa3xx_mci_init,
-       .exit           = colibri_pxa3xx_mci_exit,
+       .detect_delay           = 20,
+       .ocr_mask               = MMC_VDD_32_33 | MMC_VDD_33_34,
+       .init                   = colibri_pxa3xx_mci_init,
+       .exit                   = colibri_pxa3xx_mci_exit,
+       .gpio_card_detect       = -1,
+       .gpio_card_ro           = -1,
+       .gpio_power             = -1,
 };
 
 void __init colibri_pxa3xx_init_mmc(mfp_cfg_t *pins, int len, int detect_pin)
@@ -154,3 +158,43 @@ void __init colibri_pxa3xx_init_lcd(int bl_pin)
 }
 #endif
 
+#if defined(CONFIG_MTD_NAND_PXA3xx) || defined(CONFIG_MTD_NAND_PXA3xx_MODULE)
+static struct mtd_partition colibri_nand_partitions[] = {
+       {
+               .name        = "bootloader",
+               .offset      = 0,
+               .size        = SZ_512K,
+               .mask_flags  = MTD_WRITEABLE, /* force read-only */
+       },
+       {
+               .name        = "kernel",
+               .offset      = MTDPART_OFS_APPEND,
+               .size        = SZ_4M,
+               .mask_flags  = MTD_WRITEABLE, /* force read-only */
+       },
+       {
+               .name        = "reserved",
+               .offset      = MTDPART_OFS_APPEND,
+               .size        = SZ_1M,
+               .mask_flags  = MTD_WRITEABLE, /* force read-only */
+       },
+       {
+               .name        = "fs",
+               .offset      = MTDPART_OFS_APPEND,
+               .size        = MTDPART_SIZ_FULL,
+       },
+};
+
+static struct pxa3xx_nand_platform_data colibri_nand_info = {
+       .enable_arbiter = 1,
+       .keep_config    = 1,
+       .parts          = colibri_nand_partitions,
+       .nr_parts       = ARRAY_SIZE(colibri_nand_partitions),
+};
+
+void __init colibri_pxa3xx_init_nand(void)
+{
+       pxa3xx_set_nand_info(&colibri_nand_info);
+}
+#endif
+
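The MTDPART_OFS_APPEND offsets above resolve to the following NAND layout (straightforward arithmetic from SZ_512K, SZ_4M and SZ_1M):

    /* 0x000000-0x080000  bootloader  512 KiB  read-only
     * 0x080000-0x480000  kernel      4 MiB    read-only
     * 0x480000-0x580000  reserved    1 MiB    read-only
     * 0x580000-end       fs          remainder of the chip
     */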
diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c
index 5363e1a..b536b5a 100644
@@ -29,6 +29,7 @@
 #include <linux/spi/ads7846.h>
 #include <linux/spi/corgi_lcd.h>
 #include <linux/mtd/sharpsl.h>
+#include <linux/input/matrix_keypad.h>
 #include <video/w100fb.h>
 
 #include <asm/setup.h>
@@ -104,6 +105,28 @@ static unsigned long corgi_pin_config[] __initdata = {
        GPIO6_MMC_CLK,
        GPIO8_MMC_CS0,
 
+       /* GPIO Matrix Keypad */
+       GPIO66_GPIO,    /* column 0 */
+       GPIO67_GPIO,    /* column 1 */
+       GPIO68_GPIO,    /* column 2 */
+       GPIO69_GPIO,    /* column 3 */
+       GPIO70_GPIO,    /* column 4 */
+       GPIO71_GPIO,    /* column 5 */
+       GPIO72_GPIO,    /* column 6 */
+       GPIO73_GPIO,    /* column 7 */
+       GPIO74_GPIO,    /* column 8 */
+       GPIO75_GPIO,    /* column 9 */
+       GPIO76_GPIO,    /* column 10 */
+       GPIO77_GPIO,    /* column 11 */
+       GPIO58_GPIO,    /* row 0 */
+       GPIO59_GPIO,    /* row 1 */
+       GPIO60_GPIO,    /* row 2 */
+       GPIO61_GPIO,    /* row 3 */
+       GPIO62_GPIO,    /* row 4 */
+       GPIO63_GPIO,    /* row 5 */
+       GPIO64_GPIO,    /* row 6 */
+       GPIO65_GPIO,    /* row 7 */
+
        /* GPIO */
        GPIO9_GPIO,     /* CORGI_GPIO_nSD_DETECT */
        GPIO7_GPIO,     /* CORGI_GPIO_nSD_WP */
@@ -267,9 +290,115 @@ static struct platform_device corgifb_device = {
 /*
  * Corgi Keyboard Device
  */
+#define CORGI_KEY_CALENDER     KEY_F1
+#define CORGI_KEY_ADDRESS      KEY_F2
+#define CORGI_KEY_FN           KEY_F3
+#define CORGI_KEY_CANCEL       KEY_F4
+#define CORGI_KEY_OFF          KEY_SUSPEND
+#define CORGI_KEY_EXOK         KEY_F5
+#define CORGI_KEY_EXCANCEL     KEY_F6
+#define CORGI_KEY_EXJOGDOWN    KEY_F7
+#define CORGI_KEY_EXJOGUP      KEY_F8
+#define CORGI_KEY_JAP1         KEY_LEFTCTRL
+#define CORGI_KEY_JAP2         KEY_LEFTALT
+#define CORGI_KEY_MAIL         KEY_F10
+#define CORGI_KEY_OK           KEY_F11
+#define CORGI_KEY_MENU         KEY_F12
+
+static const uint32_t corgikbd_keymap[] = {
+       KEY(0, 1, KEY_1),
+       KEY(0, 2, KEY_3),
+       KEY(0, 3, KEY_5),
+       KEY(0, 4, KEY_6),
+       KEY(0, 5, KEY_7),
+       KEY(0, 6, KEY_9),
+       KEY(0, 7, KEY_0),
+       KEY(0, 8, KEY_BACKSPACE),
+       KEY(1, 1, KEY_2),
+       KEY(1, 2, KEY_4),
+       KEY(1, 3, KEY_R),
+       KEY(1, 4, KEY_Y),
+       KEY(1, 5, KEY_8),
+       KEY(1, 6, KEY_I),
+       KEY(1, 7, KEY_O),
+       KEY(1, 8, KEY_P),
+       KEY(2, 0, KEY_TAB),
+       KEY(2, 1, KEY_Q),
+       KEY(2, 2, KEY_E),
+       KEY(2, 3, KEY_T),
+       KEY(2, 4, KEY_G),
+       KEY(2, 5, KEY_U),
+       KEY(2, 6, KEY_J),
+       KEY(2, 7, KEY_K),
+       KEY(3, 0, CORGI_KEY_CALENDER),
+       KEY(3, 1, KEY_W),
+       KEY(3, 2, KEY_S),
+       KEY(3, 3, KEY_F),
+       KEY(3, 4, KEY_V),
+       KEY(3, 5, KEY_H),
+       KEY(3, 6, KEY_M),
+       KEY(3, 7, KEY_L),
+       KEY(3, 9, KEY_RIGHTSHIFT),
+       KEY(4, 0, CORGI_KEY_ADDRESS),
+       KEY(4, 1, KEY_A),
+       KEY(4, 2, KEY_D),
+       KEY(4, 3, KEY_C),
+       KEY(4, 4, KEY_B),
+       KEY(4, 5, KEY_N),
+       KEY(4, 6, KEY_DOT),
+       KEY(4, 8, KEY_ENTER),
+       KEY(4, 10, KEY_LEFTSHIFT),
+       KEY(5, 0, CORGI_KEY_MAIL),
+       KEY(5, 1, KEY_Z),
+       KEY(5, 2, KEY_X),
+       KEY(5, 3, KEY_MINUS),
+       KEY(5, 4, KEY_SPACE),
+       KEY(5, 5, KEY_COMMA),
+       KEY(5, 7, KEY_UP),
+       KEY(5, 11, CORGI_KEY_FN),
+       KEY(6, 0, KEY_SYSRQ),
+       KEY(6, 1, CORGI_KEY_JAP1),
+       KEY(6, 2, CORGI_KEY_JAP2),
+       KEY(6, 3, CORGI_KEY_CANCEL),
+       KEY(6, 4, CORGI_KEY_OK),
+       KEY(6, 5, CORGI_KEY_MENU),
+       KEY(6, 6, KEY_LEFT),
+       KEY(6, 7, KEY_DOWN),
+       KEY(6, 8, KEY_RIGHT),
+       KEY(7, 0, CORGI_KEY_OFF),
+       KEY(7, 1, CORGI_KEY_EXOK),
+       KEY(7, 2, CORGI_KEY_EXCANCEL),
+       KEY(7, 3, CORGI_KEY_EXJOGDOWN),
+       KEY(7, 4, CORGI_KEY_EXJOGUP),
+};
+
+static struct matrix_keymap_data corgikbd_keymap_data = {
+       .keymap         = corgikbd_keymap,
+       .keymap_size    = ARRAY_SIZE(corgikbd_keymap),
+};
+
+static const int corgikbd_row_gpios[] =
+               { 58, 59, 60, 61, 62, 63, 64, 65 };
+static const int corgikbd_col_gpios[] =
+               { 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77 };
+
+static struct matrix_keypad_platform_data corgikbd_pdata = {
+       .keymap_data            = &corgikbd_keymap_data,
+       .row_gpios              = corgikbd_row_gpios,
+       .col_gpios              = corgikbd_col_gpios,
+       .num_row_gpios          = ARRAY_SIZE(corgikbd_row_gpios),
+       .num_col_gpios          = ARRAY_SIZE(corgikbd_col_gpios),
+       .col_scan_delay_us      = 10,
+       .debounce_ms            = 10,
+       .wakeup                 = 1,
+};
+
 static struct platform_device corgikbd_device = {
-       .name           = "corgi-keyboard",
+       .name           = "matrix-keypad",
        .id             = -1,
+       .dev            = {
+               .platform_data = &corgikbd_pdata,
+       },
 };
 
 /*
@@ -307,111 +436,20 @@ static struct platform_device corgiled_device = {
  * The card detect interrupt isn't debounced so we delay it by 250ms
  * to give the card a chance to fully insert/eject.
  */
-static struct pxamci_platform_data corgi_mci_platform_data;
-
-static int corgi_mci_init(struct device *dev, irq_handler_t corgi_detect_int, void *data)
-{
-       int err;
-
-       err = gpio_request(CORGI_GPIO_nSD_DETECT, "nSD_DETECT");
-       if (err)
-               goto err_out;
-
-       err = gpio_request(CORGI_GPIO_nSD_WP, "nSD_WP");
-       if (err)
-               goto err_free_1;
-
-       err = gpio_request(CORGI_GPIO_SD_PWR, "SD_PWR");
-       if (err)
-               goto err_free_2;
-
-       gpio_direction_input(CORGI_GPIO_nSD_DETECT);
-       gpio_direction_input(CORGI_GPIO_nSD_WP);
-       gpio_direction_output(CORGI_GPIO_SD_PWR, 0);
-
-       corgi_mci_platform_data.detect_delay = msecs_to_jiffies(250);
-
-       err = request_irq(CORGI_IRQ_GPIO_nSD_DETECT, corgi_detect_int,
-                               IRQF_DISABLED | IRQF_TRIGGER_RISING |
-                               IRQF_TRIGGER_FALLING,
-                               "MMC card detect", data);
-       if (err) {
-               pr_err("%s: MMC/SD: can't request MMC card detect IRQ\n",
-                               __func__);
-               goto err_free_3;
-       }
-       return 0;
-
-err_free_3:
-       gpio_free(CORGI_GPIO_SD_PWR);
-err_free_2:
-       gpio_free(CORGI_GPIO_nSD_WP);
-err_free_1:
-       gpio_free(CORGI_GPIO_nSD_DETECT);
-err_out:
-       return err;
-}
-
-static void corgi_mci_setpower(struct device *dev, unsigned int vdd)
-{
-       struct pxamci_platform_data* p_d = dev->platform_data;
-
-       gpio_set_value(CORGI_GPIO_SD_PWR, ((1 << vdd) & p_d->ocr_mask));
-}
-
-static int corgi_mci_get_ro(struct device *dev)
-{
-       return gpio_get_value(CORGI_GPIO_nSD_WP);
-}
-
-static void corgi_mci_exit(struct device *dev, void *data)
-{
-       free_irq(CORGI_IRQ_GPIO_nSD_DETECT, data);
-       gpio_free(CORGI_GPIO_SD_PWR);
-       gpio_free(CORGI_GPIO_nSD_WP);
-       gpio_free(CORGI_GPIO_nSD_DETECT);
-}
-
 static struct pxamci_platform_data corgi_mci_platform_data = {
-       .ocr_mask       = MMC_VDD_32_33|MMC_VDD_33_34,
-       .init           = corgi_mci_init,
-       .get_ro         = corgi_mci_get_ro,
-       .setpower       = corgi_mci_setpower,
-       .exit           = corgi_mci_exit,
+       .ocr_mask               = MMC_VDD_32_33|MMC_VDD_33_34,
+       .gpio_card_detect       = -1,
+       .gpio_card_ro           = CORGI_GPIO_nSD_WP,
+       .gpio_power             = CORGI_GPIO_SD_PWR,
 };
 
 
 /*
  * Irda
  */
-static void corgi_irda_transceiver_mode(struct device *dev, int mode)
-{
-       gpio_set_value(CORGI_GPIO_IR_ON, mode & IR_OFF);
-       pxa2xx_transceiver_mode(dev, mode);
-}
-
-static int corgi_irda_startup(struct device *dev)
-{
-       int err;
-
-       err = gpio_request(CORGI_GPIO_IR_ON, "IR_ON");
-       if (err)
-               return err;
-
-       gpio_direction_output(CORGI_GPIO_IR_ON, 1);
-       return 0;
-}
-
-static void corgi_irda_shutdown(struct device *dev)
-{
-       gpio_free(CORGI_GPIO_IR_ON);
-}
-
 static struct pxaficp_platform_data corgi_ficp_platform_data = {
+       .gpio_pwdown            = CORGI_GPIO_IR_ON,
        .transceiver_cap        = IR_SIRMODE | IR_OFF,
-       .transceiver_mode       = corgi_irda_transceiver_mode,
-       .startup                = corgi_irda_startup,
-       .shutdown               = corgi_irda_shutdown,
 };
 
 
@@ -636,6 +674,7 @@ static void __init corgi_init(void)
        corgi_init_spi();
 
        pxa_set_udc_info(&udc_info);
+       corgi_mci_platform_data.detect_delay = msecs_to_jiffies(250);
        pxa_set_mci_info(&corgi_mci_platform_data);
        pxa_set_ficp_info(&corgi_ficp_platform_data);
        pxa_set_i2c_info(NULL);
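
For orientation, each KEY() entry above packs row, column and keycode into one u32 (per include/linux/input/matrix_keypad.h), and the indices select entries in the GPIO arrays:

    /* KEY(row, col, code) packs roughly to (row << 24) | (col << 16) | code,
     * so KEY(2, 0, KEY_TAB) means: row gpio 60 (corgikbd_row_gpios[2]) and
     * column gpio 66 (corgikbd_col_gpios[0]) produce TAB when connected.
     */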
diff --git a/arch/arm/mach-pxa/csb726.c b/arch/arm/mach-pxa/csb726.c
index 7d3e1b4..79141f8 100644
@@ -130,61 +130,17 @@ static struct pxamci_platform_data csb726_mci_data;
 static int csb726_mci_init(struct device *dev,
                irq_handler_t detect, void *data)
 {
-       int err;
-
        csb726_mci_data.detect_delay = msecs_to_jiffies(500);
-
-       err = gpio_request(CSB726_GPIO_MMC_DETECT, "MMC detect");
-       if (err)
-               goto err_det_req;
-
-       err = gpio_direction_input(CSB726_GPIO_MMC_DETECT);
-       if (err)
-               goto err_det_dir;
-
-       err = gpio_request(CSB726_GPIO_MMC_RO, "MMC ro");
-       if (err)
-               goto err_ro_req;
-
-       err = gpio_direction_input(CSB726_GPIO_MMC_RO);
-       if (err)
-               goto err_ro_dir;
-
-       err = request_irq(gpio_to_irq(CSB726_GPIO_MMC_DETECT), detect,
-                       IRQF_DISABLED, "MMC card detect", data);
-       if (err)
-               goto err_irq;
-
        return 0;
-
-err_irq:
-err_ro_dir:
-       gpio_free(CSB726_GPIO_MMC_RO);
-err_ro_req:
-err_det_dir:
-       gpio_free(CSB726_GPIO_MMC_DETECT);
-err_det_req:
-       return err;
-}
-
-static int csb726_mci_get_ro(struct device *dev)
-{
-       return gpio_get_value(CSB726_GPIO_MMC_RO);
-}
-
-static void csb726_mci_exit(struct device *dev, void *data)
-{
-       free_irq(gpio_to_irq(CSB726_GPIO_MMC_DETECT), data);
-       gpio_free(CSB726_GPIO_MMC_RO);
-       gpio_free(CSB726_GPIO_MMC_DETECT);
 }
 
 static struct pxamci_platform_data csb726_mci = {
-       .ocr_mask       = MMC_VDD_32_33|MMC_VDD_33_34,
-       .init           = csb726_mci_init,
-       .get_ro         = csb726_mci_get_ro,
+       .ocr_mask               = MMC_VDD_32_33|MMC_VDD_33_34,
+       .init                   = csb726_mci_init,
        /* FIXME setpower */
-       .exit           = csb726_mci_exit,
+       .gpio_card_detect       = CSB726_GPIO_MMC_DETECT,
+       .gpio_card_ro           = CSB726_GPIO_MMC_RO,
+       .gpio_power             = -1,
 };
 
 static struct pxaohci_platform_data csb726_ohci_platform_data = {
diff --git a/arch/arm/mach-pxa/devices.c b/arch/arm/mach-pxa/devices.c
index ecc08f3..46fabe1 100644
@@ -935,6 +935,33 @@ void __init pxa3xx_set_nand_info(struct pxa3xx_nand_platform_data *info)
 {
        pxa_register_device(&pxa3xx_device_nand, info);
 }
+
+static struct resource pxa3xx_resources_gcu[] = {
+       {
+               .start  = 0x54000000,
+               .end    = 0x54000fff,
+               .flags  = IORESOURCE_MEM,
+       },
+       {
+               .start  = IRQ_GCU,
+               .end    = IRQ_GCU,
+               .flags  = IORESOURCE_IRQ,
+       },
+};
+
+static u64 pxa3xx_gcu_dmamask = DMA_BIT_MASK(32);
+
+struct platform_device pxa3xx_device_gcu = {
+       .name           = "pxa3xx-gcu",
+       .id             = -1,
+       .num_resources  = ARRAY_SIZE(pxa3xx_resources_gcu),
+       .resource       = pxa3xx_resources_gcu,
+       .dev            = {
+               .dma_mask = &pxa3xx_gcu_dmamask,
+               .coherent_dma_mask = 0xffffffff,
+       },
+};
+
 #endif /* CONFIG_PXA3xx */
 
 /* pxa2xx-spi platform-device ID equals respective SSP platform-device ID + 1.
diff --git a/arch/arm/mach-pxa/devices.h b/arch/arm/mach-pxa/devices.h
index ecc24a4..93817d9 100644
@@ -35,4 +35,6 @@ extern struct platform_device pxa27x_device_pwm1;
 extern struct platform_device pxa3xx_device_nand;
 extern struct platform_device pxa3xx_device_i2c_power;
 
+extern struct platform_device pxa3xx_device_gcu;
+
 void __init pxa_register_device(struct platform_device *dev, void *data);
diff --git a/arch/arm/mach-pxa/e740.c b/arch/arm/mach-pxa/e740.c
index a36fc17..49acdfa 100644
@@ -199,7 +199,6 @@ static void __init e740_init(void)
        platform_add_devices(devices, ARRAY_SIZE(devices));
        pxa_set_udc_info(&e7xx_udc_mach_info);
        pxa_set_ac97_info(NULL);
-       e7xx_irda_init();
        pxa_set_ficp_info(&e7xx_ficp_platform_data);
 }
 
diff --git a/arch/arm/mach-pxa/e750.c b/arch/arm/mach-pxa/e750.c
index 1d00110..4052ece 100644
@@ -200,7 +200,6 @@ static void __init e750_init(void)
        platform_add_devices(devices, ARRAY_SIZE(devices));
        pxa_set_udc_info(&e7xx_udc_mach_info);
        pxa_set_ac97_info(NULL);
-       e7xx_irda_init();
        pxa_set_ficp_info(&e7xx_ficp_platform_data);
 }
 
diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index 9cd0946..aec7f42 100644
@@ -646,13 +646,16 @@ static int em_x270_mci_get_ro(struct device *dev)
 }
 
 static struct pxamci_platform_data em_x270_mci_platform_data = {
-       .ocr_mask       = MMC_VDD_20_21|MMC_VDD_21_22|MMC_VDD_22_23|
-                         MMC_VDD_24_25|MMC_VDD_25_26|MMC_VDD_26_27|
-                         MMC_VDD_27_28|MMC_VDD_28_29|MMC_VDD_29_30|
-                         MMC_VDD_30_31|MMC_VDD_31_32,
-       .init           = em_x270_mci_init,
-       .setpower       = em_x270_mci_setpower,
-       .exit           = em_x270_mci_exit,
+       .ocr_mask               = MMC_VDD_20_21|MMC_VDD_21_22|MMC_VDD_22_23|
+                                 MMC_VDD_24_25|MMC_VDD_25_26|MMC_VDD_26_27|
+                                 MMC_VDD_27_28|MMC_VDD_28_29|MMC_VDD_29_30|
+                                 MMC_VDD_30_31|MMC_VDD_31_32,
+       .init                   = em_x270_mci_init,
+       .setpower               = em_x270_mci_setpower,
+       .exit                   = em_x270_mci_exit,
+       .gpio_card_detect       = -1,
+       .gpio_card_ro           = -1,
+       .gpio_power             = -1,
 };
 
 static void __init em_x270_init_mmc(void)
@@ -1022,22 +1025,32 @@ static int em_x270_sensor_power(struct device *dev, int on)
        return 0;
 }
 
-static struct soc_camera_link iclink = {
-       .bus_id = 0,
-       .power = em_x270_sensor_power,
-};
-
 static struct i2c_board_info em_x270_i2c_cam_info[] = {
        {
                I2C_BOARD_INFO("mt9m111", 0x48),
+       },
+};
+
+static struct soc_camera_link iclink = {
+       .bus_id         = 0,
+       .power          = em_x270_sensor_power,
+       .board_info     = &em_x270_i2c_cam_info[0],
+       .i2c_adapter_id = 0,
+       .module_name    = "mt9m111",
+};
+
+static struct platform_device em_x270_camera = {
+       .name   = "soc-camera-pdrv",
+       .id     = -1,
+       .dev    = {
                .platform_data = &iclink,
        },
 };
 
 static void  __init em_x270_init_camera(void)
 {
-       i2c_register_board_info(0, ARRAY_AND_SIZE(em_x270_i2c_cam_info));
        pxa_set_camera_info(&em_x270_camera_platform_data);
+       platform_device_register(&em_x270_camera);
 }
 #else
 static inline void em_x270_init_camera(void) {}
@@ -1103,6 +1116,7 @@ REGULATOR_CONSUMER(ldo5, NULL, "vcc cam");
 REGULATOR_CONSUMER(ldo10, &pxa_device_mci.dev, "vcc sdio");
 REGULATOR_CONSUMER(ldo12, NULL, "vcc usb");
 REGULATOR_CONSUMER(ldo19, &em_x270_gprs_userspace_consumer.dev, "vcc gprs");
+REGULATOR_CONSUMER(buck2, NULL, "vcc_core");
 
 #define REGULATOR_INIT(_ldo, _min_uV, _max_uV, _ops_mask)              \
        static struct regulator_init_data _ldo##_data = {               \
@@ -1125,6 +1139,7 @@ REGULATOR_INIT(ldo10, 2000000, 3200000,
               REGULATOR_CHANGE_STATUS | REGULATOR_CHANGE_VOLTAGE);
 REGULATOR_INIT(ldo12, 3000000, 3000000, REGULATOR_CHANGE_STATUS);
 REGULATOR_INIT(ldo19, 3200000, 3200000, REGULATOR_CHANGE_STATUS);
+REGULATOR_INIT(buck2, 1000000, 1650000, REGULATOR_CHANGE_VOLTAGE);
 
 struct led_info em_x270_led_info = {
        .name = "em-x270:orange",
@@ -1194,6 +1209,8 @@ struct da903x_subdev_info em_x270_da9030_subdevs[] = {
        DA9030_LDO(12),
        DA9030_LDO(19),
 
+       DA9030_SUBDEV(regulator, BUCK2, &buck2_data),
+
        DA9030_SUBDEV(led, LED_PC, &em_x270_led_info),
        DA9030_SUBDEV(backlight, WLED, &em_x270_led_info),
        DA9030_SUBDEV(battery, BAT, &em_x270_batterty_info),
@@ -1245,7 +1262,6 @@ static void __init em_x270_init_i2c(void)
 
 static void __init em_x270_module_init(void)
 {
-       pr_info("%s\n", __func__);
        pxa2xx_mfp_config(ARRAY_AND_SIZE(em_x270_pin_config));
 
        mmc_cd = GPIO13_MMC_CD;
@@ -1257,7 +1273,6 @@ static void __init em_x270_module_init(void)
 
 static void __init em_x270_exeda_init(void)
 {
-       pr_info("%s\n", __func__);
        pxa2xx_mfp_config(ARRAY_AND_SIZE(exeda_pin_config));
 
        mmc_cd = GPIO114_MMC_CD;
diff --git a/arch/arm/mach-pxa/eseries.c b/arch/arm/mach-pxa/eseries.c
index c60dadf..91417f0 100644
@@ -47,44 +47,9 @@ struct pxa2xx_udc_mach_info e7xx_udc_mach_info = {
        .gpio_pullup_inverted = 1
 };
 
-static void e7xx_irda_transceiver_mode(struct device *dev, int mode)
-{
-       if (mode & IR_OFF) {
-               gpio_set_value(GPIO_E7XX_IR_OFF, 1);
-               pxa2xx_transceiver_mode(dev, mode);
-       } else {
-               pxa2xx_transceiver_mode(dev, mode);
-               gpio_set_value(GPIO_E7XX_IR_OFF, 0);
-       }
-}
-
-int e7xx_irda_init(void)
-{
-       int ret;
-
-       ret = gpio_request(GPIO_E7XX_IR_OFF, "IrDA power");
-       if (ret)
-               goto out;
-
-       ret = gpio_direction_output(GPIO_E7XX_IR_OFF, 0);
-       if (ret)
-               goto out;
-
-       e7xx_irda_transceiver_mode(NULL, IR_SIRMODE | IR_OFF);
-out:
-       return ret;
-}
-
-static void e7xx_irda_shutdown(struct device *dev)
-{
-       e7xx_irda_transceiver_mode(dev, IR_SIRMODE | IR_OFF);
-       gpio_free(GPIO_E7XX_IR_OFF);
-}
-
 struct pxaficp_platform_data e7xx_ficp_platform_data = {
-       .transceiver_cap  = IR_SIRMODE | IR_OFF,
-       .transceiver_mode = e7xx_irda_transceiver_mode,
-       .shutdown = e7xx_irda_shutdown,
+       .gpio_pwdown            = GPIO_E7XX_IR_OFF,
+       .transceiver_cap        = IR_SIRMODE | IR_OFF,
 };
 
 int eseries_tmio_enable(struct platform_device *dev)
diff --git a/arch/arm/mach-pxa/gumstix.c b/arch/arm/mach-pxa/gumstix.c
index ca9912e..1708c01 100644
@@ -88,7 +88,10 @@ static struct platform_device *devices[] __initdata = {
 
 #ifdef CONFIG_MMC_PXA
 static struct pxamci_platform_data gumstix_mci_platform_data = {
-       .ocr_mask       = MMC_VDD_32_33|MMC_VDD_33_34,
+       .ocr_mask               = MMC_VDD_32_33|MMC_VDD_33_34,
+       .gpio_card_detect       = -1,
+       .gpio_card_ro           = -1,
+       .gpio_power             = -1,
 };
 
 static void __init gumstix_mmc_init(void)
diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c
index 81359d5..abff9e1 100644
@@ -45,6 +45,7 @@
 #include <mach/irda.h>
 #include <mach/pxa2xx_spi.h>
 
+#include <video/platform_lcd.h>
 #include <video/w100fb.h>
 
 #include "devices.h"
@@ -174,14 +175,9 @@ static int hx4700_gpio_request(struct gpio_ress *gpios, int size)
  * IRDA
  */
 
-static void irda_transceiver_mode(struct device *dev, int mode)
-{
-       gpio_set_value(GPIO105_HX4700_nIR_ON, mode & IR_OFF);
-}
-
 static struct pxaficp_platform_data ficp_info = {
-       .transceiver_cap  = IR_SIRMODE | IR_OFF,
-       .transceiver_mode = irda_transceiver_mode,
+       .gpio_pwdown            = GPIO105_HX4700_nIR_ON,
+       .transceiver_cap        = IR_SIRMODE | IR_OFF,
 };
 
 /*
@@ -368,8 +364,6 @@ static struct platform_device egpio = {
  * LCD - Sony display connected to ATI Imageon w3220
  */
 
-static int lcd_power;
-
 static void sony_lcd_init(void)
 {
        gpio_set_value(GPIO84_HX4700_LCD_SQN, 1);
@@ -410,35 +404,6 @@ static void sony_lcd_off(void)
        gpio_set_value(GPIO110_HX4700_LCD_LVDD_3V3_ON, 0);
 }
 
-static int hx4700_lcd_set_power(struct lcd_device *ldev, int level)
-{
-       switch (level) {
-       case FB_BLANK_UNBLANK:
-               sony_lcd_init();
-               break;
-       case FB_BLANK_NORMAL:
-       case FB_BLANK_VSYNC_SUSPEND:
-       case FB_BLANK_HSYNC_SUSPEND:
-       case FB_BLANK_POWERDOWN:
-               sony_lcd_off();
-               break;
-       }
-       lcd_power = level;
-       return 0;
-}
-
-static int hx4700_lcd_get_power(struct lcd_device *lm)
-{
-       return lcd_power;
-}
-
-static struct lcd_ops hx4700_lcd_ops = {
-       .get_power = hx4700_lcd_get_power,
-       .set_power = hx4700_lcd_set_power,
-};
-
-static struct lcd_device *hx4700_lcd_device;
-
 #ifdef CONFIG_PM
 static void w3220_lcd_suspend(struct w100fb_par *wfb)
 {
@@ -573,6 +538,27 @@ static struct platform_device w3220 = {
        .resource      = w3220_resources,
 };
 
+static void hx4700_lcd_set_power(struct plat_lcd_data *pd, unsigned int power)
+{
+       if (power)
+               sony_lcd_init();
+       else
+               sony_lcd_off();
+}
+
+static struct plat_lcd_data hx4700_lcd_data = {
+       .set_power = hx4700_lcd_set_power,
+};
+
+static struct platform_device hx4700_lcd = {
+       .name = "platform-lcd",
+       .id   = -1,
+       .dev  = {
+               .platform_data = &hx4700_lcd_data,
+               .parent        = &w3220.dev,
+       },
+};
+
 /*
  * Backlight
  */
@@ -872,9 +858,6 @@ static void __init hx4700_init(void)
        pxa2xx_set_spi_info(2, &pxa_ssp2_master_info);
        spi_register_board_info(ARRAY_AND_SIZE(tsc2046_board_info));
 
-       hx4700_lcd_device = lcd_device_register("w100fb", NULL,
-                                       (void *)&w3220_info, &hx4700_lcd_ops);
-
        gpio_set_value(GPIO71_HX4700_ASIC3_nRESET, 0);
        mdelay(10);
        gpio_set_value(GPIO71_HX4700_ASIC3_nRESET, 1);
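
The conversion above leans on drivers/video/backlight/platform_lcd.c to register the lcd_device itself; in effect it reproduces the deleted hx4700_lcd_ops (editorial summary):

    /* platform-lcd invokes hx4700_lcd_set_power(pd, 1) on FB_BLANK_UNBLANK
     * and (pd, 0) for all blanking levels - the same behaviour as the
     * switch statement removed above.
     */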
diff --git a/arch/arm/mach-pxa/idp.c b/arch/arm/mach-pxa/idp.c
index b6243b5..b6486ef 100644
@@ -168,7 +168,10 @@ static struct pxafb_mach_info sharp_lm8v31 = {
 };
 
 static struct pxamci_platform_data idp_mci_platform_data = {
-       .ocr_mask       = MMC_VDD_32_33|MMC_VDD_33_34,
+       .ocr_mask               = MMC_VDD_32_33|MMC_VDD_33_34,
+       .gpio_card_detect       = -1,
+       .gpio_card_ro           = -1,
+       .gpio_power             = -1,
 };
 
 static void __init idp_init(void)
diff --git a/arch/arm/mach-pxa/imote2.c b/arch/arm/mach-pxa/imote2.c
index 961807d..2a4945d 100644
@@ -389,6 +389,9 @@ static int imote2_mci_get_ro(struct device *dev)
 static struct pxamci_platform_data imote2_mci_platform_data = {
        .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, /* default anyway */
        .get_ro = imote2_mci_get_ro,
+       .gpio_card_detect = -1,
+       .gpio_card_ro = -1,
+       .gpio_power = -1,
 };
 
 static struct mtd_partition imote2flash_partitions[] = {
diff --git a/arch/arm/mach-pxa/include/mach/balloon3.h b/arch/arm/mach-pxa/include/mach/balloon3.h
new file mode 100644
index 0000000..bfec09b
--- /dev/null
+++ b/arch/arm/mach-pxa/include/mach/balloon3.h
@@ -0,0 +1,134 @@
+/*
+ *  arch/arm/mach-pxa/include/mach/balloon3.h
+ *
+ *  Authors:   Nick Bane and Wookey
+ *  Created:   Oct, 2005
+ *  Copyright: Toby Churchill Ltd
+ *  Cribbed from mainstone.c, by Nicolas Pitre
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef ASM_ARCH_BALLOON3_H
+#define ASM_ARCH_BALLOON3_H
+
+enum balloon3_features {
+       BALLOON3_FEATURE_OHCI,
+       BALLOON3_FEATURE_MMC,
+       BALLOON3_FEATURE_CF,
+       BALLOON3_FEATURE_AUDIO,
+       BALLOON3_FEATURE_TOPPOLY,
+};
+
+#define BALLOON3_FPGA_PHYS     PXA_CS4_PHYS
+#define BALLOON3_FPGA_VIRT     (0xf1000000)    /* as per balloon2 */
+#define BALLOON3_FPGA_LENGTH   0x01000000
+
+/* FPGA/CPLD registers */
+#define BALLOON3_PCMCIA0_REG           (BALLOON3_FPGA_VIRT + 0x00e00008)
+/* FIXME: same as PCMCIA0 for now */
+#define BALLOON3_PCMCIA1_REG           (BALLOON3_FPGA_VIRT + 0x00e00008)
+#define BALLOON3_NANDIO_IO_REG         (BALLOON3_FPGA_VIRT + 0x00e00000)
+/* fpga/cpld interrupt control register */
+#define BALLOON3_INT_CONTROL_REG       (BALLOON3_FPGA_VIRT + 0x00e0000C)
+#define BALLOON3_NANDIO_CTL2_REG       (BALLOON3_FPGA_VIRT + 0x00e00010)
+#define BALLOON3_NANDIO_CTL_REG        (BALLOON3_FPGA_VIRT + 0x00e00014)
+#define BALLOON3_VERSION_REG           (BALLOON3_FPGA_VIRT + 0x00e0001c)
+
+#define BALLOON3_SAMOSA_ADDR_REG       (BALLOON3_FPGA_VIRT + 0x00c00000)
+#define BALLOON3_SAMOSA_DATA_REG       (BALLOON3_FPGA_VIRT + 0x00c00004)
+#define BALLOON3_SAMOSA_STATUS_REG     (BALLOON3_FPGA_VIRT + 0x00c0001c)
+
+/* GPIOs for irqs */
+#define BALLOON3_GPIO_AUX_NIRQ         (94)
+#define BALLOON3_GPIO_CODEC_IRQ                (95)
+
+/* Timer and Idle LED locations */
+#define BALLOON3_GPIO_LED_NAND         (9)
+#define BALLOON3_GPIO_LED_IDLE         (10)
+
+/* backlight control */
+#define BALLOON3_GPIO_RUN_BACKLIGHT    (99)
+
+#define BALLOON3_GPIO_S0_CD            (105)
+
+/* FPGA Interrupt Mask/Acknowledge Register */
+#define BALLOON3_INT_S0_IRQ            (1 << 0)  /* PCMCIA 0 IRQ */
+#define BALLOON3_INT_S0_STSCHG         (1 << 1)  /* PCMCIA 0 status changed */
+
+/* CF Status Register */
+#define BALLOON3_PCMCIA_nIRQ           (1 << 0)  /* IRQ / ready signal */
+#define BALLOON3_PCMCIA_nSTSCHG_BVD1   (1 << 1)
+                                       /* VDD sense / card status changed */
+
+/* CF control register (write) */
+#define BALLOON3_PCMCIA_RESET          (1 << 0)   /* Card reset signal */
+#define BALLOON3_PCMCIA_ENABLE         (1 << 1)
+#define BALLOON3_PCMCIA_ADD_ENABLE     (1 << 2)
+
+/* CPLD (and FPGA) interface definitions */
+#define CPLD_LCD0_DATA_SET             0x00
+#define CPLD_LCD0_DATA_CLR             0x10
+#define CPLD_LCD0_COMMAND_SET          0x01
+#define CPLD_LCD0_COMMAND_CLR          0x11
+#define CPLD_LCD1_DATA_SET             0x02
+#define CPLD_LCD1_DATA_CLR             0x12
+#define CPLD_LCD1_COMMAND_SET          0x03
+#define CPLD_LCD1_COMMAND_CLR          0x13
+
+#define CPLD_MISC_SET                  0x07
+#define CPLD_MISC_CLR                  0x17
+#define CPLD_MISC_LOON_NRESET_BIT      0
+#define CPLD_MISC_LOON_UNSUSP_BIT      1
+#define CPLD_MISC_RUN_5V_BIT           2
+#define CPLD_MISC_CHG_D0_BIT           3
+#define CPLD_MISC_CHG_D1_BIT           4
+#define CPLD_MISC_DAC_NCS_BIT          5
+
+#define CPLD_LCD_SET                   0x08
+#define CPLD_LCD_CLR                   0x18
+#define CPLD_LCD_BACKLIGHT_EN_0_BIT    0
+#define CPLD_LCD_BACKLIGHT_EN_1_BIT    1
+#define CPLD_LCD_LED_RED_BIT           4
+#define CPLD_LCD_LED_GREEN_BIT         5
+#define CPLD_LCD_NRESET_BIT            7
+
+#define CPLD_LCD_RO_SET                0x09
+#define CPLD_LCD_RO_CLR                0x19
+#define CPLD_LCD_RO_LCD0_nWAIT_BIT     0
+#define CPLD_LCD_RO_LCD1_nWAIT_BIT     1
+
+#define CPLD_SERIAL_SET                0x0a
+#define CPLD_SERIAL_CLR                0x1a
+#define CPLD_SERIAL_GSM_RI_BIT         0
+#define CPLD_SERIAL_GSM_CTS_BIT        1
+#define CPLD_SERIAL_GSM_DTR_BIT        2
+#define CPLD_SERIAL_LPR_CTS_BIT        3
+#define CPLD_SERIAL_TC232_CTS_BIT      4
+#define CPLD_SERIAL_TC232_DSR_BIT      5
+
+#define CPLD_SROUTING_SET              0x0b
+#define CPLD_SROUTING_CLR              0x1b
+#define CPLD_SROUTING_MSP430_LPR       0
+#define CPLD_SROUTING_MSP430_TC232     1
+#define CPLD_SROUTING_MSP430_GSM       2
+#define CPLD_SROUTING_LOON_LPR         (0 << 4)
+#define CPLD_SROUTING_LOON_TC232       (1 << 4)
+#define CPLD_SROUTING_LOON_GSM         (2 << 4)
+
+#define CPLD_AROUTING_SET              0x0c
+#define CPLD_AROUTING_CLR              0x1c
+#define CPLD_AROUTING_MIC2PHONE_BIT    0
+#define CPLD_AROUTING_PHONE2INT_BIT    1
+#define CPLD_AROUTING_PHONE2EXT_BIT    2
+#define CPLD_AROUTING_LOONL2INT_BIT    3
+#define CPLD_AROUTING_LOONL2EXT_BIT    4
+#define CPLD_AROUTING_LOONR2PHONE_BIT  5
+#define CPLD_AROUTING_LOONR2INT_BIT    6
+#define CPLD_AROUTING_LOONR2EXT_BIT    7
+
+extern int balloon3_has(enum balloon3_features feature);
+
+#endif
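
An editorial reading of the CPLD map above: every *_SET register at offset N has a *_CLR twin at N + 0x10, so a line is driven by writing the same bit to one register or the other:

    /* e.g. write bit CPLD_MISC_RUN_5V_BIT to CPLD_MISC_SET (0x07) to assert
     * the 5V request, and the same bit to CPLD_MISC_CLR (0x17) to drop it;
     * the pattern holds for every SET/CLR pair defined above.
     */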
diff --git a/arch/arm/mach-pxa/include/mach/colibri.h b/arch/arm/mach-pxa/include/mach/colibri.h
index a88d7ca..811743c 100644
@@ -23,6 +23,12 @@ static inline void colibri_pxa3xx_init_lcd(int bl_pin) {}
 extern void colibri_pxa3xx_init_eth(struct ax_plat_data *plat_data);
 #endif
 
+#if defined(CONFIG_MTD_NAND_PXA3xx) || defined(CONFIG_MTD_NAND_PXA3xx_MODULE)
+extern void colibri_pxa3xx_init_nand(void);
+#else
+static inline void colibri_pxa3xx_init_nand(void) {}
+#endif
+
 /* physical memory regions */
 #define COLIBRI_SDRAM_BASE     0xa0000000      /* SDRAM region */
 
diff --git a/arch/arm/mach-pxa/include/mach/entry-macro.S b/arch/arm/mach-pxa/include/mach/entry-macro.S
index f6b4bf3..2418806 100644
                mov     \tmp, \tmp, lsr #13
                and     \tmp, \tmp, #0x7                @ Core G
                cmp     \tmp, #1
-               bhi     1004f
+               bhi     1002f
 
+               @ Core Generation 1 (PXA25x)
                mov     \base, #io_p2v(0x40000000)      @ IIR Ctl = 0x40d00000
                add     \base, \base, #0x00d00000
                ldr     \irqstat, [\base, #0]           @ ICIP
                ldr     \irqnr, [\base, #4]             @ ICMR
-               b       1002f
 
-1004:
-               mrc     p6, 0, \irqstat, c6, c0, 0      @ ICIP2
-               mrc     p6, 0, \irqnr, c7, c0, 0        @ ICMR2
                ands    \irqnr, \irqstat, \irqnr
-               beq     1003f
+               beq     1001f
                rsb     \irqstat, \irqnr, #0
                and     \irqstat, \irqstat, \irqnr
                clz     \irqnr, \irqstat
-               rsb     \irqnr, \irqnr, #31
-               add     \irqnr, \irqnr, #(32 + PXA_IRQ(0))
+               rsb     \irqnr, \irqnr, #(31 + PXA_IRQ(0))
                b       1001f
-1003:
-               mrc     p6, 0, \irqstat, c0, c0, 0      @ ICIP
-               mrc     p6, 0, \irqnr, c1, c0, 0        @ ICMR
 1002:
-               ands    \irqnr, \irqstat, \irqnr
+               @ Core Generation 2 (PXA27x) or Core Generation 3 (PXA3xx)
+               mrc     p6, 0, \irqstat, c5, c0, 0      @ ICHP
+               tst     \irqstat, #0x80000000
                beq     1001f
-               rsb     \irqstat, \irqnr, #0
-               and     \irqstat, \irqstat, \irqnr
-               clz     \irqnr, \irqstat
-               rsb     \irqnr, \irqnr, #(31 + PXA_IRQ(0))
+               bic     \irqstat, \irqstat, #0x80000000
+               mov     \irqnr, \irqstat, lsr #16
 1001:
                .endm
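
In C terms, the new ICHP-based dispatch for generation >1 parts reads (sketch of the assembly above):

    /* u32 ichp = read_ichp();        // mrc p6, 0, ..., c5, c0, 0
     * if (ichp & (1u << 31))         // VAL_IRQ: an interrupt is pending
     *         irqnr = (ichp & ~(1u << 31)) >> 16;  // highest-priority irq
     * One coprocessor read replaces the old ICIP/ICMR fetch, mask and clz walk.
     */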
diff --git a/arch/arm/mach-pxa/include/mach/hardware.h b/arch/arm/mach-pxa/include/mach/hardware.h
index 16ab795..aa3d9f7 100644
 #define __cpu_is_pxa935(id)    (0)
 #endif
 
+#ifdef CONFIG_CPU_PXA950
+#define __cpu_is_pxa950(id)                             \
+       ({                                              \
+               unsigned int _id = (id) >> 4 & 0xfff;   \
+               _id == 0x697;                           \
+        })
+#else
+#define __cpu_is_pxa950(id)    (0)
+#endif
+
 #define cpu_is_pxa210()                                        \
        ({                                              \
                __cpu_is_pxa210(read_cpuid_id());       \
                __cpu_is_pxa935(id);                    \
         })
 
+#define cpu_is_pxa950()                                        \
+       ({                                              \
+               unsigned int id = read_cpuid(CPUID_ID); \
+               __cpu_is_pxa950(id);                    \
+        })
+
 /*
  * CPUID Core Generation Bit
  * <= 0x2 for pxa21x/pxa25x/pxa26x/pxa27x
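
Worked example for the new PXA950 test, using the part-number field in bits [15:4] of the CPUID:

    /* PXA950 reports 0x697 there, so
     *     ((read_cpuid(CPUID_ID) >> 4) & 0xfff) == 0x697
     * and cpu_is_pxa950() evaluates true.
     */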
diff --git a/arch/arm/mach-pxa/include/mach/irda.h b/arch/arm/mach-pxa/include/mach/irda.h
index 0a50c3c..3cd41f7 100644
@@ -12,6 +12,8 @@ struct pxaficp_platform_data {
        void (*transceiver_mode)(struct device *dev, int mode);
        int (*startup)(struct device *dev);
        void (*shutdown)(struct device *dev);
+       int gpio_pwdown;                /* powerdown GPIO for the IrDA chip */
+       bool gpio_pwdown_inverted;      /* gpio_pwdown is inverted */
 };
 
 extern void pxa_set_ficp_info(struct pxaficp_platform_data *info);
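
These two fields replace the per-board transceiver hooks deleted in the corgi.c, eseries.c and hx4700.c hunks above; boards now only name the power-down GPIO and the pxaficp_ir driver requests and toggles it (editorial summary):

    /* Board usage, as in the corgi.c hunk above:
     *     .gpio_pwdown          = CORGI_GPIO_IR_ON,
     *     .gpio_pwdown_inverted = true,    (set when the pin is active-low)
     */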
diff --git a/arch/arm/mach-pxa/include/mach/irqs.h b/arch/arm/mach-pxa/include/mach/irqs.h
index 6a1d959..3677a9a 100644
 #ifdef CONFIG_PXA3xx
 #define IRQ_SSP4       PXA_IRQ(13)     /* SSP4 service request */
 #define IRQ_CIR                PXA_IRQ(34)     /* Consumer IR */
+#define IRQ_COMM_WDT   PXA_IRQ(35)     /* Comm WDT interrupt */
 #define IRQ_TSI                PXA_IRQ(36)     /* Touch Screen Interface (PXA320) */
 #define IRQ_USIM2      PXA_IRQ(38)     /* USIM2 Controller */
-#define IRQ_GRPHICS    PXA_IRQ(39)     /* Graphics Controller */
+#define IRQ_GCU                PXA_IRQ(39)     /* Graphics Controller */
 #define IRQ_MMC2       PXA_IRQ(41)     /* MMC2 Controller */
 #define IRQ_1WIRE      PXA_IRQ(44)     /* 1-Wire Controller */
 #define IRQ_NAND       PXA_IRQ(45)     /* NAND Controller */
 #define IRQ_MMC3       PXA_IRQ(55)     /* MMC3 Controller (PXA310) */
 #endif
 
-#define PXA_GPIO_IRQ_BASE      PXA_IRQ(64)
-#define PXA_GPIO_IRQ_NUM       (128)
+#ifdef CONFIG_CPU_PXA935
+#define IRQ_U2O                PXA_IRQ(64)     /* USB OTG 2.0 Controller (PXA935) */
+#define IRQ_U2H                PXA_IRQ(65)     /* USB Host 2.0 Controller (PXA935) */
+
+#define IRQ_MMC3_PXA935        PXA_IRQ(72)     /* MMC3 Controller (PXA935) */
+#define IRQ_MMC4_PXA935        PXA_IRQ(73)     /* MMC4 Controller (PXA935) */
+#define IRQ_MMC5_PXA935        PXA_IRQ(74)     /* MMC5 Controller (PXA935) */
+
+#define IRQ_U2P                PXA_IRQ(93)     /* USB PHY D+/D- Lines (PXA935) */
+#endif
+
+#ifdef CONFIG_CPU_PXA930
+#define IRQ_ENHROT     PXA_IRQ(37)     /* Enhanced Rotary (PXA930) */
+#define IRQ_ACIPC0     PXA_IRQ(5)
+#define IRQ_ACIPC1     PXA_IRQ(40)
+#define IRQ_ACIPC2     PXA_IRQ(19)
+#define IRQ_TRKBALL    PXA_IRQ(43)     /* Track Ball */
+#endif
+
+#ifdef CONFIG_CPU_PXA950
+#define IRQ_GC500      PXA_IRQ(70)     /* Graphics Controller (PXA950) */
+#endif
+
+#define PXA_GPIO_IRQ_BASE      PXA_IRQ(96)
+#define PXA_GPIO_IRQ_NUM       (192)
 
 #define GPIO_2_x_TO_IRQ(x)     (PXA_GPIO_IRQ_BASE + (x))
 #define IRQ_GPIO(x)    (((x) < 2) ? (IRQ_GPIO0 + (x)) : GPIO_2_x_TO_IRQ(x))
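
Editor's note: with the base moved from PXA_IRQ(64) to PXA_IRQ(96), the GPIO window grows from 128 to 192 interrupts and clears room for the PXA93x/PXA950 peripheral IRQs added above. GPIO0 and GPIO1 keep their dedicated low IRQs (IRQ_GPIO0 + x); any higher GPIO maps linearly, e.g. IRQ_GPIO(54) = PXA_GPIO_IRQ_BASE + 54 = PXA_IRQ(96) + 54.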
 #define IRQ_BOARD_END          (IRQ_BOARD_START + 70)
 #elif defined(CONFIG_MACH_ZYLONITE)
 #define IRQ_BOARD_END          (IRQ_BOARD_START + 32)
+#elif defined(CONFIG_PXA_EZX)
+#define IRQ_BOARD_END          (IRQ_BOARD_START + 23)
 #else
 #define IRQ_BOARD_END          (IRQ_BOARD_START + 16)
 #endif
 #define MAINSTONE_S1_STSCHG_IRQ        MAINSTONE_IRQ(14)
 #define MAINSTONE_S1_IRQ       MAINSTONE_IRQ(15)
 
+/* Balloon3 Interrupts */
+#define BALLOON3_IRQ(x)                (IRQ_BOARD_START + (x))
+
+#define BALLOON3_BP_CF_NRDY_IRQ        BALLOON3_IRQ(0)
+#define BALLOON3_BP_NSTSCHG_IRQ        BALLOON3_IRQ(1)
+
+#define BALLOON3_AUX_NIRQ      IRQ_GPIO(BALLOON3_GPIO_AUX_NIRQ)
+#define BALLOON3_CODEC_IRQ     IRQ_GPIO(BALLOON3_GPIO_CODEC_IRQ)
+#define BALLOON3_S0_CD_IRQ     IRQ_GPIO(BALLOON3_GPIO_S0_CD)
+
 /* LoCoMo Interrupts (CONFIG_SHARP_LOCOMO) */
 #define IRQ_LOCOMO_KEY_BASE    (IRQ_BOARD_START + 0)
 #define IRQ_LOCOMO_GPIO_BASE   (IRQ_BOARD_START + 1)
index 4821850..271e249 100644 (file)
 #ifndef __ASM_ARCH_MFP_H
 #define __ASM_ARCH_MFP_H
 
-#define mfp_to_gpio(m) ((m) % 128)
-
-/* list of all the configurable MFP pins */
-enum {
-       MFP_PIN_INVALID = -1,
-
-       MFP_PIN_GPIO0 = 0,
-       MFP_PIN_GPIO1,
-       MFP_PIN_GPIO2,
-       MFP_PIN_GPIO3,
-       MFP_PIN_GPIO4,
-       MFP_PIN_GPIO5,
-       MFP_PIN_GPIO6,
-       MFP_PIN_GPIO7,
-       MFP_PIN_GPIO8,
-       MFP_PIN_GPIO9,
-       MFP_PIN_GPIO10,
-       MFP_PIN_GPIO11,
-       MFP_PIN_GPIO12,
-       MFP_PIN_GPIO13,
-       MFP_PIN_GPIO14,
-       MFP_PIN_GPIO15,
-       MFP_PIN_GPIO16,
-       MFP_PIN_GPIO17,
-       MFP_PIN_GPIO18,
-       MFP_PIN_GPIO19,
-       MFP_PIN_GPIO20,
-       MFP_PIN_GPIO21,
-       MFP_PIN_GPIO22,
-       MFP_PIN_GPIO23,
-       MFP_PIN_GPIO24,
-       MFP_PIN_GPIO25,
-       MFP_PIN_GPIO26,
-       MFP_PIN_GPIO27,
-       MFP_PIN_GPIO28,
-       MFP_PIN_GPIO29,
-       MFP_PIN_GPIO30,
-       MFP_PIN_GPIO31,
-       MFP_PIN_GPIO32,
-       MFP_PIN_GPIO33,
-       MFP_PIN_GPIO34,
-       MFP_PIN_GPIO35,
-       MFP_PIN_GPIO36,
-       MFP_PIN_GPIO37,
-       MFP_PIN_GPIO38,
-       MFP_PIN_GPIO39,
-       MFP_PIN_GPIO40,
-       MFP_PIN_GPIO41,
-       MFP_PIN_GPIO42,
-       MFP_PIN_GPIO43,
-       MFP_PIN_GPIO44,
-       MFP_PIN_GPIO45,
-       MFP_PIN_GPIO46,
-       MFP_PIN_GPIO47,
-       MFP_PIN_GPIO48,
-       MFP_PIN_GPIO49,
-       MFP_PIN_GPIO50,
-       MFP_PIN_GPIO51,
-       MFP_PIN_GPIO52,
-       MFP_PIN_GPIO53,
-       MFP_PIN_GPIO54,
-       MFP_PIN_GPIO55,
-       MFP_PIN_GPIO56,
-       MFP_PIN_GPIO57,
-       MFP_PIN_GPIO58,
-       MFP_PIN_GPIO59,
-       MFP_PIN_GPIO60,
-       MFP_PIN_GPIO61,
-       MFP_PIN_GPIO62,
-       MFP_PIN_GPIO63,
-       MFP_PIN_GPIO64,
-       MFP_PIN_GPIO65,
-       MFP_PIN_GPIO66,
-       MFP_PIN_GPIO67,
-       MFP_PIN_GPIO68,
-       MFP_PIN_GPIO69,
-       MFP_PIN_GPIO70,
-       MFP_PIN_GPIO71,
-       MFP_PIN_GPIO72,
-       MFP_PIN_GPIO73,
-       MFP_PIN_GPIO74,
-       MFP_PIN_GPIO75,
-       MFP_PIN_GPIO76,
-       MFP_PIN_GPIO77,
-       MFP_PIN_GPIO78,
-       MFP_PIN_GPIO79,
-       MFP_PIN_GPIO80,
-       MFP_PIN_GPIO81,
-       MFP_PIN_GPIO82,
-       MFP_PIN_GPIO83,
-       MFP_PIN_GPIO84,
-       MFP_PIN_GPIO85,
-       MFP_PIN_GPIO86,
-       MFP_PIN_GPIO87,
-       MFP_PIN_GPIO88,
-       MFP_PIN_GPIO89,
-       MFP_PIN_GPIO90,
-       MFP_PIN_GPIO91,
-       MFP_PIN_GPIO92,
-       MFP_PIN_GPIO93,
-       MFP_PIN_GPIO94,
-       MFP_PIN_GPIO95,
-       MFP_PIN_GPIO96,
-       MFP_PIN_GPIO97,
-       MFP_PIN_GPIO98,
-       MFP_PIN_GPIO99,
-       MFP_PIN_GPIO100,
-       MFP_PIN_GPIO101,
-       MFP_PIN_GPIO102,
-       MFP_PIN_GPIO103,
-       MFP_PIN_GPIO104,
-       MFP_PIN_GPIO105,
-       MFP_PIN_GPIO106,
-       MFP_PIN_GPIO107,
-       MFP_PIN_GPIO108,
-       MFP_PIN_GPIO109,
-       MFP_PIN_GPIO110,
-       MFP_PIN_GPIO111,
-       MFP_PIN_GPIO112,
-       MFP_PIN_GPIO113,
-       MFP_PIN_GPIO114,
-       MFP_PIN_GPIO115,
-       MFP_PIN_GPIO116,
-       MFP_PIN_GPIO117,
-       MFP_PIN_GPIO118,
-       MFP_PIN_GPIO119,
-       MFP_PIN_GPIO120,
-       MFP_PIN_GPIO121,
-       MFP_PIN_GPIO122,
-       MFP_PIN_GPIO123,
-       MFP_PIN_GPIO124,
-       MFP_PIN_GPIO125,
-       MFP_PIN_GPIO126,
-       MFP_PIN_GPIO127,
-       MFP_PIN_GPIO0_2,
-       MFP_PIN_GPIO1_2,
-       MFP_PIN_GPIO2_2,
-       MFP_PIN_GPIO3_2,
-       MFP_PIN_GPIO4_2,
-       MFP_PIN_GPIO5_2,
-       MFP_PIN_GPIO6_2,
-       MFP_PIN_GPIO7_2,
-       MFP_PIN_GPIO8_2,
-       MFP_PIN_GPIO9_2,
-       MFP_PIN_GPIO10_2,
-       MFP_PIN_GPIO11_2,
-       MFP_PIN_GPIO12_2,
-       MFP_PIN_GPIO13_2,
-       MFP_PIN_GPIO14_2,
-       MFP_PIN_GPIO15_2,
-       MFP_PIN_GPIO16_2,
-       MFP_PIN_GPIO17_2,
-
-       MFP_PIN_ULPI_STP,
-       MFP_PIN_ULPI_NXT,
-       MFP_PIN_ULPI_DIR,
-
-       MFP_PIN_nXCVREN,
-       MFP_PIN_DF_CLE_nOE,
-       MFP_PIN_DF_nADV1_ALE,
-       MFP_PIN_DF_SCLK_E,
-       MFP_PIN_DF_SCLK_S,
-       MFP_PIN_nBE0,
-       MFP_PIN_nBE1,
-       MFP_PIN_DF_nADV2_ALE,
-       MFP_PIN_DF_INT_RnB,
-       MFP_PIN_DF_nCS0,
-       MFP_PIN_DF_nCS1,
-       MFP_PIN_nLUA,
-       MFP_PIN_nLLA,
-       MFP_PIN_DF_nWE,
-       MFP_PIN_DF_ALE_nWE,
-       MFP_PIN_DF_nRE_nOE,
-       MFP_PIN_DF_ADDR0,
-       MFP_PIN_DF_ADDR1,
-       MFP_PIN_DF_ADDR2,
-       MFP_PIN_DF_ADDR3,
-       MFP_PIN_DF_IO0,
-       MFP_PIN_DF_IO1,
-       MFP_PIN_DF_IO2,
-       MFP_PIN_DF_IO3,
-       MFP_PIN_DF_IO4,
-       MFP_PIN_DF_IO5,
-       MFP_PIN_DF_IO6,
-       MFP_PIN_DF_IO7,
-       MFP_PIN_DF_IO8,
-       MFP_PIN_DF_IO9,
-       MFP_PIN_DF_IO10,
-       MFP_PIN_DF_IO11,
-       MFP_PIN_DF_IO12,
-       MFP_PIN_DF_IO13,
-       MFP_PIN_DF_IO14,
-       MFP_PIN_DF_IO15,
-
-       /* additional pins on PXA930 */
-       MFP_PIN_GSIM_UIO,
-       MFP_PIN_GSIM_UCLK,
-       MFP_PIN_GSIM_UDET,
-       MFP_PIN_GSIM_nURST,
-       MFP_PIN_PMIC_INT,
-       MFP_PIN_RDY,
-
-       MFP_PIN_MAX,
-};
-
-/*
- * a possible MFP configuration is represented by a 32-bit integer
- *
- * bit  0.. 9 - MFP Pin Number (1024 Pins Maximum)
- * bit 10..12 - Alternate Function Selection
- * bit 13..15 - Drive Strength
- * bit 16..18 - Low Power Mode State
- * bit 19..20 - Low Power Mode Edge Detection
- * bit 21..22 - Run Mode Pull State
- *
- * to facilitate the definition, the following macros are provided
- *
- * MFP_CFG_DEFAULT - default MFP configuration value, with
- *               alternate function = 0,
- *               drive strength = fast 3mA (MFP_DS03X)
- *               low power mode = default
- *               edge detection = none
- *
- * MFP_CFG     - default MFPR value with alternate function
- * MFP_CFG_DRV - default MFPR value with alternate function and
- *               pin drive strength
- * MFP_CFG_LPM - default MFPR value with alternate function and
- *               low power mode
- * MFP_CFG_X   - default MFPR value with alternate function,
- *               pin drive strength and low power mode
- */
-
-typedef unsigned long mfp_cfg_t;
-
-#define MFP_PIN(x)             ((x) & 0x3ff)
-
-#define MFP_AF0                        (0x0 << 10)
-#define MFP_AF1                        (0x1 << 10)
-#define MFP_AF2                        (0x2 << 10)
-#define MFP_AF3                        (0x3 << 10)
-#define MFP_AF4                        (0x4 << 10)
-#define MFP_AF5                        (0x5 << 10)
-#define MFP_AF6                        (0x6 << 10)
-#define MFP_AF7                        (0x7 << 10)
-#define MFP_AF_MASK            (0x7 << 10)
-#define MFP_AF(x)              (((x) >> 10) & 0x7)
-
-#define MFP_DS01X              (0x0 << 13)
-#define MFP_DS02X              (0x1 << 13)
-#define MFP_DS03X              (0x2 << 13)
-#define MFP_DS04X              (0x3 << 13)
-#define MFP_DS06X              (0x4 << 13)
-#define MFP_DS08X              (0x5 << 13)
-#define MFP_DS10X              (0x6 << 13)
-#define MFP_DS13X              (0x7 << 13)
-#define MFP_DS_MASK            (0x7 << 13)
-#define MFP_DS(x)              (((x) >> 13) & 0x7)
-
-#define MFP_LPM_DEFAULT                (0x0 << 16)
-#define MFP_LPM_DRIVE_LOW      (0x1 << 16)
-#define MFP_LPM_DRIVE_HIGH     (0x2 << 16)
-#define MFP_LPM_PULL_LOW       (0x3 << 16)
-#define MFP_LPM_PULL_HIGH      (0x4 << 16)
-#define MFP_LPM_FLOAT          (0x5 << 16)
-#define MFP_LPM_INPUT          (0x6 << 16)
-#define MFP_LPM_STATE_MASK     (0x7 << 16)
-#define MFP_LPM_STATE(x)       (((x) >> 16) & 0x7)
-
-#define MFP_LPM_EDGE_NONE      (0x0 << 19)
-#define MFP_LPM_EDGE_RISE      (0x1 << 19)
-#define MFP_LPM_EDGE_FALL      (0x2 << 19)
-#define MFP_LPM_EDGE_BOTH      (0x3 << 19)
-#define MFP_LPM_EDGE_MASK      (0x3 << 19)
-#define MFP_LPM_EDGE(x)                (((x) >> 19) & 0x3)
-
-#define MFP_PULL_NONE          (0x0 << 21)
-#define MFP_PULL_LOW           (0x1 << 21)
-#define MFP_PULL_HIGH          (0x2 << 21)
-#define MFP_PULL_BOTH          (0x3 << 21)
-#define MFP_PULL_MASK          (0x3 << 21)
-#define MFP_PULL(x)            (((x) >> 21) & 0x3)
-
-#define MFP_CFG_DEFAULT                (MFP_AF0 | MFP_DS03X | MFP_LPM_DEFAULT |\
-                                MFP_LPM_EDGE_NONE | MFP_PULL_NONE)
-
-#define MFP_CFG(pin, af)               \
-       ((MFP_CFG_DEFAULT & ~MFP_AF_MASK) |\
-        (MFP_PIN(MFP_PIN_##pin) | MFP_##af))
-
-#define MFP_CFG_DRV(pin, af, drv)      \
-       ((MFP_CFG_DEFAULT & ~(MFP_AF_MASK | MFP_DS_MASK)) |\
-        (MFP_PIN(MFP_PIN_##pin) | MFP_##af | MFP_##drv))
-
-#define MFP_CFG_LPM(pin, af, lpm)      \
-       ((MFP_CFG_DEFAULT & ~(MFP_AF_MASK | MFP_LPM_STATE_MASK)) |\
-        (MFP_PIN(MFP_PIN_##pin) | MFP_##af | MFP_LPM_##lpm))
-
-#define MFP_CFG_X(pin, af, drv, lpm)   \
-       ((MFP_CFG_DEFAULT & ~(MFP_AF_MASK | MFP_DS_MASK | MFP_LPM_STATE_MASK)) |\
-        (MFP_PIN(MFP_PIN_##pin) | MFP_##af | MFP_##drv | MFP_LPM_##lpm))
+#include <plat/mfp.h>
 
 #endif /* __ASM_ARCH_MFP_H */
index 6d1304c..02a69dc 100644 (file)
@@ -14,6 +14,11 @@ struct pxamci_platform_data {
        int (*get_ro)(struct device *);
        void (*setpower)(struct device *, unsigned int);
        void (*exit)(struct device *, void *);
+       int gpio_card_detect;                   /* gpio detecting card insertion */
+       int gpio_card_ro;                       /* gpio detecting read only toggle */
+       bool gpio_card_ro_invert;               /* gpio ro is inverted */
+       int gpio_power;                         /* gpio powering up MMC bus */
+       bool gpio_power_invert;                 /* gpio power is inverted */
 };
 
 extern void pxa_set_mci_info(struct pxamci_platform_data *info);
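
Editor's note: mirroring the IrDA change, these five fields let the MMC board files below trade their init/get_ro/setpower callbacks for plain GPIO numbers, with -1 meaning the board has no such line. A sketch of the consuming side, assuming pxamci driver logic that is not part of these hunks:

	/* hypothetical driver-side write-protect probe via the new fields */
	static int pxamci_probe_ro(struct pxamci_platform_data *pdata)
	{
		int ro;

		if (!gpio_is_valid(pdata->gpio_card_ro))
			return -ENOSYS;		/* no write-protect switch wired */

		ro = gpio_get_value(pdata->gpio_card_ro);
		return pdata->gpio_card_ro_invert ? !ro : ro;
	}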
diff --git a/arch/arm/mach-pxa/include/mach/palmtc.h b/arch/arm/mach-pxa/include/mach/palmtc.h
new file mode 100644 (file)
index 0000000..3dc9b07
--- /dev/null
@@ -0,0 +1,86 @@
+/*
+ * linux/include/asm-arm/arch-pxa/palmtc-gpio.h
+ *
+ * GPIOs and interrupts for Palm Tungsten|C Handheld Computer
+ *
+ * Authors:    Alex Osborne <bobofdoom@gmail.com>
+ *             Marek Vasut <marek.vasut@gmail.com>
+ *             Holger Bocklet <bitz.email@gmx.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef _INCLUDE_PALMTC_H_
+#define _INCLUDE_PALMTC_H_
+
+/* GPIO assignments */
+
+/* GPIOs */
+#define GPIO_NR_PALMTC_EARPHONE_DETECT 2
+#define GPIO_NR_PALMTC_CRADLE_DETECT   5
+#define GPIO_NR_PALMTC_HOTSYNC_BUTTON  7
+
+/* SD/MMC */
+#define GPIO_NR_PALMTC_SD_DETECT_N     12
+#define GPIO_NR_PALMTC_SD_POWER                32
+#define GPIO_NR_PALMTC_SD_READONLY     54
+
+/* WLAN */
+#define GPIO_NR_PALMTC_PCMCIA_READY    13
+#define GPIO_NR_PALMTC_PCMCIA_PWRREADY 14
+#define GPIO_NR_PALMTC_PCMCIA_POWER1   15
+#define GPIO_NR_PALMTC_PCMCIA_POWER2   33
+#define GPIO_NR_PALMTC_PCMCIA_POWER3   55
+#define GPIO_NR_PALMTC_PCMCIA_RESET    78
+
+/* UDC */
+#define GPIO_NR_PALMTC_USB_DETECT_N    4
+#define GPIO_NR_PALMTC_USB_POWER       36
+
+/* LCD/BACKLIGHT */
+#define GPIO_NR_PALMTC_BL_POWER                16
+#define GPIO_NR_PALMTC_LCD_POWER       44
+#define GPIO_NR_PALMTC_LCD_BLANK       38
+
+/* UART */
+#define GPIO_NR_PALMTC_RS232_POWER     37
+
+/* IRDA */
+#define GPIO_NR_PALMTC_IR_DISABLE      45
+
+/* IRQs */
+#define IRQ_GPIO_PALMTC_SD_DETECT_N    IRQ_GPIO(GPIO_NR_PALMTC_SD_DETECT_N)
+#define IRQ_GPIO_PALMTC_WLAN_READY     IRQ_GPIO(GPIO_NR_PALMTC_WLAN_READY)
+
+/* UCB1400 GPIOs */
+#define GPIO_NR_PALMTC_POWER_DETECT    (0x80 | 0x00)
+#define GPIO_NR_PALMTC_HEADPHONE_DETECT        (0x80 | 0x01)
+#define GPIO_NR_PALMTC_SPEAKER_ENABLE  (0x80 | 0x03)
+#define GPIO_NR_PALMTC_VIBRA_POWER     (0x80 | 0x05)
+#define GPIO_NR_PALMTC_LED_POWER       (0x80 | 0x07)
+
+/* Initialisation values */
+#define PALMTC_UCB1400_GPIO_OFFSET     0x80
+
+/* BATTERY */
+#define PALMTC_BAT_MAX_VOLTAGE         4000    /* 4.00V maximum voltage */
+#define PALMTC_BAT_MIN_VOLTAGE         3550    /* 3.55V critical voltage */
+#define PALMTC_BAT_MAX_CURRENT         0       /* unknown */
+#define PALMTC_BAT_MIN_CURRENT         0       /* unknown */
+#define PALMTC_BAT_MAX_CHARGE          1       /* unknown */
+#define PALMTC_BAT_MIN_CHARGE          1       /* unknown */
+#define PALMTC_MAX_LIFE_MINS           240     /* battery life in minutes when on */
+
+#define PALMTC_BAT_MEASURE_DELAY       (HZ * 1)
+
+/* BACKLIGHT */
+#define PALMTC_MAX_INTENSITY           0xFE
+#define PALMTC_DEFAULT_INTENSITY       0x7E
+#define PALMTC_LIMIT_MASK              0x7F
+#define PALMTC_PRESCALER               0x3F
+#define PALMTC_PERIOD_NS               3500
+
+#endif
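
Editor's note: worked numbers for the backlight block above: a PALMTC_PERIOD_NS of 3500 ns is a PWM frequency of 1 / 3.5 us, roughly 286 kHz, and brightness is a duty value between 0 and PALMTC_MAX_INTENSITY (0xFE); palmtc.c below feeds these into its platform_pwm_backlight_data.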
index e74082c..1be0db6 100644 (file)
 #define PALMTX_PHYS_FLASH_START        PXA_CS0_PHYS    /* ChipSelect 0 */
 #define PALMTX_PHYS_NAND_START PXA_CS1_PHYS    /* ChipSelect 1 */
 
+#define PALMTX_NAND_ALE_PHYS   (PALMTX_PHYS_NAND_START | (1 << 24))
+#define PALMTX_NAND_CLE_PHYS   (PALMTX_PHYS_NAND_START | (1 << 25))
+#define PALMTX_NAND_ALE_VIRT   0xff100000
+#define PALMTX_NAND_CLE_VIRT   0xff200000
+
 /* TOUCHSCREEN */
 #define AC97_LINK_FRAME                        21
 
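
Editor's note: wiring ALE and CLE to address lines 24 and 25 of the NAND chip-select is the usual trick for driving a raw NAND bus with plain memory writes: a store through the CLE window latches a command byte, a store through the ALE window an address byte. A sketch of a cmd_ctrl hook over these windows (hypothetical, since the palmtx.c NAND hookup is cut off in this dump):

	static void palmtx_nand_cmd_ctl(struct mtd_info *mtd, int cmd,
					unsigned int ctrl)
	{
		struct nand_chip *this = mtd->priv;

		if (cmd == NAND_CMD_NONE)
			return;

		if (ctrl & NAND_CLE)
			writeb(cmd, (void __iomem *)PALMTX_NAND_CLE_VIRT);
		else if (ctrl & NAND_ALE)
			writeb(cmd, (void __iomem *)PALMTX_NAND_ALE_VIRT);
		else
			writeb(cmd, this->IO_ADDR_W);
	}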
index 7d1a059..e91d63c 100644 (file)
 #define CKEN_MVED      43      /* < MVED clock enable */
 
 /* Note: GCU clock enable bit differs on PXA300/PXA310 and PXA320 */
-#define PXA300_CKEN_GRAPHICS   42      /* Graphics controller clock enable */
-#define PXA320_CKEN_GRAPHICS   7       /* Graphics controller clock enable */
+#define CKEN_PXA300_GCU                42      /* Graphics controller clock enable */
+#define CKEN_PXA320_GCU                7       /* Graphics controller clock enable */
 
 #endif /* __ASM_ARCH_PXA3XX_REGS_H */
index 6932720..f73061c 100644 (file)
@@ -118,7 +118,8 @@ struct pxafb_mach_info {
        u_int           fixed_modes:1,
                        cmap_inverse:1,
                        cmap_static:1,
-                       unused:29;
+                       acceleration_enabled:1,
+                       unused:28;
 
        /* The following should be defined in LCCR0
         *      LCCR0_Act or LCCR0_Pas          Active or Passive
index ad23e74..68464ce 100644 (file)
@@ -13,6 +13,7 @@
 #define ICFP           __REG(0x40D0000C)  /* Interrupt Controller FIQ Pending Register */
 #define ICPR           __REG(0x40D00010)  /* Interrupt Controller Pending Register */
 #define ICCR           __REG(0x40D00014)  /* Interrupt Controller Control Register */
+#define ICHP           __REG(0x40D00018)  /* Interrupt Controller Highest Priority Register */
 
 #define ICIP2          __REG(0x40D0009C)  /* Interrupt Controller IRQ Pending Register 2 */
 #define ICMR2          __REG(0x40D000A0)  /* Interrupt Controller Mask Register 2 */
 #define ICFP2          __REG(0x40D000A8)  /* Interrupt Controller FIQ Pending Register 2 */
 #define ICPR2          __REG(0x40D000AC)  /* Interrupt Controller Pending Register 2 */
 
+#define ICIP3          __REG(0x40D00130)  /* Interrupt Controller IRQ Pending Register 3 */
+#define ICMR3          __REG(0x40D00134)  /* Interrupt Controller Mask Register 3 */
+#define ICLR3          __REG(0x40D00138)  /* Interrupt Controller Level Register 3 */
+#define ICFP3          __REG(0x40D0013C)  /* Interrupt Controller FIQ Pending Register 3 */
+#define ICPR3          __REG(0x40D00140)  /* Interrupt Controller Pending Register 3 */
+
+#define IPR(x)         __REG(0x40D0001C + (x < 32 ? (x << 2)           \
+                               : (x < 64 ? (0x94 + ((x - 32) << 2))    \
+                               : (0x128 + ((x - 64) << 2)))))
+
 #endif /* __ASM_MACH_REGS_INTC_H */
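
Editor's note: worked addresses from the IPR() macro: IPR(0) is 0x40D0001C, IPR(31) is 0x40D00098, IPR(32) is 0x40D000B0 (base + 0x94) and IPR(64) is 0x40D00144 (base + 0x128), three register banks that mirror the ICIP/ICIP2/ICIP3 split above.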
index b547494..237734b 100644 (file)
@@ -37,7 +37,7 @@ static inline void arch_decomp_setup(void)
 {
        if (machine_is_littleton() || machine_is_intelmote2()
            || machine_is_csb726() || machine_is_stargate2()
-           || machine_is_cm_x300())
+           || machine_is_cm_x300() || machine_is_balloon3())
                UART = STUART;
 }
 
index f6e0300..d694ce2 100644 (file)
@@ -120,7 +120,7 @@ static void __init pxa_init_low_gpio_irq(set_wake_t fn)
 
 void __init pxa_init_irq(int irq_nr, set_wake_t fn)
 {
-       int irq;
+       int irq, i;
 
        pxa_internal_irq_nr = irq_nr;
 
@@ -129,6 +129,12 @@ void __init pxa_init_irq(int irq_nr, set_wake_t fn)
                _ICLR(irq) = 0; /* all IRQs are IRQ, not FIQ */
        }
 
+       /* initialize interrupt priority */
+       if (cpu_is_pxa27x() || cpu_is_pxa3xx()) {
+               for (i = 0; i < irq_nr; i++)
+                       IPR(i) = i | (1 << 31);
+       }
+
        /* only unmasked interrupts kick us out of idle */
        ICCR = 1;
 
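
Editor's note: the loop above programs each interrupt's priority register with the interrupt's own number, so priority simply follows IRQ numbering; the bit 31 set alongside it is presumably the entry's valid/enable flag (an assumption from the PXA manuals, not from this diff). This initialization is what makes the ICHP-based entry-macro change at the top of this section usable on PXA27x/PXA3xx.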
index 55b3788..1384895 100644 (file)
@@ -265,45 +265,12 @@ static inline void littleton_init_keypad(void) {}
 #endif
 
 #if defined(CONFIG_MMC_PXA) || defined(CONFIG_MMC_PXA_MODULE)
-static int littleton_mci_init(struct device *dev,
-                             irq_handler_t littleton_detect_int, void *data)
-{
-       int err, gpio_cd = GPIO_MMC1_CARD_DETECT;
-
-       err = gpio_request(gpio_cd, "mmc card detect");
-       if (err)
-               goto err_request_cd;
-
-       gpio_direction_input(gpio_cd);
-
-       err = request_irq(gpio_to_irq(gpio_cd), littleton_detect_int,
-                         IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
-                         "mmc card detect", data);
-       if (err) {
-               dev_err(dev, "failed to request card detect IRQ\n");
-               goto err_request_irq;
-       }
-       return 0;
-
-err_request_irq:
-       gpio_free(gpio_cd);
-err_request_cd:
-       return err;
-}
-
-static void littleton_mci_exit(struct device *dev, void *data)
-{
-       int gpio_cd = GPIO_MMC1_CARD_DETECT;
-
-       free_irq(gpio_to_irq(gpio_cd), data);
-       gpio_free(gpio_cd);
-}
-
 static struct pxamci_platform_data littleton_mci_platform_data = {
-       .detect_delay   = 20,
-       .ocr_mask       = MMC_VDD_32_33 | MMC_VDD_33_34,
-       .init           = littleton_mci_init,
-       .exit           = littleton_mci_exit,
+       .detect_delay           = 20,
+       .ocr_mask               = MMC_VDD_32_33 | MMC_VDD_33_34,
+       .gpio_card_detect       = GPIO_MMC1_CARD_DETECT,
+       .gpio_card_ro           = -1,
+       .gpio_power             = -1,
 };
 
 static void __init littleton_init_mmc(void)
index f04c833..c6a94d3 100644 (file)
@@ -482,11 +482,14 @@ static void lubbock_mci_exit(struct device *dev, void *data)
 }
 
 static struct pxamci_platform_data lubbock_mci_platform_data = {
-       .ocr_mask       = MMC_VDD_32_33|MMC_VDD_33_34,
-       .detect_delay   = 1,
-       .init           = lubbock_mci_init,
-       .get_ro         = lubbock_mci_get_ro,
-       .exit           = lubbock_mci_exit,
+       .ocr_mask               = MMC_VDD_32_33|MMC_VDD_33_34,
+       .detect_delay           = 1,
+       .init                   = lubbock_mci_init,
+       .get_ro                 = lubbock_mci_get_ro,
+       .exit                   = lubbock_mci_exit,
+       .gpio_card_detect       = -1,
+       .gpio_card_ro           = -1,
+       .gpio_power             = -1,
 };
 
 static void lubbock_irda_transceiver_mode(struct device *dev, int mode)
@@ -504,8 +507,9 @@ static void lubbock_irda_transceiver_mode(struct device *dev, int mode)
 }
 
 static struct pxaficp_platform_data lubbock_ficp_platform_data = {
-       .transceiver_cap  = IR_SIRMODE | IR_FIRMODE,
-       .transceiver_mode = lubbock_irda_transceiver_mode,
+       .gpio_pwdown            = -1,
+       .transceiver_cap        = IR_SIRMODE | IR_FIRMODE,
+       .transceiver_mode       = lubbock_irda_transceiver_mode,
 };
 
 static void __init lubbock_init(void)
index ca39669..5360c07 100644 (file)
@@ -140,15 +140,9 @@ static unsigned long magician_pin_config[] __initdata = {
  * IRDA
  */
 
-static void magician_irda_transceiver_mode(struct device *dev, int mode)
-{
-       gpio_set_value(GPIO83_MAGICIAN_nIR_EN, mode & IR_OFF);
-       pxa2xx_transceiver_mode(dev, mode);
-}
-
 static struct pxaficp_platform_data magician_ficp_info = {
-       .transceiver_cap  = IR_SIRMODE | IR_OFF,
-       .transceiver_mode = magician_irda_transceiver_mode,
+       .gpio_pwdown            = GPIO83_MAGICIAN_nIR_EN,
+       .transceiver_cap        = IR_SIRMODE | IR_OFF,
 };
 
 /*
@@ -651,55 +645,24 @@ static struct platform_device bq24022 = {
 static int magician_mci_init(struct device *dev,
                                irq_handler_t detect_irq, void *data)
 {
-       int err;
-
-       err = request_irq(IRQ_MAGICIAN_SD, detect_irq,
+       return request_irq(IRQ_MAGICIAN_SD, detect_irq,
                                IRQF_DISABLED | IRQF_SAMPLE_RANDOM,
-                               "MMC card detect", data);
-       if (err)
-               goto err_request_irq;
-       err = gpio_request(EGPIO_MAGICIAN_SD_POWER, "SD_POWER");
-       if (err)
-               goto err_request_power;
-       err = gpio_request(EGPIO_MAGICIAN_nSD_READONLY, "nSD_READONLY");
-       if (err)
-               goto err_request_readonly;
-
-       return 0;
-
-err_request_readonly:
-       gpio_free(EGPIO_MAGICIAN_SD_POWER);
-err_request_power:
-       free_irq(IRQ_MAGICIAN_SD, data);
-err_request_irq:
-       return err;
-}
-
-static void magician_mci_setpower(struct device *dev, unsigned int vdd)
-{
-       struct pxamci_platform_data *pdata = dev->platform_data;
-
-       gpio_set_value(EGPIO_MAGICIAN_SD_POWER, (1 << vdd) & pdata->ocr_mask);
-}
-
-static int magician_mci_get_ro(struct device *dev)
-{
-       return (!gpio_get_value(EGPIO_MAGICIAN_nSD_READONLY));
+                               "mmc card detect", data);
 }
 
 static void magician_mci_exit(struct device *dev, void *data)
 {
-       gpio_free(EGPIO_MAGICIAN_nSD_READONLY);
-       gpio_free(EGPIO_MAGICIAN_SD_POWER);
        free_irq(IRQ_MAGICIAN_SD, data);
 }
 
 static struct pxamci_platform_data magician_mci_info = {
-       .ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
-       .init     = magician_mci_init,
-       .get_ro   = magician_mci_get_ro,
-       .setpower = magician_mci_setpower,
-       .exit     = magician_mci_exit,
+       .ocr_mask               = MMC_VDD_32_33|MMC_VDD_33_34,
+       .init                   = magician_mci_init,
+       .exit                   = magician_mci_exit,
+       .gpio_card_detect       = -1,
+       .gpio_card_ro           = EGPIO_MAGICIAN_nSD_READONLY,
+       .gpio_card_ro_invert    = 1,
+       .gpio_power             = EGPIO_MAGICIAN_SD_POWER,
 };
 
 
index f4dabf0..a4eeae3 100644 (file)
@@ -450,10 +450,13 @@ static void mainstone_mci_exit(struct device *dev, void *data)
 }
 
 static struct pxamci_platform_data mainstone_mci_platform_data = {
-       .ocr_mask       = MMC_VDD_32_33|MMC_VDD_33_34,
-       .init           = mainstone_mci_init,
-       .setpower       = mainstone_mci_setpower,
-       .exit           = mainstone_mci_exit,
+       .ocr_mask               = MMC_VDD_32_33|MMC_VDD_33_34,
+       .init                   = mainstone_mci_init,
+       .setpower               = mainstone_mci_setpower,
+       .exit                   = mainstone_mci_exit,
+       .gpio_card_detect       = -1,
+       .gpio_card_ro           = -1,
+       .gpio_power             = -1,
 };
 
 static void mainstone_irda_transceiver_mode(struct device *dev, int mode)
@@ -476,8 +479,9 @@ static void mainstone_irda_transceiver_mode(struct device *dev, int mode)
 }
 
 static struct pxaficp_platform_data mainstone_ficp_platform_data = {
-       .transceiver_cap  = IR_SIRMODE | IR_FIRMODE | IR_OFF,
-       .transceiver_mode = mainstone_irda_transceiver_mode,
+       .gpio_pwdown            = -1,
+       .transceiver_cap        = IR_SIRMODE | IR_FIRMODE | IR_OFF,
+       .transceiver_mode       = mainstone_irda_transceiver_mode,
 };
 
 static struct gpio_keys_button gpio_keys_button[] = {
index 2d28132..3cab452 100644 (file)
@@ -434,72 +434,15 @@ struct gpio_vbus_mach_info gpio_vbus_data = {
 /*
  * SDIO/MMC Card controller
  */
-static void mci_setpower(struct device *dev, unsigned int vdd)
-{
-       struct pxamci_platform_data *p_d = dev->platform_data;
-
-       if ((1 << vdd) & p_d->ocr_mask)
-               gpio_set_value(GPIO91_SDIO_EN, 1);      /* enable SDIO power */
-       else
-               gpio_set_value(GPIO91_SDIO_EN, 0);      /* disable SDIO power */
-}
-
-static int mci_get_ro(struct device *dev)
-{
-       return gpio_get_value(GPIO78_SDIO_RO);
-}
-
-struct gpio_ress mci_gpios[] = {
-       MIO_GPIO_IN(GPIO78_SDIO_RO,     "SDIO readonly detect"),
-       MIO_GPIO_IN(GPIO15_SDIO_INSERT, "SDIO insertion detect"),
-       MIO_GPIO_OUT(GPIO91_SDIO_EN, 0, "SDIO power enable")
-};
-
-static void mci_exit(struct device *dev, void *data)
-{
-       mio_gpio_free(ARRAY_AND_SIZE(mci_gpios));
-       free_irq(gpio_to_irq(GPIO15_SDIO_INSERT), data);
-}
-
-static struct pxamci_platform_data mioa701_mci_info;
-
 /**
  * The card detect interrupt isn't debounced so we delay it by 250ms
  * to give the card a chance to fully insert/eject.
  */
-static int mci_init(struct device *dev, irq_handler_t detect_int, void *data)
-{
-       int rc;
-       int irq = gpio_to_irq(GPIO15_SDIO_INSERT);
-
-       rc = mio_gpio_request(ARRAY_AND_SIZE(mci_gpios));
-       if (rc)
-               goto err_gpio;
-       /* enable RE/FE interrupt on card insertion and removal */
-       rc = request_irq(irq, detect_int,
-                        IRQF_DISABLED | IRQF_TRIGGER_RISING |
-                        IRQF_TRIGGER_FALLING,
-                        "MMC card detect", data);
-       if (rc)
-               goto err_irq;
-
-       mioa701_mci_info.detect_delay = msecs_to_jiffies(250);
-       return 0;
-
-err_irq:
-       dev_err(dev, "mioa701_mci_init: MMC/SD:"
-               " can't request MMC card detect IRQ\n");
-       mio_gpio_free(ARRAY_AND_SIZE(mci_gpios));
-err_gpio:
-       return rc;
-}
-
 static struct pxamci_platform_data mioa701_mci_info = {
-       .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34,
-       .init     = mci_init,
-       .get_ro   = mci_get_ro,
-       .setpower = mci_setpower,
-       .exit     = mci_exit,
+       .ocr_mask               = MMC_VDD_32_33 | MMC_VDD_33_34,
+       .gpio_card_detect       = GPIO15_SDIO_INSERT,
+       .gpio_card_ro           = GPIO78_SDIO_RO,
+       .gpio_power             = GPIO91_SDIO_EN,
 };
 
 /* FlashRAM */
@@ -765,19 +708,20 @@ static struct i2c_board_info __initdata mioa701_pi2c_devices[] = {
        },
 };
 
-static struct soc_camera_link iclink = {
-       .bus_id = 0, /* Must match id in pxa27x_device_camera in device.c */
-};
-
 /* Board I2C devices. */
 static struct i2c_board_info __initdata mioa701_i2c_devices[] = {
        {
-               /* Must initialize before the camera(s) */
                I2C_BOARD_INFO("mt9m111", 0x5d),
-               .platform_data = &iclink,
        },
 };
 
+static struct soc_camera_link iclink = {
+       .bus_id         = 0, /* Match id in pxa27x_device_camera in device.c */
+       .board_info     = &mioa701_i2c_devices[0],
+       .i2c_adapter_id = 0,
+       .module_name    = "mt9m111",
+};
+
 struct i2c_pxa_platform_data i2c_pdata = {
        .fast_mode = 1,
 };
@@ -811,6 +755,7 @@ MIO_SIMPLE_DEV(pxa2xx_pcm,    "pxa2xx-pcm",     NULL)
 MIO_SIMPLE_DEV(mioa701_sound,    "mioa701-wm9713", NULL)
 MIO_SIMPLE_DEV(mioa701_board,    "mioa701-board",  NULL)
 MIO_SIMPLE_DEV(gpio_vbus,        "gpio-vbus",      &gpio_vbus_data);
+MIO_SIMPLE_DEV(mioa701_camera,   "soc-camera-pdrv", &iclink);
 
 static struct platform_device *devices[] __initdata = {
        &mioa701_gpio_keys,
@@ -821,6 +766,7 @@ static struct platform_device *devices[] __initdata = {
        &power_dev,
        &strataflash,
        &gpio_vbus,
+       &mioa701_camera,
        &mioa701_board,
 };
 
@@ -841,7 +787,7 @@ static void mioa701_restart(char c, const char *cmd)
 static struct gpio_ress global_gpios[] = {
        MIO_GPIO_OUT(GPIO9_CHARGE_EN, 1, "Charger enable"),
        MIO_GPIO_OUT(GPIO18_POWEROFF, 0, "Power Off"),
-       MIO_GPIO_OUT(GPIO87_LCD_POWER, 0, "LCD Power")
+       MIO_GPIO_OUT(GPIO87_LCD_POWER, 0, "LCD Power"),
 };
 
 static void __init mioa701_machine_init(void)
@@ -855,6 +801,7 @@ static void __init mioa701_machine_init(void)
        mio_gpio_request(ARRAY_AND_SIZE(global_gpios));
        bootstrap_init();
        set_pxa_fb_info(&mioa701_pxafb_info);
+       mioa701_mci_info.detect_delay = msecs_to_jiffies(250);
        pxa_set_mci_info(&mioa701_mci_info);
        pxa_set_keypad_info(&mioa701_keypad_info);
        wm97xx_bat_set_pdata(&mioa701_battery_data);
@@ -869,7 +816,6 @@ static void __init mioa701_machine_init(void)
        pxa_set_i2c_info(&i2c_pdata);
        pxa27x_set_i2c_power_info(NULL);
        pxa_set_camera_info(&mioa701_pxacamera_platform_data);
-       i2c_register_board_info(0, ARRAY_AND_SIZE(mioa701_i2c_devices));
 }
 
 static void mioa701_machine_exit(void)
index 169fcc1..1ad029d 100644 (file)
@@ -25,6 +25,9 @@
 #include <linux/wm97xx_batt.h>
 #include <linux/power_supply.h>
 #include <linux/sysdev.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/physmap.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -141,85 +144,50 @@ static unsigned long palmld_pin_config[] __initdata = {
 };
 
 /******************************************************************************
- * SD/MMC card controller
+ * NOR Flash
  ******************************************************************************/
-static int palmld_mci_init(struct device *dev, irq_handler_t palmld_detect_int,
-                               void *data)
-{
-       int err = 0;
-
-       /* Setup an interrupt for detecting card insert/remove events */
-       err = gpio_request(GPIO_NR_PALMLD_SD_DETECT_N, "SD IRQ");
-       if (err)
-               goto err;
-       err = gpio_direction_input(GPIO_NR_PALMLD_SD_DETECT_N);
-       if (err)
-               goto err2;
-       err = request_irq(gpio_to_irq(GPIO_NR_PALMLD_SD_DETECT_N),
-                       palmld_detect_int, IRQF_DISABLED | IRQF_SAMPLE_RANDOM |
-                       IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
-                       "SD/MMC card detect", data);
-       if (err) {
-               printk(KERN_ERR "%s: cannot request SD/MMC card detect IRQ\n",
-                               __func__);
-               goto err2;
+static struct mtd_partition palmld_partitions[] = {
+       {
+               .name           = "Flash",
+               .offset         = 0x00000000,
+               .size           = MTDPART_SIZ_FULL,
+               .mask_flags     = 0
        }
+};
 
-       err = gpio_request(GPIO_NR_PALMLD_SD_POWER, "SD_POWER");
-       if (err)
-               goto err3;
-       err = gpio_direction_output(GPIO_NR_PALMLD_SD_POWER, 0);
-       if (err)
-               goto err4;
-
-       err = gpio_request(GPIO_NR_PALMLD_SD_READONLY, "SD_READONLY");
-       if (err)
-               goto err4;
-       err = gpio_direction_input(GPIO_NR_PALMLD_SD_READONLY);
-       if (err)
-               goto err5;
-
-       printk(KERN_DEBUG "%s: irq registered\n", __func__);
-
-       return 0;
-
-err5:
-       gpio_free(GPIO_NR_PALMLD_SD_READONLY);
-err4:
-       gpio_free(GPIO_NR_PALMLD_SD_POWER);
-err3:
-       free_irq(gpio_to_irq(GPIO_NR_PALMLD_SD_DETECT_N), data);
-err2:
-       gpio_free(GPIO_NR_PALMLD_SD_DETECT_N);
-err:
-       return err;
-}
-
-static void palmld_mci_exit(struct device *dev, void *data)
-{
-       gpio_free(GPIO_NR_PALMLD_SD_READONLY);
-       gpio_free(GPIO_NR_PALMLD_SD_POWER);
-       free_irq(gpio_to_irq(GPIO_NR_PALMLD_SD_DETECT_N), data);
-       gpio_free(GPIO_NR_PALMLD_SD_DETECT_N);
-}
+static struct physmap_flash_data palmld_flash_data[] = {
+       {
+               .width          = 2,                    /* bankwidth in bytes */
+               .parts          = palmld_partitions,
+               .nr_parts       = ARRAY_SIZE(palmld_partitions)
+       }
+};
 
-static void palmld_mci_power(struct device *dev, unsigned int vdd)
-{
-       struct pxamci_platform_data *p_d = dev->platform_data;
-       gpio_set_value(GPIO_NR_PALMLD_SD_POWER, p_d->ocr_mask & (1 << vdd));
-}
+static struct resource palmld_flash_resource = {
+       .start  = PXA_CS0_PHYS,
+       .end    = PXA_CS0_PHYS + SZ_4M - 1,
+       .flags  = IORESOURCE_MEM,
+};
 
-static int palmld_mci_get_ro(struct device *dev)
-{
-       return gpio_get_value(GPIO_NR_PALMLD_SD_READONLY);
-}
+static struct platform_device palmld_flash = {
+       .name           = "physmap-flash",
+       .id             = 0,
+       .resource       = &palmld_flash_resource,
+       .num_resources  = 1,
+       .dev            = {
+               .platform_data = palmld_flash_data,
+       },
+};
 
+/******************************************************************************
+ * SD/MMC card controller
+ ******************************************************************************/
 static struct pxamci_platform_data palmld_mci_platform_data = {
-       .ocr_mask       = MMC_VDD_32_33 | MMC_VDD_33_34,
-       .setpower       = palmld_mci_power,
-       .get_ro         = palmld_mci_get_ro,
-       .init           = palmld_mci_init,
-       .exit           = palmld_mci_exit,
+       .ocr_mask               = MMC_VDD_32_33 | MMC_VDD_33_34,
+       .gpio_card_detect       = GPIO_NR_PALMLD_SD_DETECT_N,
+       .gpio_card_ro           = GPIO_NR_PALMLD_SD_READONLY,
+       .gpio_power             = GPIO_NR_PALMLD_SD_POWER,
+       .detect_delay           = 20,
 };
 
 /******************************************************************************
@@ -336,35 +304,9 @@ static struct platform_device palmld_backlight = {
 /******************************************************************************
  * IrDA
  ******************************************************************************/
-static int palmld_irda_startup(struct device *dev)
-{
-       int err;
-       err = gpio_request(GPIO_NR_PALMLD_IR_DISABLE, "IR DISABLE");
-       if (err)
-               goto err;
-       err = gpio_direction_output(GPIO_NR_PALMLD_IR_DISABLE, 1);
-       if (err)
-               gpio_free(GPIO_NR_PALMLD_IR_DISABLE);
-err:
-       return err;
-}
-
-static void palmld_irda_shutdown(struct device *dev)
-{
-       gpio_free(GPIO_NR_PALMLD_IR_DISABLE);
-}
-
-static void palmld_irda_transceiver_mode(struct device *dev, int mode)
-{
-       gpio_set_value(GPIO_NR_PALMLD_IR_DISABLE, mode & IR_OFF);
-       pxa2xx_transceiver_mode(dev, mode);
-}
-
 static struct pxaficp_platform_data palmld_ficp_platform_data = {
-       .startup                = palmld_irda_startup,
-       .shutdown               = palmld_irda_shutdown,
-       .transceiver_cap        = IR_SIRMODE | IR_FIRMODE | IR_OFF,
-       .transceiver_mode       = palmld_irda_transceiver_mode,
+       .gpio_pwdown            = GPIO_NR_PALMLD_IR_DISABLE,
+       .transceiver_cap        = IR_SIRMODE | IR_OFF,
 };
 
 /******************************************************************************
@@ -560,6 +502,7 @@ static struct platform_device *devices[] __initdata = {
        &power_supply,
        &palmld_asoc,
        &palmld_hdd,
+       &palmld_flash,
 };
 
 static struct map_desc palmld_io_desc[] __initdata = {
index 33f726f..2dd7ce2 100644 (file)
@@ -124,83 +124,12 @@ static unsigned long palmt5_pin_config[] __initdata = {
 /******************************************************************************
  * SD/MMC card controller
  ******************************************************************************/
-static int palmt5_mci_init(struct device *dev, irq_handler_t palmt5_detect_int,
-                               void *data)
-{
-       int err = 0;
-
-       /* Setup an interrupt for detecting card insert/remove events */
-       err = gpio_request(GPIO_NR_PALMT5_SD_DETECT_N, "SD IRQ");
-       if (err)
-               goto err;
-       err = gpio_direction_input(GPIO_NR_PALMT5_SD_DETECT_N);
-       if (err)
-               goto err2;
-       err = request_irq(gpio_to_irq(GPIO_NR_PALMT5_SD_DETECT_N),
-                       palmt5_detect_int, IRQF_DISABLED | IRQF_SAMPLE_RANDOM |
-                       IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
-                       "SD/MMC card detect", data);
-       if (err) {
-               printk(KERN_ERR "%s: cannot request SD/MMC card detect IRQ\n",
-                               __func__);
-               goto err2;
-       }
-
-       err = gpio_request(GPIO_NR_PALMT5_SD_POWER, "SD_POWER");
-       if (err)
-               goto err3;
-       err = gpio_direction_output(GPIO_NR_PALMT5_SD_POWER, 0);
-       if (err)
-               goto err4;
-
-       err = gpio_request(GPIO_NR_PALMT5_SD_READONLY, "SD_READONLY");
-       if (err)
-               goto err4;
-       err = gpio_direction_input(GPIO_NR_PALMT5_SD_READONLY);
-       if (err)
-               goto err5;
-
-       printk(KERN_DEBUG "%s: irq registered\n", __func__);
-
-       return 0;
-
-err5:
-       gpio_free(GPIO_NR_PALMT5_SD_READONLY);
-err4:
-       gpio_free(GPIO_NR_PALMT5_SD_POWER);
-err3:
-       free_irq(gpio_to_irq(GPIO_NR_PALMT5_SD_DETECT_N), data);
-err2:
-       gpio_free(GPIO_NR_PALMT5_SD_DETECT_N);
-err:
-       return err;
-}
-
-static void palmt5_mci_exit(struct device *dev, void *data)
-{
-       gpio_free(GPIO_NR_PALMT5_SD_READONLY);
-       gpio_free(GPIO_NR_PALMT5_SD_POWER);
-       free_irq(IRQ_GPIO_PALMT5_SD_DETECT_N, data);
-       gpio_free(GPIO_NR_PALMT5_SD_DETECT_N);
-}
-
-static void palmt5_mci_power(struct device *dev, unsigned int vdd)
-{
-       struct pxamci_platform_data *p_d = dev->platform_data;
-       gpio_set_value(GPIO_NR_PALMT5_SD_POWER, p_d->ocr_mask & (1 << vdd));
-}
-
-static int palmt5_mci_get_ro(struct device *dev)
-{
-       return gpio_get_value(GPIO_NR_PALMT5_SD_READONLY);
-}
-
 static struct pxamci_platform_data palmt5_mci_platform_data = {
-       .ocr_mask       = MMC_VDD_32_33 | MMC_VDD_33_34,
-       .setpower       = palmt5_mci_power,
-       .get_ro         = palmt5_mci_get_ro,
-       .init           = palmt5_mci_init,
-       .exit           = palmt5_mci_exit,
+       .ocr_mask               = MMC_VDD_32_33 | MMC_VDD_33_34,
+       .gpio_card_detect       = GPIO_NR_PALMT5_SD_DETECT_N,
+       .gpio_card_ro           = GPIO_NR_PALMT5_SD_READONLY,
+       .gpio_power             = GPIO_NR_PALMT5_SD_POWER,
+       .detect_delay           = 20,
 };
 
 /******************************************************************************
@@ -314,35 +243,9 @@ static struct platform_device palmt5_backlight = {
 /******************************************************************************
  * IrDA
  ******************************************************************************/
-static int palmt5_irda_startup(struct device *dev)
-{
-       int err;
-       err = gpio_request(GPIO_NR_PALMT5_IR_DISABLE, "IR DISABLE");
-       if (err)
-               goto err;
-       err = gpio_direction_output(GPIO_NR_PALMT5_IR_DISABLE, 1);
-       if (err)
-               gpio_free(GPIO_NR_PALMT5_IR_DISABLE);
-err:
-       return err;
-}
-
-static void palmt5_irda_shutdown(struct device *dev)
-{
-       gpio_free(GPIO_NR_PALMT5_IR_DISABLE);
-}
-
-static void palmt5_irda_transceiver_mode(struct device *dev, int mode)
-{
-       gpio_set_value(GPIO_NR_PALMT5_IR_DISABLE, mode & IR_OFF);
-       pxa2xx_transceiver_mode(dev, mode);
-}
-
 static struct pxaficp_platform_data palmt5_ficp_platform_data = {
-       .startup                = palmt5_irda_startup,
-       .shutdown               = palmt5_irda_shutdown,
-       .transceiver_cap        = IR_SIRMODE | IR_FIRMODE | IR_OFF,
-       .transceiver_mode       = palmt5_irda_transceiver_mode,
+       .gpio_pwdown            = GPIO_NR_PALMT5_IR_DISABLE,
+       .transceiver_cap        = IR_SIRMODE | IR_OFF,
 };
 
 /******************************************************************************
diff --git a/arch/arm/mach-pxa/palmtc.c b/arch/arm/mach-pxa/palmtc.c
new file mode 100644 (file)
index 0000000..bb2cc0d
--- /dev/null
@@ -0,0 +1,436 @@
+/*
+ * linux/arch/arm/mach-pxa/palmtc.c
+ *
+ * Support for the Palm Tungsten|C
+ *
+ * Author:     Marek Vasut <marek.vasut@gmail.com>
+ *
+ * Based on work of:
+ *             Petr Blaha <p3t3@centrum.cz>
+ *             Chetan S. Kumar <shivakumar.chetan@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/input.h>
+#include <linux/pwm_backlight.h>
+#include <linux/gpio.h>
+#include <linux/input/matrix_keypad.h>
+#include <linux/ucb1400.h>
+#include <linux/power_supply.h>
+#include <linux/gpio_keys.h>
+#include <linux/mtd/physmap.h>
+
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+
+#include <mach/audio.h>
+#include <mach/palmtc.h>
+#include <mach/mmc.h>
+#include <mach/pxafb.h>
+#include <mach/mfp-pxa25x.h>
+#include <mach/irda.h>
+#include <mach/udc.h>
+#include <mach/pxa2xx-regs.h>
+
+#include "generic.h"
+#include "devices.h"
+
+/******************************************************************************
+ * Pin configuration
+ ******************************************************************************/
+static unsigned long palmtc_pin_config[] __initdata = {
+       /* MMC */
+       GPIO6_MMC_CLK,
+       GPIO8_MMC_CS0,
+       GPIO12_GPIO,    /* detect */
+       GPIO32_GPIO,    /* power */
+       GPIO54_GPIO,    /* r/o switch */
+
+       /* PCMCIA */
+       GPIO52_nPCE_1,
+       GPIO53_nPCE_2,
+       GPIO50_nPIOR,
+       GPIO51_nPIOW,
+       GPIO49_nPWE,
+       GPIO48_nPOE,
+       GPIO52_nPCE_1,
+       GPIO53_nPCE_2,
+       GPIO57_nIOIS16,
+       GPIO56_nPWAIT,
+
+       /* AC97 */
+       GPIO28_AC97_BITCLK,
+       GPIO29_AC97_SDATA_IN_0,
+       GPIO30_AC97_SDATA_OUT,
+       GPIO31_AC97_SYNC,
+
+       /* IrDA */
+       GPIO45_GPIO,    /* ir disable */
+       GPIO46_FICP_RXD,
+       GPIO47_FICP_TXD,
+
+       /* PWM */
+       GPIO17_PWM1_OUT,
+
+       /* USB */
+       GPIO4_GPIO,     /* detect */
+       GPIO36_GPIO,    /* pullup */
+
+       /* LCD */
+       GPIO58_LCD_LDD_0,
+       GPIO59_LCD_LDD_1,
+       GPIO60_LCD_LDD_2,
+       GPIO61_LCD_LDD_3,
+       GPIO62_LCD_LDD_4,
+       GPIO63_LCD_LDD_5,
+       GPIO64_LCD_LDD_6,
+       GPIO65_LCD_LDD_7,
+       GPIO66_LCD_LDD_8,
+       GPIO67_LCD_LDD_9,
+       GPIO68_LCD_LDD_10,
+       GPIO69_LCD_LDD_11,
+       GPIO70_LCD_LDD_12,
+       GPIO71_LCD_LDD_13,
+       GPIO72_LCD_LDD_14,
+       GPIO73_LCD_LDD_15,
+       GPIO74_LCD_FCLK,
+       GPIO75_LCD_LCLK,
+       GPIO76_LCD_PCLK,
+       GPIO77_LCD_BIAS,
+
+       /* MATRIX KEYPAD */
+       GPIO0_GPIO | WAKEUP_ON_EDGE_BOTH,       /* in 0 */
+       GPIO9_GPIO | WAKEUP_ON_EDGE_BOTH,       /* in 1 */
+       GPIO10_GPIO | WAKEUP_ON_EDGE_BOTH,      /* in 2 */
+       GPIO11_GPIO | WAKEUP_ON_EDGE_BOTH,      /* in 3 */
+       GPIO18_GPIO | MFP_LPM_DRIVE_LOW,        /* out 0 */
+       GPIO19_GPIO | MFP_LPM_DRIVE_LOW,        /* out 1 */
+       GPIO20_GPIO | MFP_LPM_DRIVE_LOW,        /* out 2 */
+       GPIO21_GPIO | MFP_LPM_DRIVE_LOW,        /* out 3 */
+       GPIO22_GPIO | MFP_LPM_DRIVE_LOW,        /* out 4 */
+       GPIO23_GPIO | MFP_LPM_DRIVE_LOW,        /* out 5 */
+       GPIO24_GPIO | MFP_LPM_DRIVE_LOW,        /* out 6 */
+       GPIO25_GPIO | MFP_LPM_DRIVE_LOW,        /* out 7 */
+       GPIO26_GPIO | MFP_LPM_DRIVE_LOW,        /* out 8 */
+       GPIO27_GPIO | MFP_LPM_DRIVE_LOW,        /* out 9 */
+       GPIO79_GPIO | MFP_LPM_DRIVE_LOW,        /* out 10 */
+       GPIO80_GPIO | MFP_LPM_DRIVE_LOW,        /* out 11 */
+
+       /* PXA GPIO KEYS */
+       GPIO7_GPIO | WAKEUP_ON_EDGE_BOTH,       /* hotsync button on cradle */
+
+       /* MISC */
+       GPIO1_RST,      /* reset */
+       GPIO2_GPIO,     /* earphone detect */
+       GPIO16_GPIO,    /* backlight switch */
+};
+
+/******************************************************************************
+ * SD/MMC card controller
+ ******************************************************************************/
+static struct pxamci_platform_data palmtc_mci_platform_data = {
+       .ocr_mask               = MMC_VDD_32_33 | MMC_VDD_33_34,
+       .gpio_power             = GPIO_NR_PALMTC_SD_POWER,
+       .gpio_card_ro           = GPIO_NR_PALMTC_SD_READONLY,
+       .gpio_card_detect       = GPIO_NR_PALMTC_SD_DETECT_N,
+       .detect_delay           = 20,
+};
+
+/******************************************************************************
+ * GPIO keys
+ ******************************************************************************/
+static struct gpio_keys_button palmtc_pxa_buttons[] = {
+       {KEY_F8, GPIO_NR_PALMTC_HOTSYNC_BUTTON, 1, "HotSync Button", EV_KEY, 1},
+};
+
+static struct gpio_keys_platform_data palmtc_pxa_keys_data = {
+       .buttons        = palmtc_pxa_buttons,
+       .nbuttons       = ARRAY_SIZE(palmtc_pxa_buttons),
+};
+
+static struct platform_device palmtc_pxa_keys = {
+       .name   = "gpio-keys",
+       .id     = -1,
+       .dev    = {
+               .platform_data = &palmtc_pxa_keys_data,
+       },
+};
+
+/******************************************************************************
+ * Backlight
+ ******************************************************************************/
+static int palmtc_backlight_init(struct device *dev)
+{
+       int ret;
+
+       ret = gpio_request(GPIO_NR_PALMTC_BL_POWER, "BL POWER");
+       if (ret)
+               goto err;
+       ret = gpio_direction_output(GPIO_NR_PALMTC_BL_POWER, 1);
+       if (ret)
+               goto err2;
+
+       return 0;
+
+err2:
+       gpio_free(GPIO_NR_PALMTC_BL_POWER);
+err:
+       return ret;
+}
+
+static int palmtc_backlight_notify(int brightness)
+{
+       /* backlight is on when GPIO16 AF0 is high */
+       gpio_set_value(GPIO_NR_PALMTC_BL_POWER, brightness);
+       return brightness;
+}
+
+static void palmtc_backlight_exit(struct device *dev)
+{
+       gpio_free(GPIO_NR_PALMTC_BL_POWER);
+}
+
+static struct platform_pwm_backlight_data palmtc_backlight_data = {
+       .pwm_id         = 1,
+       .max_brightness = PALMTC_MAX_INTENSITY,
+       .dft_brightness = PALMTC_MAX_INTENSITY,
+       .pwm_period_ns  = PALMTC_PERIOD_NS,
+       .init           = palmtc_backlight_init,
+       .notify         = palmtc_backlight_notify,
+       .exit           = palmtc_backlight_exit,
+};
+
+static struct platform_device palmtc_backlight = {
+       .name   = "pwm-backlight",
+       .dev    = {
+               .parent         = &pxa25x_device_pwm1.dev,
+               .platform_data  = &palmtc_backlight_data,
+       },
+};
+
+/******************************************************************************
+ * IrDA
+ ******************************************************************************/
+static struct pxaficp_platform_data palmtc_ficp_platform_data = {
+       .gpio_pwdown            = GPIO_NR_PALMTC_IR_DISABLE,
+       .transceiver_cap        = IR_SIRMODE | IR_OFF,
+};
+
+/******************************************************************************
+ * Keyboard
+ ******************************************************************************/
+static const uint32_t palmtc_matrix_keys[] = {
+       KEY(0, 0, KEY_F1),
+       KEY(0, 1, KEY_X),
+       KEY(0, 2, KEY_POWER),
+       KEY(0, 3, KEY_TAB),
+       KEY(0, 4, KEY_A),
+       KEY(0, 5, KEY_Q),
+       KEY(0, 6, KEY_LEFTSHIFT),
+       KEY(0, 7, KEY_Z),
+       KEY(0, 8, KEY_S),
+       KEY(0, 9, KEY_W),
+       KEY(0, 10, KEY_E),
+       KEY(0, 11, KEY_UP),
+
+       KEY(1, 0, KEY_F2),
+       KEY(1, 1, KEY_DOWN),
+       KEY(1, 3, KEY_D),
+       KEY(1, 4, KEY_C),
+       KEY(1, 5, KEY_F),
+       KEY(1, 6, KEY_R),
+       KEY(1, 7, KEY_SPACE),
+       KEY(1, 8, KEY_V),
+       KEY(1, 9, KEY_G),
+       KEY(1, 10, KEY_T),
+       KEY(1, 11, KEY_LEFT),
+
+       KEY(2, 0, KEY_F3),
+       KEY(2, 1, KEY_LEFTCTRL),
+       KEY(2, 3, KEY_H),
+       KEY(2, 4, KEY_Y),
+       KEY(2, 5, KEY_N),
+       KEY(2, 6, KEY_J),
+       KEY(2, 7, KEY_U),
+       KEY(2, 8, KEY_M),
+       KEY(2, 9, KEY_K),
+       KEY(2, 10, KEY_I),
+       KEY(2, 11, KEY_RIGHT),
+
+       KEY(3, 0, KEY_F4),
+       KEY(3, 1, KEY_ENTER),
+       KEY(3, 3, KEY_DOT),
+       KEY(3, 4, KEY_L),
+       KEY(3, 5, KEY_O),
+       KEY(3, 6, KEY_LEFTALT),
+       KEY(3, 7, KEY_ENTER),
+       KEY(3, 8, KEY_BACKSPACE),
+       KEY(3, 9, KEY_P),
+       KEY(3, 10, KEY_B),
+       KEY(3, 11, KEY_FN),
+};
+
+static const struct matrix_keymap_data palmtc_keymap_data = {
+       .keymap                 = palmtc_matrix_keys,
+       .keymap_size            = ARRAY_SIZE(palmtc_matrix_keys),
+};
+
+static const unsigned int palmtc_keypad_row_gpios[] = {
+       0, 9, 10, 11
+};
+
+static const unsigned int palmtc_keypad_col_gpios[] = {
+       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 79, 80
+};
+
+static struct matrix_keypad_platform_data palmtc_keypad_platform_data = {
+       .keymap_data    = &palmtc_keymap_data,
+       .row_gpios      = palmtc_keypad_row_gpios,
+       .num_row_gpios  = ARRAY_SIZE(palmtc_keypad_row_gpios),
+       .col_gpios      = palmtc_keypad_col_gpios,
+       .num_col_gpios  = ARRAY_SIZE(palmtc_keypad_col_gpios),
+       .active_low     = 1,
+
+       .debounce_ms            = 20,
+       .col_scan_delay_us      = 5,
+};
+
+static struct platform_device palmtc_keyboard = {
+       .name   = "matrix-keypad",
+       .id     = -1,
+       .dev    = {
+               .platform_data = &palmtc_keypad_platform_data,
+       },
+};
+
+/******************************************************************************
+ * UDC
+ ******************************************************************************/
+static struct pxa2xx_udc_mach_info palmtc_udc_info __initdata = {
+       .gpio_vbus              = GPIO_NR_PALMTC_USB_DETECT_N,
+       .gpio_vbus_inverted     = 1,
+       .gpio_pullup            = GPIO_NR_PALMTC_USB_POWER,
+};
+
+/******************************************************************************
+ * Touchscreen / Battery / GPIO-extender
+ ******************************************************************************/
+static struct platform_device palmtc_ucb1400_core = {
+       .name   = "ucb1400_core",
+       .id     = -1,
+};
+
+/******************************************************************************
+ * NOR Flash
+ ******************************************************************************/
+static struct resource palmtc_flash_resource = {
+       .start  = PXA_CS0_PHYS,
+       .end    = PXA_CS0_PHYS + SZ_16M - 1,
+       .flags  = IORESOURCE_MEM,
+};
+
+static struct mtd_partition palmtc_flash_parts[] = {
+       {
+               .name   = "U-Boot Bootloader",
+               .offset = 0x0,
+               .size   = 0x40000,
+       },
+       {
+               .name   = "Linux Kernel",
+               .offset = 0x40000,
+               .size   = 0x2c0000,
+       },
+       {
+               .name   = "Filesystem",
+               .offset = 0x300000,
+               .size   = 0xcc0000,
+       },
+       {
+               .name   = "U-Boot Environment",
+               .offset = 0xfc0000,
+               .size   = MTDPART_SIZ_FULL,
+       },
+};
+
+static struct physmap_flash_data palmtc_flash_data = {
+       .width          = 4,
+       .parts          = palmtc_flash_parts,
+       .nr_parts       = ARRAY_SIZE(palmtc_flash_parts),
+};
+
+static struct platform_device palmtc_flash = {
+       .name           = "physmap-flash",
+       .id             = -1,
+       .resource       = &palmtc_flash_resource,
+       .num_resources  = 1,
+       .dev = {
+               .platform_data  = &palmtc_flash_data,
+       },
+};
+
+/******************************************************************************
+ * Framebuffer
+ ******************************************************************************/
+static struct pxafb_mode_info palmtc_lcd_modes[] = {
+{
+       .pixclock       = 115384,
+       .xres           = 320,
+       .yres           = 320,
+       .bpp            = 16,
+
+       .left_margin    = 27,
+       .right_margin   = 7,
+       .upper_margin   = 7,
+       .lower_margin   = 8,
+
+       .hsync_len      = 6,
+       .vsync_len      = 1,
+},
+};
+
+static struct pxafb_mach_info palmtc_lcd_screen = {
+       .modes                  = palmtc_lcd_modes,
+       .num_modes              = ARRAY_SIZE(palmtc_lcd_modes),
+       .lcd_conn               = LCD_COLOR_TFT_16BPP | LCD_PCLK_EDGE_FALL,
+};
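+
+/*
+ * Editor's note -- worked numbers for the mode above: a pixclock of
+ * 115384 ps is a ~8.67 MHz dot clock; horizontal total is
+ * 320 + 27 + 7 + 6 = 360 clocks and vertical total is
+ * 320 + 7 + 8 + 1 = 336 lines, so the panel refreshes at about
+ * 8.67e6 / (360 * 336) ~= 72 Hz.
+ */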
+
+/******************************************************************************
+ * Machine init
+ ******************************************************************************/
+static struct platform_device *devices[] __initdata = {
+       &palmtc_backlight,
+       &palmtc_ucb1400_core,
+       &palmtc_keyboard,
+       &palmtc_pxa_keys,
+       &palmtc_flash,
+};
+
+static void __init palmtc_init(void)
+{
+       pxa2xx_mfp_config(ARRAY_AND_SIZE(palmtc_pin_config));
+
+       set_pxa_fb_info(&palmtc_lcd_screen);
+       pxa_set_mci_info(&palmtc_mci_platform_data);
+       pxa_set_udc_info(&palmtc_udc_info);
+       pxa_set_ac97_info(NULL);
+       pxa_set_ficp_info(&palmtc_ficp_platform_data);
+
+       platform_add_devices(devices, ARRAY_SIZE(devices));
+}
+
+MACHINE_START(PALMTC, "Palm Tungsten|C")
+       .phys_io        = 0x40000000,
+       .boot_params    = 0xa0000100,
+       .io_pg_offst    = (io_p2v(0x40000000) >> 18) & 0xfffc,
+       .map_io         = pxa_map_io,
+       .init_irq       = pxa25x_init_irq,
+       .timer          = &pxa_timer,
+       .init_machine   = palmtc_init
+MACHINE_END
index d823b09..277c406 100644 (file)
@@ -117,83 +117,11 @@ static unsigned long palmte2_pin_config[] __initdata = {
 /******************************************************************************
  * SD/MMC card controller
  ******************************************************************************/
-static int palmte2_mci_init(struct device *dev,
-                               irq_handler_t palmte2_detect_int, void *data)
-{
-       int err = 0;
-
-       /* Setup an interrupt for detecting card insert/remove events */
-       err = gpio_request(GPIO_NR_PALMTE2_SD_DETECT_N, "SD IRQ");
-       if (err)
-               goto err;
-       err = gpio_direction_input(GPIO_NR_PALMTE2_SD_DETECT_N);
-       if (err)
-               goto err2;
-       err = request_irq(gpio_to_irq(GPIO_NR_PALMTE2_SD_DETECT_N),
-                       palmte2_detect_int, IRQF_DISABLED | IRQF_SAMPLE_RANDOM |
-                       IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
-                       "SD/MMC card detect", data);
-       if (err) {
-               printk(KERN_ERR "%s: cannot request SD/MMC card detect IRQ\n",
-                               __func__);
-               goto err2;
-       }
-
-       err = gpio_request(GPIO_NR_PALMTE2_SD_POWER, "SD_POWER");
-       if (err)
-               goto err3;
-       err = gpio_direction_output(GPIO_NR_PALMTE2_SD_POWER, 0);
-       if (err)
-               goto err4;
-
-       err = gpio_request(GPIO_NR_PALMTE2_SD_READONLY, "SD_READONLY");
-       if (err)
-               goto err4;
-       err = gpio_direction_input(GPIO_NR_PALMTE2_SD_READONLY);
-       if (err)
-               goto err5;
-
-       printk(KERN_DEBUG "%s: irq registered\n", __func__);
-
-       return 0;
-
-err5:
-       gpio_free(GPIO_NR_PALMTE2_SD_READONLY);
-err4:
-       gpio_free(GPIO_NR_PALMTE2_SD_POWER);
-err3:
-       free_irq(gpio_to_irq(GPIO_NR_PALMTE2_SD_DETECT_N), data);
-err2:
-       gpio_free(GPIO_NR_PALMTE2_SD_DETECT_N);
-err:
-       return err;
-}
-
-static void palmte2_mci_exit(struct device *dev, void *data)
-{
-       gpio_free(GPIO_NR_PALMTE2_SD_READONLY);
-       gpio_free(GPIO_NR_PALMTE2_SD_POWER);
-       free_irq(gpio_to_irq(GPIO_NR_PALMTE2_SD_DETECT_N), data);
-       gpio_free(GPIO_NR_PALMTE2_SD_DETECT_N);
-}
-
-static void palmte2_mci_power(struct device *dev, unsigned int vdd)
-{
-       struct pxamci_platform_data *p_d = dev->platform_data;
-       gpio_set_value(GPIO_NR_PALMTE2_SD_POWER, p_d->ocr_mask & (1 << vdd));
-}
-
-static int palmte2_mci_get_ro(struct device *dev)
-{
-       return gpio_get_value(GPIO_NR_PALMTE2_SD_READONLY);
-}
-
 static struct pxamci_platform_data palmte2_mci_platform_data = {
-       .ocr_mask       = MMC_VDD_32_33 | MMC_VDD_33_34,
-       .setpower       = palmte2_mci_power,
-       .get_ro         = palmte2_mci_get_ro,
-       .init           = palmte2_mci_init,
-       .exit           = palmte2_mci_exit,
+       .ocr_mask               = MMC_VDD_32_33 | MMC_VDD_33_34,
+       .gpio_card_detect       = GPIO_NR_PALMTE2_SD_DETECT_N,
+       .gpio_card_ro           = GPIO_NR_PALMTE2_SD_READONLY,
+       .gpio_power             = GPIO_NR_PALMTE2_SD_POWER,
 };
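
The hunk above shows the pattern this merge repeats across the Palm boards: some eighty lines of per-board gpio_request()/request_irq() plumbing collapse into three GPIO descriptors, and the pxamci host driver claims the card-detect, read-only and power lines itself. Boards that lack a given line pass -1 (see the trizeps4 and zylonite hunks further down), and .detect_delay stays in jiffies, which is why poodle, spitz and tosa move msecs_to_jiffies(250) into their init functions instead of a struct initializer. Below is a minimal sketch of the driver-side handling this conversion assumes; pxamci_claim_board_gpios() is an illustrative name, not code from this diff, and error unwinding is omitted:

	/* sketch only: how the host driver is assumed to consume the
	 * new gpio_* fields of pxamci_platform_data */
	static int pxamci_claim_board_gpios(struct pxamci_platform_data *pdata,
					    irq_handler_t detect_irq, void *data)
	{
		int ret = 0;

		if (gpio_is_valid(pdata->gpio_power)) {
			ret = gpio_request(pdata->gpio_power, "mmc card power");
			if (ret)
				return ret;
			/* start with the supply off; the initial level is the
			 * invert flag, so active-low boards start driven high */
			gpio_direction_output(pdata->gpio_power,
					      pdata->gpio_power_invert);
		}

		if (gpio_is_valid(pdata->gpio_card_ro)) {
			ret = gpio_request(pdata->gpio_card_ro, "mmc card read only");
			if (ret)
				return ret;
			gpio_direction_input(pdata->gpio_card_ro);
		}

		if (gpio_is_valid(pdata->gpio_card_detect)) {
			ret = gpio_request(pdata->gpio_card_detect, "mmc card detect");
			if (ret)
				return ret;
			gpio_direction_input(pdata->gpio_card_detect);
			ret = request_irq(gpio_to_irq(pdata->gpio_card_detect),
					  detect_irq,
					  IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
					  "mmc card detect", data);
		}

		return ret;
	}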
 
 /******************************************************************************
@@ -287,35 +215,9 @@ static struct platform_device palmte2_backlight = {
 /******************************************************************************
  * IrDA
  ******************************************************************************/
-static int palmte2_irda_startup(struct device *dev)
-{
-       int err;
-       err = gpio_request(GPIO_NR_PALMTE2_IR_DISABLE, "IR DISABLE");
-       if (err)
-               goto err;
-       err = gpio_direction_output(GPIO_NR_PALMTE2_IR_DISABLE, 1);
-       if (err)
-               gpio_free(GPIO_NR_PALMTE2_IR_DISABLE);
-err:
-       return err;
-}
-
-static void palmte2_irda_shutdown(struct device *dev)
-{
-       gpio_free(GPIO_NR_PALMTE2_IR_DISABLE);
-}
-
-static void palmte2_irda_transceiver_mode(struct device *dev, int mode)
-{
-       gpio_set_value(GPIO_NR_PALMTE2_IR_DISABLE, mode & IR_OFF);
-       pxa2xx_transceiver_mode(dev, mode);
-}
-
 static struct pxaficp_platform_data palmte2_ficp_platform_data = {
-       .startup                = palmte2_irda_startup,
-       .shutdown               = palmte2_irda_shutdown,
-       .transceiver_cap        = IR_SIRMODE | IR_FIRMODE | IR_OFF,
-       .transceiver_mode       = palmte2_irda_transceiver_mode,
+       .gpio_pwdown            = GPIO_NR_PALMTE2_IR_DISABLE,
+       .transceiver_cap        = IR_SIRMODE | IR_OFF,
 };
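
IrDA gets the same treatment: .gpio_pwdown names the transceiver's power-down line and the pxaficp_ir driver toggles it, so the per-board startup/shutdown/transceiver_mode trio can go. A sketch of the assumed consumer side, taking the GPIO as an active-high shutdown as on the IR_DISABLE lines used here:

	/* sketch: driver-side transceiver power switching */
	static void ficp_set_power(struct pxaficp_platform_data *pdata, int on)
	{
		if (gpio_is_valid(pdata->gpio_pwdown))
			gpio_set_value(pdata->gpio_pwdown, !on);
	}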
 
 /******************************************************************************
index 83d0208..76a2b37 100644
 #include <linux/wm97xx_batt.h>
 #include <linux/power_supply.h>
 #include <linux/usb/gpio_vbus.h>
+#include <linux/mtd/nand.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/physmap.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -131,6 +135,10 @@ static unsigned long palmtx_pin_config[] __initdata = {
        GPIO34_FFUART_RXD,
        GPIO39_FFUART_TXD,
 
+       /* NAND */
+       GPIO15_nCS_1,
+       GPIO18_RDY,
+
        /* MISC. */
        GPIO10_GPIO,    /* hotsync button */
        GPIO12_GPIO,    /* power detect */
@@ -138,85 +146,50 @@ static unsigned long palmtx_pin_config[] __initdata = {
 };
 
 /******************************************************************************
- * SD/MMC card controller
+ * NOR Flash
  ******************************************************************************/
-static int palmtx_mci_init(struct device *dev, irq_handler_t palmtx_detect_int,
-                               void *data)
-{
-       int err = 0;
-
-       /* Setup an interrupt for detecting card insert/remove events */
-       err = gpio_request(GPIO_NR_PALMTX_SD_DETECT_N, "SD IRQ");
-       if (err)
-               goto err;
-       err = gpio_direction_input(GPIO_NR_PALMTX_SD_DETECT_N);
-       if (err)
-               goto err2;
-       err = request_irq(gpio_to_irq(GPIO_NR_PALMTX_SD_DETECT_N),
-                       palmtx_detect_int, IRQF_DISABLED | IRQF_SAMPLE_RANDOM |
-                       IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
-                       "SD/MMC card detect", data);
-       if (err) {
-               printk(KERN_ERR "%s: cannot request SD/MMC card detect IRQ\n",
-                               __func__);
-               goto err2;
+static struct mtd_partition palmtx_partitions[] = {
+       {
+               .name           = "Flash",
+               .offset         = 0x00000000,
+               .size           = MTDPART_SIZ_FULL,
+               .mask_flags     = 0
        }
+};
 
-       err = gpio_request(GPIO_NR_PALMTX_SD_POWER, "SD_POWER");
-       if (err)
-               goto err3;
-       err = gpio_direction_output(GPIO_NR_PALMTX_SD_POWER, 0);
-       if (err)
-               goto err4;
-
-       err = gpio_request(GPIO_NR_PALMTX_SD_READONLY, "SD_READONLY");
-       if (err)
-               goto err4;
-       err = gpio_direction_input(GPIO_NR_PALMTX_SD_READONLY);
-       if (err)
-               goto err5;
-
-       printk(KERN_DEBUG "%s: irq registered\n", __func__);
-
-       return 0;
-
-err5:
-       gpio_free(GPIO_NR_PALMTX_SD_READONLY);
-err4:
-       gpio_free(GPIO_NR_PALMTX_SD_POWER);
-err3:
-       free_irq(gpio_to_irq(GPIO_NR_PALMTX_SD_DETECT_N), data);
-err2:
-       gpio_free(GPIO_NR_PALMTX_SD_DETECT_N);
-err:
-       return err;
-}
-
-static void palmtx_mci_exit(struct device *dev, void *data)
-{
-       gpio_free(GPIO_NR_PALMTX_SD_READONLY);
-       gpio_free(GPIO_NR_PALMTX_SD_POWER);
-       free_irq(gpio_to_irq(GPIO_NR_PALMTX_SD_DETECT_N), data);
-       gpio_free(GPIO_NR_PALMTX_SD_DETECT_N);
-}
+static struct physmap_flash_data palmtx_flash_data[] = {
+       {
+               .width          = 2,                    /* bankwidth in bytes */
+               .parts          = palmtx_partitions,
+               .nr_parts       = ARRAY_SIZE(palmtx_partitions)
+       }
+};
 
-static void palmtx_mci_power(struct device *dev, unsigned int vdd)
-{
-       struct pxamci_platform_data *p_d = dev->platform_data;
-       gpio_set_value(GPIO_NR_PALMTX_SD_POWER, p_d->ocr_mask & (1 << vdd));
-}
+static struct resource palmtx_flash_resource = {
+       .start  = PXA_CS0_PHYS,
+       .end    = PXA_CS0_PHYS + SZ_8M - 1,
+       .flags  = IORESOURCE_MEM,
+};
 
-static int palmtx_mci_get_ro(struct device *dev)
-{
-       return gpio_get_value(GPIO_NR_PALMTX_SD_READONLY);
-}
+static struct platform_device palmtx_flash = {
+       .name           = "physmap-flash",
+       .id             = 0,
+       .resource       = &palmtx_flash_resource,
+       .num_resources  = 1,
+       .dev            = {
+               .platform_data = palmtx_flash_data,
+       },
+};
 
+/******************************************************************************
+ * SD/MMC card controller
+ ******************************************************************************/
 static struct pxamci_platform_data palmtx_mci_platform_data = {
-       .ocr_mask       = MMC_VDD_32_33 | MMC_VDD_33_34,
-       .setpower       = palmtx_mci_power,
-       .get_ro         = palmtx_mci_get_ro,
-       .init           = palmtx_mci_init,
-       .exit           = palmtx_mci_exit,
+       .ocr_mask               = MMC_VDD_32_33 | MMC_VDD_33_34,
+       .gpio_card_detect       = GPIO_NR_PALMTX_SD_DETECT_N,
+       .gpio_card_ro           = GPIO_NR_PALMTX_SD_READONLY,
+       .gpio_power             = GPIO_NR_PALMTX_SD_POWER,
+       .detect_delay           = 20,
 };
 
 /******************************************************************************
@@ -330,35 +303,9 @@ static struct platform_device palmtx_backlight = {
 /******************************************************************************
  * IrDA
  ******************************************************************************/
-static int palmtx_irda_startup(struct device *dev)
-{
-       int err;
-       err = gpio_request(GPIO_NR_PALMTX_IR_DISABLE, "IR DISABLE");
-       if (err)
-               goto err;
-       err = gpio_direction_output(GPIO_NR_PALMTX_IR_DISABLE, 1);
-       if (err)
-               gpio_free(GPIO_NR_PALMTX_IR_DISABLE);
-err:
-       return err;
-}
-
-static void palmtx_irda_shutdown(struct device *dev)
-{
-       gpio_free(GPIO_NR_PALMTX_IR_DISABLE);
-}
-
-static void palmtx_irda_transceiver_mode(struct device *dev, int mode)
-{
-       gpio_set_value(GPIO_NR_PALMTX_IR_DISABLE, mode & IR_OFF);
-       pxa2xx_transceiver_mode(dev, mode);
-}
-
 static struct pxaficp_platform_data palmtx_ficp_platform_data = {
-       .startup                = palmtx_irda_startup,
-       .shutdown               = palmtx_irda_shutdown,
-       .transceiver_cap        = IR_SIRMODE | IR_FIRMODE | IR_OFF,
-       .transceiver_mode       = palmtx_irda_transceiver_mode,
+       .gpio_pwdown            = GPIO_NR_PALMTX_IR_DISABLE,
+       .transceiver_cap        = IR_SIRMODE | IR_OFF,
 };
 
 /******************************************************************************
@@ -492,6 +439,68 @@ static struct pxafb_mach_info palmtx_lcd_screen = {
        .lcd_conn       = LCD_COLOR_TFT_16BPP | LCD_PCLK_EDGE_FALL,
 };
 
+/******************************************************************************
+ * NAND Flash
+ ******************************************************************************/
+static void palmtx_nand_cmd_ctl(struct mtd_info *mtd, int cmd,
+                                unsigned int ctrl)
+{
+       struct nand_chip *this = mtd->priv;
+       unsigned long nandaddr = (unsigned long)this->IO_ADDR_W;
+
+       if (cmd == NAND_CMD_NONE)
+               return;
+
+       if (ctrl & NAND_CLE)
+               writeb(cmd, PALMTX_NAND_CLE_VIRT);
+       else if (ctrl & NAND_ALE)
+               writeb(cmd, PALMTX_NAND_ALE_VIRT);
+       else
+               writeb(cmd, nandaddr);
+}
+
+static struct mtd_partition palmtx_partition_info[] = {
+       [0] = {
+               .name   = "palmtx-0",
+               .offset = 0,
+               .size   = MTDPART_SIZ_FULL
+       },
+};
+
+static const char *palmtx_part_probes[] = { "cmdlinepart", NULL };
+
+static struct platform_nand_data palmtx_nand_platdata = {
+       .chip   = {
+               .nr_chips               = 1,
+               .chip_offset            = 0,
+               .nr_partitions          = ARRAY_SIZE(palmtx_partition_info),
+               .partitions             = palmtx_partition_info,
+               .chip_delay             = 20,
+               .part_probe_types       = palmtx_part_probes,
+       },
+       .ctrl   = {
+               .cmd_ctrl       = palmtx_nand_cmd_ctl,
+       },
+};
+
+static struct resource palmtx_nand_resource[] = {
+       [0]     = {
+               .start  = PXA_CS1_PHYS,
+               .end    = PXA_CS1_PHYS + SZ_1M - 1,
+               .flags  = IORESOURCE_MEM,
+       },
+};
+
+static struct platform_device palmtx_nand = {
+       .name           = "gen_nand",
+       .num_resources  = ARRAY_SIZE(palmtx_nand_resource),
+       .resource       = palmtx_nand_resource,
+       .id             = -1,
+       .dev            = {
+               .platform_data  = &palmtx_nand_platdata,
+       }
+};
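
The cmd_ctl hook above works because the PalmTX routes the NAND chip's ALE and CLE pins through the address decoder: the map_desc hunk below maps one 1 MiB window per latch line, so writing a byte to PALMTX_NAND_CLE_VIRT asserts CLE for that bus cycle without touching a GPIO. For orientation, a sketch of how the gen_nand/nand_base core is expected to drive the hook when issuing a command (macros as in <linux/mtd/nand.h>; an illustrative call sequence, not code from this diff):

	chip->cmd_ctrl(mtd, NAND_CMD_READ0, NAND_CTRL_CLE | NAND_CTRL_CHANGE);
	chip->cmd_ctrl(mtd, column & 0xff, NAND_CTRL_ALE | NAND_CTRL_CHANGE);
	chip->cmd_ctrl(mtd, NAND_CMD_NONE, NAND_NCE);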
+
 /******************************************************************************
  * Power management - standby
  ******************************************************************************/
@@ -518,6 +527,8 @@ static struct platform_device *devices[] __initdata = {
        &power_supply,
        &palmtx_asoc,
        &palmtx_gpio_vbus,
+       &palmtx_flash,
+       &palmtx_nand,
 };
 
 static struct map_desc palmtx_io_desc[] __initdata = {
@@ -525,8 +536,18 @@ static struct map_desc palmtx_io_desc[] __initdata = {
        .virtual        = PALMTX_PCMCIA_VIRT,
        .pfn            = __phys_to_pfn(PALMTX_PCMCIA_PHYS),
        .length         = PALMTX_PCMCIA_SIZE,
-       .type           = MT_DEVICE
-},
+       .type           = MT_DEVICE,
+}, {
+       .virtual        = PALMTX_NAND_ALE_VIRT,
+       .pfn            = __phys_to_pfn(PALMTX_NAND_ALE_PHYS),
+       .length         = SZ_1M,
+       .type           = MT_DEVICE,
+}, {
+       .virtual        = PALMTX_NAND_CLE_VIRT,
+       .pfn            = __phys_to_pfn(PALMTX_NAND_CLE_PHYS),
+       .length         = SZ_1M,
+       .type           = MT_DEVICE,
+}
 };
 
 static void __init palmtx_map_io(void)
index c3645aa..c2bf493 100644
@@ -129,88 +129,14 @@ static unsigned long palmz72_pin_config[] __initdata = {
 /******************************************************************************
  * SD/MMC card controller
  ******************************************************************************/
-static int palmz72_mci_init(struct device *dev,
-                               irq_handler_t palmz72_detect_int, void *data)
-{
-       int err = 0;
-
-       /* Setup an interrupt for detecting card insert/remove events */
-       err = gpio_request(GPIO_NR_PALMZ72_SD_DETECT_N, "SD IRQ");
-       if (err)
-               goto err;
-       err = gpio_direction_input(GPIO_NR_PALMZ72_SD_DETECT_N);
-       if (err)
-               goto err2;
-       err = request_irq(gpio_to_irq(GPIO_NR_PALMZ72_SD_DETECT_N),
-                       palmz72_detect_int, IRQF_DISABLED | IRQF_SAMPLE_RANDOM |
-                       IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
-                       "SD/MMC card detect", data);
-       if (err) {
-               printk(KERN_ERR "%s: cannot request SD/MMC card detect IRQ\n",
-                               __func__);
-               goto err2;
-       }
-
-       /* SD_POWER is not actually power, but it is more like chip
-        * select, i.e. it is inverted */
-
-       err = gpio_request(GPIO_NR_PALMZ72_SD_POWER_N, "SD_POWER");
-       if (err)
-               goto err3;
-       err = gpio_direction_output(GPIO_NR_PALMZ72_SD_POWER_N, 0);
-       if (err)
-               goto err4;
-       err = gpio_request(GPIO_NR_PALMZ72_SD_RO, "SD_RO");
-       if (err)
-               goto err4;
-       err = gpio_direction_input(GPIO_NR_PALMZ72_SD_RO);
-       if (err)
-               goto err5;
-
-       printk(KERN_DEBUG "%s: irq registered\n", __func__);
-
-       return 0;
-
-err5:
-       gpio_free(GPIO_NR_PALMZ72_SD_RO);
-err4:
-       gpio_free(GPIO_NR_PALMZ72_SD_POWER_N);
-err3:
-       free_irq(gpio_to_irq(GPIO_NR_PALMZ72_SD_DETECT_N), data);
-err2:
-       gpio_free(GPIO_NR_PALMZ72_SD_DETECT_N);
-err:
-       return err;
-}
-
-static void palmz72_mci_exit(struct device *dev, void *data)
-{
-       gpio_free(GPIO_NR_PALMZ72_SD_POWER_N);
-       free_irq(gpio_to_irq(GPIO_NR_PALMZ72_SD_DETECT_N), data);
-       gpio_free(GPIO_NR_PALMZ72_SD_DETECT_N);
-       gpio_free(GPIO_NR_PALMZ72_SD_RO);
-}
-
-static void palmz72_mci_power(struct device *dev, unsigned int vdd)
-{
-       struct pxamci_platform_data *p_d = dev->platform_data;
-       if (p_d->ocr_mask & (1 << vdd))
-               gpio_set_value(GPIO_NR_PALMZ72_SD_POWER_N, 0);
-       else
-               gpio_set_value(GPIO_NR_PALMZ72_SD_POWER_N, 1);
-}
-
-static int palmz72_mci_ro(struct device *dev)
-{
-       return gpio_get_value(GPIO_NR_PALMZ72_SD_RO);
-}
-
+/* SD_POWER is not actually power, but it is more like chip
+ * select, i.e. it is inverted */
 static struct pxamci_platform_data palmz72_mci_platform_data = {
-       .ocr_mask       = MMC_VDD_32_33 | MMC_VDD_33_34,
-       .setpower       = palmz72_mci_power,
-       .get_ro         = palmz72_mci_ro,
-       .init           = palmz72_mci_init,
-       .exit           = palmz72_mci_exit,
+       .ocr_mask               = MMC_VDD_32_33 | MMC_VDD_33_34,
+       .gpio_card_detect       = GPIO_NR_PALMZ72_SD_DETECT_N,
+       .gpio_card_ro           = GPIO_NR_PALMZ72_SD_RO,
+       .gpio_power             = GPIO_NR_PALMZ72_SD_POWER_N,
+       .gpio_power_invert      = 1,
 };
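
The Zire 72's SD_POWER_N line is asserted low (the comment above likens it to a chip select), so rather than keeping a callback only for the inversion, the new .gpio_power_invert flag asks the driver to complement the value, roughly:

	/* sketch of the assumed set-power path in pxamci */
	gpio_set_value(pdata->gpio_power, !!on ^ !!pdata->gpio_power_invert);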
 
 /******************************************************************************
@@ -304,35 +230,9 @@ static struct platform_device palmz72_backlight = {
 /******************************************************************************
  * IrDA
  ******************************************************************************/
-static int palmz72_irda_startup(struct device *dev)
-{
-       int err;
-       err = gpio_request(GPIO_NR_PALMZ72_IR_DISABLE, "IR DISABLE");
-       if (err)
-               goto err;
-       err = gpio_direction_output(GPIO_NR_PALMZ72_IR_DISABLE, 1);
-       if (err)
-               gpio_free(GPIO_NR_PALMZ72_IR_DISABLE);
-err:
-       return err;
-}
-
-static void palmz72_irda_shutdown(struct device *dev)
-{
-       gpio_free(GPIO_NR_PALMZ72_IR_DISABLE);
-}
-
-static void palmz72_irda_transceiver_mode(struct device *dev, int mode)
-{
-       gpio_set_value(GPIO_NR_PALMZ72_IR_DISABLE, mode & IR_OFF);
-       pxa2xx_transceiver_mode(dev, mode);
-}
-
 static struct pxaficp_platform_data palmz72_ficp_platform_data = {
-       .startup                = palmz72_irda_startup,
-       .shutdown               = palmz72_irda_shutdown,
+       .gpio_pwdown            = GPIO_NR_PALMZ72_IR_DISABLE,
        .transceiver_cap        = IR_SIRMODE | IR_OFF,
-       .transceiver_mode       = palmz72_irda_transceiver_mode,
 };
 
 /******************************************************************************
index 01791d7..bbda570 100644
@@ -321,11 +321,14 @@ static void pcm990_mci_exit(struct device *dev, void *data)
 #define MSECS_PER_JIFFY (1000/HZ)
 
 static struct pxamci_platform_data pcm990_mci_platform_data = {
-       .detect_delay   = 250 / MSECS_PER_JIFFY,
-       .ocr_mask       = MMC_VDD_32_33 | MMC_VDD_33_34,
-       .init           = pcm990_mci_init,
-       .setpower       = pcm990_mci_setpower,
-       .exit           = pcm990_mci_exit,
+       .detect_delay           = 250 / MSECS_PER_JIFFY,
+       .ocr_mask               = MMC_VDD_32_33 | MMC_VDD_33_34,
+       .init                   = pcm990_mci_init,
+       .setpower               = pcm990_mci_setpower,
+       .exit                   = pcm990_mci_exit,
+       .gpio_card_detect       = -1,
+       .gpio_card_ro           = -1,
+       .gpio_power             = -1,
 };
 
 static struct pxaohci_platform_data pcm990_ohci_platform_data = {
@@ -427,25 +430,56 @@ static void pcm990_camera_free_bus(struct soc_camera_link *link)
        gpio_bus_switch = -EINVAL;
 }
 
-static struct soc_camera_link iclink = {
-       .bus_id = 0, /* Must match with the camera ID above */
-       .query_bus_param = pcm990_camera_query_bus_param,
-       .set_bus_param = pcm990_camera_set_bus_param,
-       .free_bus = pcm990_camera_free_bus,
-};
-
 /* Board I2C devices. */
 static struct i2c_board_info __initdata pcm990_i2c_devices[] = {
        {
                /* Must initialize before the camera(s) */
                I2C_BOARD_INFO("pca9536", 0x41),
                .platform_data = &pca9536_data,
-       }, {
+       },
+};
+
+static struct i2c_board_info pcm990_camera_i2c[] = {
+       {
                I2C_BOARD_INFO("mt9v022", 0x48),
-               .platform_data = &iclink, /* With extender */
        }, {
                I2C_BOARD_INFO("mt9m001", 0x5d),
-               .platform_data = &iclink, /* With extender */
+       },
+};
+
+static struct soc_camera_link iclink[] = {
+       {
+               .bus_id                 = 0, /* Must match with the camera ID */
+               .board_info             = &pcm990_camera_i2c[0],
+               .i2c_adapter_id         = 0,
+               .query_bus_param        = pcm990_camera_query_bus_param,
+               .set_bus_param          = pcm990_camera_set_bus_param,
+               .free_bus               = pcm990_camera_free_bus,
+               .module_name            = "mt9v022",
+       }, {
+               .bus_id                 = 0, /* Must match with the camera ID */
+               .board_info             = &pcm990_camera_i2c[1],
+               .i2c_adapter_id         = 0,
+               .query_bus_param        = pcm990_camera_query_bus_param,
+               .set_bus_param          = pcm990_camera_set_bus_param,
+               .free_bus               = pcm990_camera_free_bus,
+               .module_name            = "mt9m001",
+       },
+};
+
+static struct platform_device pcm990_camera[] = {
+       {
+               .name   = "soc-camera-pdrv",
+               .id     = 0,
+               .dev    = {
+                       .platform_data = &iclink[0],
+               },
+       }, {
+               .name   = "soc-camera-pdrv",
+               .id     = 1,
+               .dev    = {
+                       .platform_data = &iclink[1],
+               },
        },
 };
 #endif /* CONFIG_VIDEO_PXA27x || CONFIG_VIDEO_PXA27x_MODULE */
@@ -501,6 +535,9 @@ void __init pcm990_baseboard_init(void)
        pxa_set_camera_info(&pcm990_pxacamera_platform_data);
 
        i2c_register_board_info(0, ARRAY_AND_SIZE(pcm990_i2c_devices));
+
+       platform_device_register(&pcm990_camera[0]);
+       platform_device_register(&pcm990_camera[1]);
 #endif
 
        printk(KERN_INFO "PCM-990 Evaluation baseboard initialized\n");
index 9352d4a..a186994 100644
@@ -245,20 +245,10 @@ static inline void poodle_init_spi(void) {}
  * The card detect interrupt isn't debounced so we delay it by 250ms
  * to give the card a chance to fully insert/eject.
  */
-static struct pxamci_platform_data poodle_mci_platform_data;
-
 static int poodle_mci_init(struct device *dev, irq_handler_t poodle_detect_int, void *data)
 {
        int err;
 
-       err = gpio_request(POODLE_GPIO_nSD_DETECT, "nSD_DETECT");
-       if (err)
-               goto err_out;
-
-       err = gpio_request(POODLE_GPIO_nSD_WP, "nSD_WP");
-       if (err)
-               goto err_free_1;
-
        err = gpio_request(POODLE_GPIO_SD_PWR, "SD_PWR");
        if (err)
                goto err_free_2;
@@ -267,34 +257,14 @@ static int poodle_mci_init(struct device *dev, irq_handler_t poodle_detect_int,
        if (err)
                goto err_free_3;
 
-       gpio_direction_input(POODLE_GPIO_nSD_DETECT);
-       gpio_direction_input(POODLE_GPIO_nSD_WP);
-
        gpio_direction_output(POODLE_GPIO_SD_PWR, 0);
        gpio_direction_output(POODLE_GPIO_SD_PWR1, 0);
 
-       poodle_mci_platform_data.detect_delay = msecs_to_jiffies(250);
-
-       err = request_irq(POODLE_IRQ_GPIO_nSD_DETECT, poodle_detect_int,
-                         IRQF_DISABLED | IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
-                         "MMC card detect", data);
-       if (err) {
-               pr_err("%s: MMC/SD: can't request MMC card detect IRQ\n",
-                               __func__);
-               goto err_free_4;
-       }
-
        return 0;
 
-err_free_4:
-       gpio_free(POODLE_GPIO_SD_PWR1);
 err_free_3:
        gpio_free(POODLE_GPIO_SD_PWR);
 err_free_2:
-       gpio_free(POODLE_GPIO_nSD_WP);
-err_free_1:
-       gpio_free(POODLE_GPIO_nSD_DETECT);
-err_out:
        return err;
 }
 
@@ -312,62 +282,29 @@ static void poodle_mci_setpower(struct device *dev, unsigned int vdd)
        }
 }
 
-static int poodle_mci_get_ro(struct device *dev)
-{
-       return !!gpio_get_value(POODLE_GPIO_nSD_WP);
-       return GPLR(POODLE_GPIO_nSD_WP) & GPIO_bit(POODLE_GPIO_nSD_WP);
-}
-
-
 static void poodle_mci_exit(struct device *dev, void *data)
 {
-       free_irq(POODLE_IRQ_GPIO_nSD_DETECT, data);
        gpio_free(POODLE_GPIO_SD_PWR1);
        gpio_free(POODLE_GPIO_SD_PWR);
-       gpio_free(POODLE_GPIO_nSD_WP);
-       gpio_free(POODLE_GPIO_nSD_DETECT);
 }
 
 static struct pxamci_platform_data poodle_mci_platform_data = {
-       .ocr_mask       = MMC_VDD_32_33|MMC_VDD_33_34,
-       .init           = poodle_mci_init,
-       .get_ro         = poodle_mci_get_ro,
-       .setpower       = poodle_mci_setpower,
-       .exit           = poodle_mci_exit,
+       .ocr_mask               = MMC_VDD_32_33|MMC_VDD_33_34,
+       .init                   = poodle_mci_init,
+       .setpower               = poodle_mci_setpower,
+       .exit                   = poodle_mci_exit,
+       .gpio_card_detect       = POODLE_IRQ_GPIO_nSD_DETECT,
+       .gpio_card_ro           = POODLE_GPIO_nSD_WP,
+       .gpio_power             = -1,
 };
 
 
 /*
  * Irda
  */
-static void poodle_irda_transceiver_mode(struct device *dev, int mode)
-{
-       gpio_set_value(POODLE_GPIO_IR_ON, mode & IR_OFF);
-       pxa2xx_transceiver_mode(dev, mode);
-}
-
-static int poodle_irda_startup(struct device *dev)
-{
-       int err;
-
-       err = gpio_request(POODLE_GPIO_IR_ON, "IR_ON");
-       if (err)
-               return err;
-
-       gpio_direction_output(POODLE_GPIO_IR_ON, 1);
-       return 0;
-}
-
-static void poodle_irda_shutdown(struct device *dev)
-{
-       gpio_free(POODLE_GPIO_IR_ON);
-}
-
 static struct pxaficp_platform_data poodle_ficp_platform_data = {
+       .gpio_pwdown            = POODLE_GPIO_IR_ON,
        .transceiver_cap        = IR_SIRMODE | IR_OFF,
-       .transceiver_mode       = poodle_irda_transceiver_mode,
-       .startup                = poodle_irda_startup,
-       .shutdown               = poodle_irda_shutdown,
 };
 
 
@@ -521,6 +458,7 @@ static void __init poodle_init(void)
        set_pxa_fb_parent(&poodle_locomo_device.dev);
        set_pxa_fb_info(&poodle_fb_info);
        pxa_set_udc_info(&udc_info);
+       poodle_mci_platform_data.detect_delay = msecs_to_jiffies(250);
        pxa_set_mci_info(&poodle_mci_platform_data);
        pxa_set_ficp_info(&poodle_ficp_platform_data);
        pxa_set_i2c_info(NULL);
index 2f3394f..8682704 100644
@@ -52,3 +52,4 @@ void pxa2xx_transceiver_mode(struct device *dev, int mode)
        } else
                BUG();
 }
+EXPORT_SYMBOL_GPL(pxa2xx_transceiver_mode);
index 4ba6d21..f4af6e2 100644
@@ -84,9 +84,11 @@ static struct mfp_addr_map pxa310_mfp_addr_map[] __initdata = {
 };
 
 static DEFINE_PXA3_CKEN(common_nand, NAND, 156000000, 0);
+static DEFINE_PXA3_CKEN(gcu, PXA300_GCU, 0, 0);
 
 static struct clk_lookup common_clkregs[] = {
        INIT_CLKREG(&clk_common_nand, "pxa3xx-nand", NULL),
+       INIT_CLKREG(&clk_gcu, "pxa3xx-gcu", NULL),
 };
 
 static DEFINE_PXA3_CKEN(pxa310_mmc3, MMC3, 19500000, 0);
index 8b3d97e..c7373e7 100644
@@ -78,9 +78,11 @@ static struct mfp_addr_map pxa320_mfp_addr_map[] __initdata = {
 };
 
 static DEFINE_PXA3_CKEN(pxa320_nand, NAND, 104000000, 0);
+static DEFINE_PXA3_CKEN(gcu, PXA320_GCU, 0, 0);
 
 static struct clk_lookup pxa320_clkregs[] = {
        INIT_CLKREG(&clk_pxa320_nand, "pxa3xx-nand", NULL),
+       INIT_CLKREG(&clk_gcu, "pxa3xx-gcu", NULL),
 };
 
 static int __init pxa320_init(void)
index 7113174..0642920 100644
@@ -176,13 +176,30 @@ static struct mfp_addr_map pxa930_mfp_addr_map[] __initdata = {
        MFP_ADDR_END,
 };
 
+static struct mfp_addr_map pxa935_mfp_addr_map[] __initdata = {
+       MFP_ADDR(GPIO159, 0x0524),
+       MFP_ADDR(GPIO163, 0x0534),
+       MFP_ADDR(GPIO167, 0x0544),
+       MFP_ADDR(GPIO168, 0x0548),
+       MFP_ADDR(GPIO169, 0x054c),
+       MFP_ADDR(GPIO170, 0x0550),
+       MFP_ADDR(GPIO171, 0x0554),
+       MFP_ADDR(GPIO172, 0x0558),
+       MFP_ADDR(GPIO173, 0x055c),
+
+       MFP_ADDR_END,
+};
+
 static int __init pxa930_init(void)
 {
-       if (cpu_is_pxa930()) {
+       if (cpu_is_pxa930() || cpu_is_pxa935()) {
                mfp_init_base(io_p2v(MFPR_BASE));
                mfp_init_addr(pxa930_mfp_addr_map);
        }
 
+       if (cpu_is_pxa935())
+               mfp_init_addr(pxa935_mfp_addr_map);
+
        return 0;
 }
 
index dda310f..ee8d603 100644
@@ -24,6 +24,7 @@
 #include <linux/spi/ads7846.h>
 #include <linux/spi/corgi_lcd.h>
 #include <linux/mtd/sharpsl.h>
+#include <linux/input/matrix_keypad.h>
 
 #include <asm/setup.h>
 #include <asm/mach-types.h>
@@ -111,6 +112,26 @@ static unsigned long spitz_pin_config[] __initdata = {
        GPIO105_GPIO,   /* SPITZ_GPIO_CF_IRQ */
        GPIO106_GPIO,   /* SPITZ_GPIO_CF2_IRQ */
 
+       /* GPIO matrix keypad */
+       GPIO88_GPIO,    /* column 0 */
+       GPIO23_GPIO,    /* column 1 */
+       GPIO24_GPIO,    /* column 2 */
+       GPIO25_GPIO,    /* column 3 */
+       GPIO26_GPIO,    /* column 4 */
+       GPIO27_GPIO,    /* column 5 */
+       GPIO52_GPIO,    /* column 6 */
+       GPIO103_GPIO,   /* column 7 */
+       GPIO107_GPIO,   /* column 8 */
+       GPIO108_GPIO,   /* column 9 */
+       GPIO114_GPIO,   /* column 10 */
+       GPIO12_GPIO,    /* row 0 */
+       GPIO17_GPIO,    /* row 1 */
+       GPIO91_GPIO,    /* row 2 */
+       GPIO34_GPIO,    /* row 3 */
+       GPIO36_GPIO,    /* row 4 */
+       GPIO38_GPIO,    /* row 5 */
+       GPIO39_GPIO,    /* row 6 */
+
        /* I2C */
        GPIO117_I2C_SCL,
        GPIO118_I2C_SDA,
@@ -242,9 +263,115 @@ EXPORT_SYMBOL(spitzscoop2_device);
 /*
  * Spitz Keyboard Device
  */
+#define SPITZ_KEY_CALENDAR     KEY_F1
+#define SPITZ_KEY_ADDRESS      KEY_F2
+#define SPITZ_KEY_FN           KEY_F3
+#define SPITZ_KEY_CANCEL       KEY_F4
+#define SPITZ_KEY_EXOK         KEY_F5
+#define SPITZ_KEY_EXCANCEL     KEY_F6
+#define SPITZ_KEY_EXJOGDOWN    KEY_F7
+#define SPITZ_KEY_EXJOGUP      KEY_F8
+#define SPITZ_KEY_JAP1         KEY_LEFTALT
+#define SPITZ_KEY_JAP2         KEY_RIGHTCTRL
+#define SPITZ_KEY_SYNC         KEY_F9
+#define SPITZ_KEY_MAIL         KEY_F10
+#define SPITZ_KEY_OK           KEY_F11
+#define SPITZ_KEY_MENU         KEY_F12
+
+static const uint32_t spitzkbd_keymap[] = {
+       KEY(0, 0, KEY_LEFTCTRL),
+       KEY(0, 1, KEY_1),
+       KEY(0, 2, KEY_3),
+       KEY(0, 3, KEY_5),
+       KEY(0, 4, KEY_6),
+       KEY(0, 5, KEY_7),
+       KEY(0, 6, KEY_9),
+       KEY(0, 7, KEY_0),
+       KEY(0, 8, KEY_BACKSPACE),
+       KEY(0, 9, SPITZ_KEY_EXOK),      /* EXOK */
+       KEY(0, 10, SPITZ_KEY_EXCANCEL), /* EXCANCEL */
+       KEY(1, 1, KEY_2),
+       KEY(1, 2, KEY_4),
+       KEY(1, 3, KEY_R),
+       KEY(1, 4, KEY_Y),
+       KEY(1, 5, KEY_8),
+       KEY(1, 6, KEY_I),
+       KEY(1, 7, KEY_O),
+       KEY(1, 8, KEY_P),
+       KEY(1, 9, SPITZ_KEY_EXJOGDOWN), /* EXJOGDOWN */
+       KEY(1, 10, SPITZ_KEY_EXJOGUP),  /* EXJOGUP */
+       KEY(2, 0, KEY_TAB),
+       KEY(2, 1, KEY_Q),
+       KEY(2, 2, KEY_E),
+       KEY(2, 3, KEY_T),
+       KEY(2, 4, KEY_G),
+       KEY(2, 5, KEY_U),
+       KEY(2, 6, KEY_J),
+       KEY(2, 7, KEY_K),
+       KEY(3, 0, SPITZ_KEY_ADDRESS),   /* ADDRESS */
+       KEY(3, 1, KEY_W),
+       KEY(3, 2, KEY_S),
+       KEY(3, 3, KEY_F),
+       KEY(3, 4, KEY_V),
+       KEY(3, 5, KEY_H),
+       KEY(3, 6, KEY_M),
+       KEY(3, 7, KEY_L),
+       KEY(3, 9, KEY_RIGHTSHIFT),
+       KEY(4, 0, SPITZ_KEY_CALENDAR),  /* CALENDAR */
+       KEY(4, 1, KEY_A),
+       KEY(4, 2, KEY_D),
+       KEY(4, 3, KEY_C),
+       KEY(4, 4, KEY_B),
+       KEY(4, 5, KEY_N),
+       KEY(4, 6, KEY_DOT),
+       KEY(4, 8, KEY_ENTER),
+       KEY(4, 9, KEY_LEFTSHIFT),
+       KEY(5, 0, SPITZ_KEY_MAIL),      /* MAIL */
+       KEY(5, 1, KEY_Z),
+       KEY(5, 2, KEY_X),
+       KEY(5, 3, KEY_MINUS),
+       KEY(5, 4, KEY_SPACE),
+       KEY(5, 5, KEY_COMMA),
+       KEY(5, 7, KEY_UP),
+       KEY(5, 10, SPITZ_KEY_FN),       /* FN */
+       KEY(6, 0, KEY_SYSRQ),
+       KEY(6, 1, SPITZ_KEY_JAP1),      /* JAP1 */
+       KEY(6, 2, SPITZ_KEY_JAP2),      /* JAP2 */
+       KEY(6, 3, SPITZ_KEY_CANCEL),    /* CANCEL */
+       KEY(6, 4, SPITZ_KEY_OK),        /* OK */
+       KEY(6, 5, SPITZ_KEY_MENU),      /* MENU */
+       KEY(6, 6, KEY_LEFT),
+       KEY(6, 7, KEY_DOWN),
+       KEY(6, 8, KEY_RIGHT),
+};
+
+static const struct matrix_keymap_data spitzkbd_keymap_data = {
+       .keymap         = spitzkbd_keymap,
+       .keymap_size    = ARRAY_SIZE(spitzkbd_keymap),
+};
+
+static const uint32_t spitzkbd_row_gpios[] =
+               { 12, 17, 91, 34, 36, 38, 39 };
+static const uint32_t spitzkbd_col_gpios[] =
+               { 88, 23, 24, 25, 26, 27, 52, 103, 107, 108, 114 };
+
+static struct matrix_keypad_platform_data spitzkbd_pdata = {
+       .keymap_data            = &spitzkbd_keymap_data,
+       .row_gpios              = spitzkbd_row_gpios,
+       .col_gpios              = spitzkbd_col_gpios,
+       .num_row_gpios          = ARRAY_SIZE(spitzkbd_row_gpios),
+       .num_col_gpios          = ARRAY_SIZE(spitzkbd_col_gpios),
+       .col_scan_delay_us      = 10,
+       .debounce_ms            = 10,
+       .wakeup                 = 1,
+};
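
Each keymap entry packs row, column and keycode into one word; modulo range masking, the KEY() macro from <linux/input/matrix_keypad.h> amounts to:

	/* sketch of the keymap encoding used above */
	#define KEY(row, col, val)	(((row) << 24) | ((col) << 16) | (val))

	/* e.g. KEY(6, 8, KEY_RIGHT) reads as "row 6, column 8 emits
	 * KEY_RIGHT"; the matrix-keypad driver recovers the triple with
	 * k >> 24, (k >> 16) & 0xff and k & 0xffff. */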
+
 static struct platform_device spitzkbd_device = {
-       .name           = "spitz-keyboard",
+       .name           = "matrix-keypad",
        .id             = -1,
+       .dev            = {
+               .platform_data = &spitzkbd_pdata,
+       },
 };
 
 
@@ -296,6 +423,7 @@ static struct ads7846_platform_data spitz_ads7846_info = {
        .vref_delay_usecs       = 100,
        .x_plate_ohms           = 419,
        .y_plate_ohms           = 486,
+       .pressure_max           = 1024,
        .gpio_pendown           = SPITZ_GPIO_TP_INT,
        .wait_for_sync          = spitz_wait_for_hsync,
 };
@@ -378,45 +506,6 @@ static inline void spitz_init_spi(void) {}
  * The card detect interrupt isn't debounced so we delay it by 250ms
  * to give the card a chance to fully insert/eject.
  */
-
-static struct pxamci_platform_data spitz_mci_platform_data;
-
-static int spitz_mci_init(struct device *dev, irq_handler_t spitz_detect_int, void *data)
-{
-       int err;
-
-       err = gpio_request(SPITZ_GPIO_nSD_DETECT, "nSD_DETECT");
-       if (err)
-               goto err_out;
-
-       err = gpio_request(SPITZ_GPIO_nSD_WP, "nSD_WP");
-       if (err)
-               goto err_free_1;
-
-       gpio_direction_input(SPITZ_GPIO_nSD_DETECT);
-       gpio_direction_input(SPITZ_GPIO_nSD_WP);
-
-       spitz_mci_platform_data.detect_delay = msecs_to_jiffies(250);
-
-       err = request_irq(SPITZ_IRQ_GPIO_nSD_DETECT, spitz_detect_int,
-                         IRQF_DISABLED | IRQF_TRIGGER_RISING |
-                         IRQF_TRIGGER_FALLING,
-                         "MMC card detect", data);
-       if (err) {
-               pr_err("%s: MMC/SD: can't request MMC card detect IRQ\n",
-                               __func__);
-               goto err_free_2;
-       }
-       return 0;
-
-err_free_2:
-       gpio_free(SPITZ_GPIO_nSD_WP);
-err_free_1:
-       gpio_free(SPITZ_GPIO_nSD_DETECT);
-err_out:
-       return err;
-}
-
 static void spitz_mci_setpower(struct device *dev, unsigned int vdd)
 {
        struct pxamci_platform_data* p_d = dev->platform_data;
@@ -427,24 +516,12 @@ static void spitz_mci_setpower(struct device *dev, unsigned int vdd)
                spitz_card_pwr_ctrl(SPITZ_PWR_SD, 0x0000);
 }
 
-static int spitz_mci_get_ro(struct device *dev)
-{
-       return gpio_get_value(SPITZ_GPIO_nSD_WP);
-}
-
-static void spitz_mci_exit(struct device *dev, void *data)
-{
-       free_irq(SPITZ_IRQ_GPIO_nSD_DETECT, data);
-       gpio_free(SPITZ_GPIO_nSD_WP);
-       gpio_free(SPITZ_GPIO_nSD_DETECT);
-}
-
 static struct pxamci_platform_data spitz_mci_platform_data = {
-       .ocr_mask       = MMC_VDD_32_33|MMC_VDD_33_34,
-       .init           = spitz_mci_init,
-       .get_ro         = spitz_mci_get_ro,
-       .setpower       = spitz_mci_setpower,
-       .exit           = spitz_mci_exit,
+       .ocr_mask               = MMC_VDD_32_33|MMC_VDD_33_34,
+       .setpower               = spitz_mci_setpower,
+       .gpio_card_detect       = SPITZ_GPIO_nSD_DETECT,
+       .gpio_card_ro           = SPITZ_GPIO_nSD_WP,
+       .gpio_power             = -1,
 };
 
 
@@ -484,50 +561,10 @@ static struct pxaohci_platform_data spitz_ohci_platform_data = {
 /*
  * Irda
  */
-static int spitz_irda_startup(struct device *dev)
-{
-       int rc;
-
-       rc = gpio_request(SPITZ_GPIO_IR_ON, "IrDA on");
-       if (rc)
-               goto err;
-
-       rc = gpio_direction_output(SPITZ_GPIO_IR_ON, 1);
-       if (rc)
-               goto err_dir;
-
-       return 0;
-
-err_dir:
-       gpio_free(SPITZ_GPIO_IR_ON);
-err:
-       return rc;
-}
-
-static void spitz_irda_shutdown(struct device *dev)
-{
-       gpio_free(SPITZ_GPIO_IR_ON);
-}
-
-static void spitz_irda_transceiver_mode(struct device *dev, int mode)
-{
-       gpio_set_value(SPITZ_GPIO_IR_ON, mode & IR_OFF);
-       pxa2xx_transceiver_mode(dev, mode);
-}
-
-#ifdef CONFIG_MACH_AKITA
-static void akita_irda_transceiver_mode(struct device *dev, int mode)
-{
-       gpio_set_value(AKITA_GPIO_IR_ON, mode & IR_OFF);
-       pxa2xx_transceiver_mode(dev, mode);
-}
-#endif
 
 static struct pxaficp_platform_data spitz_ficp_platform_data = {
+/* .gpio_pwdown is set in spitz_init() and akita_init() accordingly */
        .transceiver_cap        = IR_SIRMODE | IR_OFF,
-       .transceiver_mode       = spitz_irda_transceiver_mode,
-       .startup                = spitz_irda_startup,
-       .shutdown               = spitz_irda_shutdown,
 };
 
 
@@ -695,6 +732,7 @@ static void __init common_init(void)
        spitz_init_spi();
 
        platform_add_devices(devices, ARRAY_SIZE(devices));
+       spitz_mci_platform_data.detect_delay = msecs_to_jiffies(250);
        pxa_set_mci_info(&spitz_mci_platform_data);
        pxa_set_ohci_info(&spitz_ohci_platform_data);
        pxa_set_ficp_info(&spitz_ficp_platform_data);
@@ -705,6 +743,8 @@ static void __init common_init(void)
 #if defined(CONFIG_MACH_SPITZ) || defined(CONFIG_MACH_BORZOI)
 static void __init spitz_init(void)
 {
+       spitz_ficp_platform_data.gpio_pwdown = SPITZ_GPIO_IR_ON;
+
        platform_scoop_config = &spitz_pcmcia_config;
 
        common_init();
@@ -747,7 +787,7 @@ static struct nand_ecclayout akita_oobinfo = {
 
 static void __init akita_init(void)
 {
-       spitz_ficp_platform_data.transceiver_mode = akita_irda_transceiver_mode;
+       spitz_ficp_platform_data.gpio_pwdown = AKITA_GPIO_IR_ON;
 
        sharpsl_nand_platform_data.badblock_pattern = &sharpsl_akita_bbt;
        sharpsl_nand_platform_data.ecc_layout = &akita_oobinfo;
index 117ad59..e81a526 100644
@@ -247,49 +247,10 @@ static struct pxa2xx_udc_mach_info udc_info __initdata = {
 /*
  * MMC/SD Device
  */
-static struct pxamci_platform_data tosa_mci_platform_data;
-
 static int tosa_mci_init(struct device *dev, irq_handler_t tosa_detect_int, void *data)
 {
        int err;
 
-       tosa_mci_platform_data.detect_delay = msecs_to_jiffies(250);
-
-       err = gpio_request(TOSA_GPIO_nSD_DETECT, "MMC/SD card detect");
-       if (err) {
-               printk(KERN_ERR "tosa_mci_init: can't request nSD_DETECT gpio\n");
-               goto err_gpio_detect;
-       }
-       err = gpio_direction_input(TOSA_GPIO_nSD_DETECT);
-       if (err)
-               goto err_gpio_detect_dir;
-
-       err = request_irq(TOSA_IRQ_GPIO_nSD_DETECT, tosa_detect_int,
-                         IRQF_DISABLED | IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
-                               "MMC/SD card detect", data);
-       if (err) {
-               printk(KERN_ERR "tosa_mci_init: MMC/SD: can't request MMC card detect IRQ\n");
-               goto err_irq;
-       }
-
-       err = gpio_request(TOSA_GPIO_SD_WP, "SD Write Protect");
-       if (err) {
-               printk(KERN_ERR "tosa_mci_init: can't request SD_WP gpio\n");
-               goto err_gpio_wp;
-       }
-       err = gpio_direction_input(TOSA_GPIO_SD_WP);
-       if (err)
-               goto err_gpio_wp_dir;
-
-       err = gpio_request(TOSA_GPIO_PWR_ON, "SD Power");
-       if (err) {
-               printk(KERN_ERR "tosa_mci_init: can't request SD_PWR gpio\n");
-               goto err_gpio_pwr;
-       }
-       err = gpio_direction_output(TOSA_GPIO_PWR_ON, 0);
-       if (err)
-               goto err_gpio_pwr_dir;
-
        err = gpio_request(TOSA_GPIO_nSD_INT, "SD Int");
        if (err) {
                printk(KERN_ERR "tosa_mci_init: can't request SD_PWR gpio\n");
@@ -304,51 +265,21 @@ static int tosa_mci_init(struct device *dev, irq_handler_t tosa_detect_int, void
 err_gpio_int_dir:
        gpio_free(TOSA_GPIO_nSD_INT);
 err_gpio_int:
-err_gpio_pwr_dir:
-       gpio_free(TOSA_GPIO_PWR_ON);
-err_gpio_pwr:
-err_gpio_wp_dir:
-       gpio_free(TOSA_GPIO_SD_WP);
-err_gpio_wp:
-       free_irq(TOSA_IRQ_GPIO_nSD_DETECT, data);
-err_irq:
-err_gpio_detect_dir:
-       gpio_free(TOSA_GPIO_nSD_DETECT);
-err_gpio_detect:
        return err;
 }
 
-static void tosa_mci_setpower(struct device *dev, unsigned int vdd)
-{
-       struct pxamci_platform_data* p_d = dev->platform_data;
-
-       if (( 1 << vdd) & p_d->ocr_mask) {
-               gpio_set_value(TOSA_GPIO_PWR_ON, 1);
-       } else {
-               gpio_set_value(TOSA_GPIO_PWR_ON, 0);
-       }
-}
-
-static int tosa_mci_get_ro(struct device *dev)
-{
-       return gpio_get_value(TOSA_GPIO_SD_WP);
-}
-
 static void tosa_mci_exit(struct device *dev, void *data)
 {
        gpio_free(TOSA_GPIO_nSD_INT);
-       gpio_free(TOSA_GPIO_PWR_ON);
-       gpio_free(TOSA_GPIO_SD_WP);
-       free_irq(TOSA_IRQ_GPIO_nSD_DETECT, data);
-       gpio_free(TOSA_GPIO_nSD_DETECT);
 }
 
 static struct pxamci_platform_data tosa_mci_platform_data = {
-       .ocr_mask       = MMC_VDD_32_33|MMC_VDD_33_34,
-       .init           = tosa_mci_init,
-       .get_ro         = tosa_mci_get_ro,
-       .setpower       = tosa_mci_setpower,
-       .exit           = tosa_mci_exit,
+       .ocr_mask               = MMC_VDD_32_33|MMC_VDD_33_34,
+       .init                   = tosa_mci_init,
+       .exit                   = tosa_mci_exit,
+       .gpio_card_detect       = TOSA_GPIO_nSD_DETECT,
+       .gpio_card_ro           = TOSA_GPIO_SD_WP,
+       .gpio_power             = TOSA_GPIO_PWR_ON,
 };
 
 /*
@@ -406,10 +337,11 @@ static void tosa_irda_shutdown(struct device *dev)
 }
 
 static struct pxaficp_platform_data tosa_ficp_platform_data = {
-       .transceiver_cap  = IR_SIRMODE | IR_OFF,
-       .transceiver_mode = tosa_irda_transceiver_mode,
-       .startup = tosa_irda_startup,
-       .shutdown = tosa_irda_shutdown,
+       .gpio_pwdown            = -1,
+       .transceiver_cap        = IR_SIRMODE | IR_OFF,
+       .transceiver_mode       = tosa_irda_transceiver_mode,
+       .startup                = tosa_irda_startup,
+       .shutdown               = tosa_irda_shutdown,
 };
 
 /*
@@ -910,6 +842,7 @@ static void __init tosa_init(void)
        dummy = gpiochip_reserve(TOSA_SCOOP_JC_GPIO_BASE, 12);
        dummy = gpiochip_reserve(TOSA_TC6393XB_GPIO_BASE, 16);
 
+       tosa_mci_platform_data.detect_delay = msecs_to_jiffies(250);
        pxa_set_mci_info(&tosa_mci_platform_data);
        pxa_set_udc_info(&udc_info);
        pxa_set_ficp_info(&tosa_ficp_platform_data);
index 753ec4d..fe08507 100644
@@ -153,87 +153,11 @@ static unsigned long treo680_pin_config[] __initdata = {
 /******************************************************************************
  * SD/MMC card controller
  ******************************************************************************/
-static int treo680_mci_init(struct device *dev,
-               irq_handler_t treo680_detect_int, void *data)
-{
-       int err = 0;
-
-       /* Setup an interrupt for detecting card insert/remove events */
-       err = gpio_request(GPIO_NR_TREO680_SD_DETECT_N, "SD IRQ");
-
-       if (err)
-               goto err;
-
-       err = gpio_direction_input(GPIO_NR_TREO680_SD_DETECT_N);
-       if (err)
-               goto err2;
-
-       err = request_irq(gpio_to_irq(GPIO_NR_TREO680_SD_DETECT_N),
-                       treo680_detect_int, IRQF_DISABLED | IRQF_SAMPLE_RANDOM |
-                       IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
-                       "SD/MMC card detect", data);
-
-       if (err) {
-               dev_err(dev, "%s: cannot request SD/MMC card detect IRQ\n",
-                            __func__);
-               goto err2;
-       }
-
-       err = gpio_request(GPIO_NR_TREO680_SD_POWER, "SD_POWER");
-       if (err)
-               goto err3;
-
-       err = gpio_direction_output(GPIO_NR_TREO680_SD_POWER, 1);
-       if (err)
-               goto err4;
-
-       err = gpio_request(GPIO_NR_TREO680_SD_READONLY, "SD_READONLY");
-       if (err)
-               goto err4;
-
-       err = gpio_direction_input(GPIO_NR_TREO680_SD_READONLY);
-       if (err)
-               goto err5;
-
-       return 0;
-
-err5:
-       gpio_free(GPIO_NR_TREO680_SD_READONLY);
-err4:
-       gpio_free(GPIO_NR_TREO680_SD_POWER);
-err3:
-       free_irq(gpio_to_irq(GPIO_NR_TREO680_SD_DETECT_N), data);
-err2:
-       gpio_free(GPIO_NR_TREO680_SD_DETECT_N);
-err:
-       return err;
-}
-
-static void treo680_mci_exit(struct device *dev, void *data)
-{
-       gpio_free(GPIO_NR_TREO680_SD_READONLY);
-       gpio_free(GPIO_NR_TREO680_SD_POWER);
-       free_irq(gpio_to_irq(GPIO_NR_TREO680_SD_DETECT_N), data);
-       gpio_free(GPIO_NR_TREO680_SD_DETECT_N);
-}
-
-static void treo680_mci_power(struct device *dev, unsigned int vdd)
-{
-       struct pxamci_platform_data *p_d = dev->platform_data;
-       gpio_set_value(GPIO_NR_TREO680_SD_POWER, p_d->ocr_mask & (1 << vdd));
-}
-
-static int treo680_mci_get_ro(struct device *dev)
-{
-       return gpio_get_value(GPIO_NR_TREO680_SD_READONLY);
-}
-
 static struct pxamci_platform_data treo680_mci_platform_data = {
-       .ocr_mask       = MMC_VDD_32_33 | MMC_VDD_33_34,
-       .setpower       = treo680_mci_power,
-       .get_ro         = treo680_mci_get_ro,
-       .init           = treo680_mci_init,
-       .exit           = treo680_mci_exit,
+       .ocr_mask               = MMC_VDD_32_33 | MMC_VDD_33_34,
+       .gpio_card_detect       = GPIO_NR_TREO680_SD_DETECT_N,
+       .gpio_card_ro           = GPIO_NR_TREO680_SD_READONLY,
+       .gpio_power             = GPIO_NR_TREO680_SD_POWER,
 };
 
 /******************************************************************************
@@ -330,16 +254,9 @@ static int treo680_backlight_init(struct device *dev)
        ret = gpio_direction_output(GPIO_NR_TREO680_BL_POWER, 0);
        if (ret)
                goto err2;
-       ret = gpio_request(GPIO_NR_TREO680_LCD_POWER, "LCD POWER");
-       if (ret)
-               goto err2;
-       ret = gpio_direction_output(GPIO_NR_TREO680_LCD_POWER, 0);
-       if (ret)
-               goto err3;
 
        return 0;
-err3:
-       gpio_free(GPIO_NR_TREO680_LCD_POWER);
+
 err2:
        gpio_free(GPIO_NR_TREO680_BL_POWER);
 err:
@@ -355,7 +272,6 @@ static int treo680_backlight_notify(int brightness)
 static void treo680_backlight_exit(struct device *dev)
 {
        gpio_free(GPIO_NR_TREO680_BL_POWER);
-       gpio_free(GPIO_NR_TREO680_LCD_POWER);
 }
 
 static struct platform_pwm_backlight_data treo680_backlight_data = {
@@ -379,44 +295,9 @@ static struct platform_device treo680_backlight = {
 /******************************************************************************
  * IrDA
  ******************************************************************************/
-static void treo680_transceiver_mode(struct device *dev, int mode)
-{
-       gpio_set_value(GPIO_NR_TREO680_IR_EN, mode & IR_OFF);
-       pxa2xx_transceiver_mode(dev, mode);
-}
-
-static int treo680_irda_startup(struct device *dev)
-{
-       int err;
-
-       err = gpio_request(GPIO_NR_TREO680_IR_EN, "Ir port disable");
-       if (err)
-               goto err1;
-
-       err = gpio_direction_output(GPIO_NR_TREO680_IR_EN, 1);
-       if (err)
-               goto err2;
-
-       return 0;
-
-err2:
-       dev_err(dev, "treo680_irda: cannot change IR gpio direction\n");
-       gpio_free(GPIO_NR_TREO680_IR_EN);
-err1:
-       dev_err(dev, "treo680_irda: cannot allocate IR gpio\n");
-       return err;
-}
-
-static void treo680_irda_shutdown(struct device *dev)
-{
-       gpio_free(GPIO_NR_TREO680_IR_EN);
-}
-
 static struct pxaficp_platform_data treo680_ficp_info = {
-       .transceiver_cap  = IR_FIRMODE | IR_SIRMODE | IR_OFF,
-       .startup          = treo680_irda_startup,
-       .shutdown         = treo680_irda_shutdown,
-       .transceiver_mode = treo680_transceiver_mode,
+       .gpio_pwdown            = GPIO_NR_TREO680_IR_EN,
+       .transceiver_cap        = IR_SIRMODE | IR_OFF,
 };
 
 /******************************************************************************
@@ -546,6 +427,11 @@ static struct pxafb_mode_info treo680_lcd_modes[] = {
 },
 };
 
+static void treo680_lcd_power(int on, struct fb_var_screeninfo *info)
+{
+       gpio_set_value(GPIO_NR_TREO680_BL_POWER, on);
+}
+
 static struct pxafb_mach_info treo680_lcd_screen = {
        .modes          = treo680_lcd_modes,
        .num_modes      = ARRAY_SIZE(treo680_lcd_modes),
@@ -585,11 +471,32 @@ static void __init treo680_udc_init(void)
        }
 }
 
+static void __init treo680_lcd_power_init(void)
+{
+       int ret;
+
+       ret = gpio_request(GPIO_NR_TREO680_LCD_POWER, "LCD POWER");
+       if (ret) {
+               pr_err("Treo680: LCD power GPIO request failed!\n");
+               return;
+       }
+
+       ret = gpio_direction_output(GPIO_NR_TREO680_LCD_POWER, 0);
+       if (ret) {
+               pr_err("Treo680: setting LCD power GPIO direction failed!\n");
+               gpio_free(GPIO_NR_TREO680_LCD_POWER);
+               return;
+       }
+
+       treo680_lcd_screen.pxafb_lcd_power = treo680_lcd_power;
+}
+
 static void __init treo680_init(void)
 {
        treo680_pm_init();
        pxa2xx_mfp_config(ARRAY_AND_SIZE(treo680_pin_config));
        pxa_set_keypad_info(&treo680_keypad_platform_data);
+       treo680_lcd_power_init();
        set_pxa_fb_info(&treo680_lcd_screen);
        pxa_set_mci_info(&treo680_mci_platform_data);
        treo680_udc_init();
index 825f540..3981e03 100644
@@ -367,6 +367,9 @@ static struct pxamci_platform_data trizeps4_mci_platform_data = {
        .exit           = trizeps4_mci_exit,
        .get_ro         = NULL, /* write-protection not supported */
        .setpower       = NULL, /* power-switching not supported */
+       .gpio_card_detect = -1,
+       .gpio_card_ro   = -1,
+       .gpio_power     = -1,
 };
 
 /****************************************************************************
@@ -412,6 +415,7 @@ static void trizeps4_irda_transceiver_mode(struct device *dev, int mode)
 }
 
 static struct pxaficp_platform_data trizeps4_ficp_platform_data = {
+       .gpio_pwdown            = -1,
        .transceiver_cap        = IR_SIRMODE | IR_FIRMODE | IR_OFF,
        .transceiver_mode       = trizeps4_irda_transceiver_mode,
        .startup                = trizeps4_irda_startup,
diff --git a/arch/arm/mach-pxa/xcep.c b/arch/arm/mach-pxa/xcep.c
new file mode 100644 (file)
index 0000000..3fd79cb
--- /dev/null
+++ b/arch/arm/mach-pxa/xcep.c
@@ -0,0 +1,187 @@
+/*  linux/arch/arm/mach-pxa/xcep.c
+ *
+ *  Support for the Iskratel Electronics XCEP platform as used in
+ *  the Libera instruments from Instrumentation Technologies.
+ *
+ *  Author:     Ales Bardorfer <ales@i-tech.si>
+ *  Contributions by: Abbott, MG (Michael) <michael.abbott@diamond.ac.uk>
+ *  Contributions by: Matej Kenda <matej.kenda@i-tech.si>
+ *  Created:    June 2006
+ *  Copyright:  (C) 2006-2009 Instrumentation Technologies
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/platform_device.h>
+#include <linux/i2c.h>
+#include <linux/smc91x.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/physmap.h>
+
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+#include <asm/mach/irq.h>
+#include <asm/mach/map.h>
+
+#include <plat/i2c.h>
+
+#include <mach/hardware.h>
+#include <mach/pxa2xx-regs.h>
+#include <mach/mfp-pxa25x.h>
+
+#include "generic.h"
+
+#define XCEP_ETH_PHYS          (PXA_CS3_PHYS + 0x00000300)
+#define XCEP_ETH_PHYS_END      (PXA_CS3_PHYS + 0x000fffff)
+#define XCEP_ETH_ATTR          (PXA_CS3_PHYS + 0x02000000)
+#define XCEP_ETH_ATTR_END      (PXA_CS3_PHYS + 0x020fffff)
+#define XCEP_ETH_IRQ           IRQ_GPIO0
+
+/*  XCEP CPLD base */
+#define XCEP_CPLD_BASE         0xf0000000
+
+
+/* Flash partitions. */
+
+static struct mtd_partition xcep_partitions[] = {
+       {
+               .name =         "Bootloader",
+               .size =         0x00040000,
+               .offset =       0,
+               .mask_flags =   MTD_WRITEABLE
+       }, {
+               .name =         "Bootloader ENV",
+               .size =         0x00040000,
+               .offset =       0x00040000,
+               .mask_flags =   MTD_WRITEABLE
+       }, {
+               .name =         "Kernel",
+               .size =         0x00100000,
+               .offset =       0x00080000,
+       }, {
+               .name =         "Rescue fs",
+               .size =         0x00280000,
+               .offset =       0x00180000,
+       }, {
+               .name =         "Filesystem",
+               .size =         MTDPART_SIZ_FULL,
+               .offset =       0x00400000
+       }
+};
+
+static struct physmap_flash_data xcep_flash_data[] = {
+       {
+               .width          = 4,            /* bankwidth in bytes */
+               .parts          = xcep_partitions,
+               .nr_parts       = ARRAY_SIZE(xcep_partitions)
+       }
+};
+
+static struct resource flash_resource = {
+       .start  = PXA_CS0_PHYS,
+       .end    = PXA_CS0_PHYS + SZ_32M - 1,
+       .flags  = IORESOURCE_MEM,
+};
+
+static struct platform_device flash_device = {
+       .name   = "physmap-flash",
+       .id     = 0,
+       .dev    = {
+               .platform_data = xcep_flash_data,
+       },
+       .resource = &flash_resource,
+       .num_resources = 1,
+};
+
+
+
+/* SMC LAN91C111 network controller. */
+
+static struct resource smc91x_resources[] = {
+       [0] = {
+               .name   = "smc91x-regs",
+               .start  = XCEP_ETH_PHYS,
+               .end    = XCEP_ETH_PHYS_END,
+               .flags  = IORESOURCE_MEM,
+       },
+       [1] = {
+               .start  = XCEP_ETH_IRQ,
+               .end    = XCEP_ETH_IRQ,
+               .flags  = IORESOURCE_IRQ,
+       },
+       [2] = {
+               .name   = "smc91x-attrib",
+               .start  = XCEP_ETH_ATTR,
+               .end    = XCEP_ETH_ATTR_END,
+               .flags  = IORESOURCE_MEM,
+       },
+};
+
+static struct smc91x_platdata xcep_smc91x_info = {
+       .flags  = SMC91X_USE_32BIT | SMC91X_NOWAIT | SMC91X_USE_DMA,
+};
+
+static struct platform_device smc91x_device = {
+       .name           = "smc91x",
+       .id             = -1,
+       .num_resources  = ARRAY_SIZE(smc91x_resources),
+       .resource       = smc91x_resources,
+       .dev            = {
+               .platform_data = &xcep_smc91x_info,
+       },
+};
+
+
+static struct platform_device *devices[] __initdata = {
+       &flash_device,
+       &smc91x_device,
+};
+
+
+/* We have to state that there are HWMON devices on the I2C bus on XCEP.
+ * Drivers for HWMON verify capabilities of the adapter when loading and
+ * refuse to attach if the adapter doesn't support HWMON class of devices.
+ * See also Documentation/i2c/porting-clients. */
+static struct i2c_pxa_platform_data xcep_i2c_platform_data  = {
+       .class = I2C_CLASS_HWMON
+};
+
+
+static mfp_cfg_t xcep_pin_config[] __initdata = {
+       GPIO79_nCS_3,   /* SMC 91C111 chip select. */
+       GPIO80_nCS_4,   /* CPLD chip select. */
+       /* SSP communication to MSP430 */
+       GPIO23_SSP1_SCLK,
+       GPIO24_SSP1_SFRM,
+       GPIO25_SSP1_TXD,
+       GPIO26_SSP1_RXD,
+       GPIO27_SSP1_EXTCLK
+};
+
+static void __init xcep_init(void)
+{
+       pxa2xx_mfp_config(ARRAY_AND_SIZE(xcep_pin_config));
+
+       /* See Intel XScale Developer's Guide for details */
+       /* Set RDF and RDN to appropriate values (chip select 3 (smc91x)) */
+       MSC1 = (MSC1 & 0xffff) | 0xD5540000;
+       /* Set RDF and RDN to appropriate values (chip select 5 (fpga)) */
+       MSC2 = (MSC2 & 0xffff) | 0x72A00000;
+
+       platform_add_devices(ARRAY_AND_SIZE(devices));
+       pxa_set_i2c_info(&xcep_i2c_platform_data);
+}
+
+MACHINE_START(XCEP, "Iskratel XCEP")
+       .phys_io        = 0x40000000,
+       .io_pg_offst    = (io_p2v(0x40000000) >> 18) & 0xfffc,
+       .boot_params    = 0xa0000100,
+       .init_machine   = xcep_init,
+       .map_io         = pxa_map_io,
+       .init_irq       = pxa25x_init_irq,
+       .timer          = &pxa_timer,
+MACHINE_END
+
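
The two MSC writes in xcep_init() rely on the PXA25x static memory controller layout, in which each MSCn register carries two 16-bit timing fields, the even-numbered chip select in the low half and the odd-numbered one in the high half; masking with 0xffff therefore retunes only nCS<3> (the LAN91C111) and nCS<5> (the FPGA). A sketch of the pattern under that assumption (the helper macro is illustrative):

	/* sketch: program only the odd chip select's timing halfword */
	#define MSC_ODD_CS(timings)	((unsigned long)(timings) << 16)

	MSC1 = (MSC1 & 0xffff) | MSC_ODD_CS(0xD554);	/* nCS<3>: smc91x */
	MSC2 = (MSC2 & 0xffff) | MSC_ODD_CS(0x72A0);	/* nCS<5>: FPGA */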
index 218d200..09784d3 100644
@@ -290,6 +290,9 @@ static struct pxamci_platform_data zylonite_mci_platform_data = {
        .init           = zylonite_mci_init,
        .exit           = zylonite_mci_exit,
        .get_ro         = zylonite_mci_ro,
+       .gpio_card_detect = -1,
+       .gpio_card_ro   = -1,
+       .gpio_power     = -1,
 };
 
 static struct pxamci_platform_data zylonite_mci2_platform_data = {
index dc3519c..a2083b6 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/io.h>
 #include <linux/smsc911x.h>
 #include <linux/ata_platform.h>
+#include <linux/amba/mmci.h>
 
 #include <asm/clkdev.h>
 #include <asm/system.h>
@@ -44,7 +45,6 @@
 #include <asm/mach/flash.h>
 #include <asm/mach/irq.h>
 #include <asm/mach/map.h>
-#include <asm/mach/mmc.h>
 
 #include <asm/hardware/gic.h>
 
@@ -237,14 +237,14 @@ static unsigned int realview_mmc_status(struct device *dev)
        return readl(REALVIEW_SYSMCI) & mask;
 }
 
-struct mmc_platform_data realview_mmc0_plat_data = {
+struct mmci_platform_data realview_mmc0_plat_data = {
        .ocr_mask       = MMC_VDD_32_33|MMC_VDD_33_34,
        .status         = realview_mmc_status,
        .gpio_wp        = 17,
        .gpio_cd        = 16,
 };
 
-struct mmc_platform_data realview_mmc1_plat_data = {
+struct mmci_platform_data realview_mmc1_plat_data = {
        .ocr_mask       = MMC_VDD_32_33|MMC_VDD_33_34,
        .status         = realview_mmc_status,
        .gpio_wp        = 19,
@@ -296,31 +296,31 @@ static struct clk ref24_clk = {
 
 static struct clk_lookup lookups[] = {
        {       /* UART0 */
-               .dev_id         = "dev:f1",
+               .dev_id         = "dev:uart0",
                .clk            = &ref24_clk,
        }, {    /* UART1 */
-               .dev_id         = "dev:f2",
+               .dev_id         = "dev:uart1",
                .clk            = &ref24_clk,
        }, {    /* UART2 */
-               .dev_id         = "dev:f3",
+               .dev_id         = "dev:uart2",
                .clk            = &ref24_clk,
        }, {    /* UART3 */
-               .dev_id         = "fpga:09",
+               .dev_id         = "fpga:uart3",
                .clk            = &ref24_clk,
        }, {    /* KMI0 */
-               .dev_id         = "fpga:06",
+               .dev_id         = "fpga:kmi0",
                .clk            = &ref24_clk,
        }, {    /* KMI1 */
-               .dev_id         = "fpga:07",
+               .dev_id         = "fpga:kmi1",
                .clk            = &ref24_clk,
        }, {    /* MMC0 */
-               .dev_id         = "fpga:05",
+               .dev_id         = "fpga:mmc0",
                .clk            = &ref24_clk,
        }, {    /* EB:CLCD */
-               .dev_id         = "dev:20",
+               .dev_id         = "dev:clcd",
                .clk            = &oscvco_clk,
        }, {    /* PB:CLCD */
-               .dev_id         = "issp:20",
+               .dev_id         = "issp:clcd",
                .clk            = &oscvco_clk,
        }
 };
index 59a337b..46cd6ac 100644 (file)
@@ -47,8 +47,8 @@ static struct amba_device name##_device = {                   \
 extern struct platform_device realview_flash_device;
 extern struct platform_device realview_cf_device;
 extern struct platform_device realview_i2c_device;
-extern struct mmc_platform_data realview_mmc0_plat_data;
-extern struct mmc_platform_data realview_mmc1_plat_data;
+extern struct mmci_platform_data realview_mmc0_plat_data;
+extern struct mmci_platform_data realview_mmc1_plat_data;
 extern struct clcd_board clcd_plat_data;
 extern void __iomem *gic_cpu_base_addr;
 extern void __iomem *timer0_va_base;
index abd13b4..1d65e64 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/sysdev.h>
 #include <linux/amba/bus.h>
 #include <linux/amba/pl061.h>
+#include <linux/amba/mmci.h>
 #include <linux/io.h>
 
 #include <mach/hardware.h>
@@ -37,7 +38,6 @@
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
-#include <asm/mach/mmc.h>
 #include <asm/mach/time.h>
 
 #include <mach/board-eb.h>
@@ -193,27 +193,27 @@ static struct pl061_platform_data gpio2_plat_data = {
 #define EB_SSP_DMA     { 9, 8 }
 
 /* FPGA Primecells */
-AMBA_DEVICE(aaci,  "fpga:04", AACI,     NULL);
-AMBA_DEVICE(mmc0,  "fpga:05", MMCI0,    &realview_mmc0_plat_data);
-AMBA_DEVICE(kmi0,  "fpga:06", KMI0,     NULL);
-AMBA_DEVICE(kmi1,  "fpga:07", KMI1,     NULL);
-AMBA_DEVICE(uart3, "fpga:09", EB_UART3, NULL);
+AMBA_DEVICE(aaci,  "fpga:aaci",  AACI,     NULL);
+AMBA_DEVICE(mmc0,  "fpga:mmc0",  MMCI0,    &realview_mmc0_plat_data);
+AMBA_DEVICE(kmi0,  "fpga:kmi0",  KMI0,     NULL);
+AMBA_DEVICE(kmi1,  "fpga:kmi1",  KMI1,     NULL);
+AMBA_DEVICE(uart3, "fpga:uart3", EB_UART3, NULL);
 
 /* DevChip Primecells */
-AMBA_DEVICE(smc,   "dev:00",  EB_SMC,   NULL);
-AMBA_DEVICE(clcd,  "dev:20",  EB_CLCD,  &clcd_plat_data);
-AMBA_DEVICE(dmac,  "dev:30",  DMAC,     NULL);
-AMBA_DEVICE(sctl,  "dev:e0",  SCTL,     NULL);
-AMBA_DEVICE(wdog,  "dev:e1",  EB_WATCHDOG, NULL);
-AMBA_DEVICE(gpio0, "dev:e4",  EB_GPIO0, &gpio0_plat_data);
-AMBA_DEVICE(gpio1, "dev:e5",  GPIO1,    &gpio1_plat_data);
-AMBA_DEVICE(gpio2, "dev:e6",  GPIO2,    &gpio2_plat_data);
-AMBA_DEVICE(rtc,   "dev:e8",  EB_RTC,   NULL);
-AMBA_DEVICE(sci0,  "dev:f0",  SCI,      NULL);
-AMBA_DEVICE(uart0, "dev:f1",  EB_UART0, NULL);
-AMBA_DEVICE(uart1, "dev:f2",  EB_UART1, NULL);
-AMBA_DEVICE(uart2, "dev:f3",  EB_UART2, NULL);
-AMBA_DEVICE(ssp0,  "dev:f4",  EB_SSP,   NULL);
+AMBA_DEVICE(smc,   "dev:smc",   EB_SMC,   NULL);
+AMBA_DEVICE(clcd,  "dev:clcd",  EB_CLCD,  &clcd_plat_data);
+AMBA_DEVICE(dmac,  "dev:dmac",  DMAC,     NULL);
+AMBA_DEVICE(sctl,  "dev:sctl",  SCTL,     NULL);
+AMBA_DEVICE(wdog,  "dev:wdog",  EB_WATCHDOG, NULL);
+AMBA_DEVICE(gpio0, "dev:gpio0", EB_GPIO0, &gpio0_plat_data);
+AMBA_DEVICE(gpio1, "dev:gpio1", GPIO1,    &gpio1_plat_data);
+AMBA_DEVICE(gpio2, "dev:gpio2", GPIO2,    &gpio2_plat_data);
+AMBA_DEVICE(rtc,   "dev:rtc",   EB_RTC,   NULL);
+AMBA_DEVICE(sci0,  "dev:sci0",  SCI,      NULL);
+AMBA_DEVICE(uart0, "dev:uart0", EB_UART0, NULL);
+AMBA_DEVICE(uart1, "dev:uart1", EB_UART1, NULL);
+AMBA_DEVICE(uart2, "dev:uart2", EB_UART2, NULL);
+AMBA_DEVICE(ssp0,  "dev:ssp0",  EB_SSP,   NULL);
 
 static struct amba_device *amba_devs[] __initdata = {
        &dmac_device,
index 17fbb0e..2817fe0 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/sysdev.h>
 #include <linux/amba/bus.h>
 #include <linux/amba/pl061.h>
+#include <linux/amba/mmci.h>
 #include <linux/io.h>
 
 #include <mach/hardware.h>
@@ -37,7 +38,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
-#include <asm/mach/mmc.h>
 #include <asm/mach/time.h>
 
 #include <mach/board-pb1176.h>
@@ -170,29 +170,29 @@ static struct pl061_platform_data gpio2_plat_data = {
 #define PB1176_SSP_DMA         { 9, 8 }
 
 /* FPGA Primecells */
-AMBA_DEVICE(aaci,      "fpga:04",      AACI,           NULL);
-AMBA_DEVICE(mmc0,      "fpga:05",      MMCI0,          &realview_mmc0_plat_data);
-AMBA_DEVICE(kmi0,      "fpga:06",      KMI0,           NULL);
-AMBA_DEVICE(kmi1,      "fpga:07",      KMI1,           NULL);
-AMBA_DEVICE(uart3,     "fpga:09",      PB1176_UART3,   NULL);
+AMBA_DEVICE(aaci,      "fpga:aaci",    AACI,           NULL);
+AMBA_DEVICE(mmc0,      "fpga:mmc0",    MMCI0,          &realview_mmc0_plat_data);
+AMBA_DEVICE(kmi0,      "fpga:kmi0",    KMI0,           NULL);
+AMBA_DEVICE(kmi1,      "fpga:kmi1",    KMI1,           NULL);
+AMBA_DEVICE(uart3,     "fpga:uart3",   PB1176_UART3,   NULL);
 
 /* DevChip Primecells */
-AMBA_DEVICE(smc,       "dev:00",       PB1176_SMC,     NULL);
-AMBA_DEVICE(sctl,      "dev:e0",       SCTL,           NULL);
-AMBA_DEVICE(wdog,      "dev:e1",       PB1176_WATCHDOG,        NULL);
-AMBA_DEVICE(gpio0,     "dev:e4",       PB1176_GPIO0,   &gpio0_plat_data);
-AMBA_DEVICE(gpio1,     "dev:e5",       GPIO1,          &gpio1_plat_data);
-AMBA_DEVICE(gpio2,     "dev:e6",       GPIO2,          &gpio2_plat_data);
-AMBA_DEVICE(rtc,       "dev:e8",       PB1176_RTC,     NULL);
-AMBA_DEVICE(sci0,      "dev:f0",       SCI,            NULL);
-AMBA_DEVICE(uart0,     "dev:f1",       PB1176_UART0,   NULL);
-AMBA_DEVICE(uart1,     "dev:f2",       PB1176_UART1,   NULL);
-AMBA_DEVICE(uart2,     "dev:f3",       PB1176_UART2,   NULL);
-AMBA_DEVICE(ssp0,      "dev:f4",       PB1176_SSP,     NULL);
+AMBA_DEVICE(smc,       "dev:smc",      PB1176_SMC,     NULL);
+AMBA_DEVICE(sctl,      "dev:sctl",     SCTL,           NULL);
+AMBA_DEVICE(wdog,      "dev:wdog",     PB1176_WATCHDOG,        NULL);
+AMBA_DEVICE(gpio0,     "dev:gpio0",    PB1176_GPIO0,   &gpio0_plat_data);
+AMBA_DEVICE(gpio1,     "dev:gpio1",    GPIO1,          &gpio1_plat_data);
+AMBA_DEVICE(gpio2,     "dev:gpio2",    GPIO2,          &gpio2_plat_data);
+AMBA_DEVICE(rtc,       "dev:rtc",      PB1176_RTC,     NULL);
+AMBA_DEVICE(sci0,      "dev:sci0",     SCI,            NULL);
+AMBA_DEVICE(uart0,     "dev:uart0",    PB1176_UART0,   NULL);
+AMBA_DEVICE(uart1,     "dev:uart1",    PB1176_UART1,   NULL);
+AMBA_DEVICE(uart2,     "dev:uart2",    PB1176_UART2,   NULL);
+AMBA_DEVICE(ssp0,      "dev:ssp0",     PB1176_SSP,     NULL);
 
 /* Primecells on the NEC ISSP chip */
-AMBA_DEVICE(clcd,      "issp:20",      PB1176_CLCD,    &clcd_plat_data);
-//AMBA_DEVICE(dmac,    "issp:30",      PB1176_DMAC,    NULL);
+AMBA_DEVICE(clcd,      "issp:clcd",    PB1176_CLCD,    &clcd_plat_data);
+//AMBA_DEVICE(dmac,    "issp:dmac",    PB1176_DMAC,    NULL);
 
 static struct amba_device *amba_devs[] __initdata = {
 //     &dmac_device,
index fdd042b..94680fc 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/sysdev.h>
 #include <linux/amba/bus.h>
 #include <linux/amba/pl061.h>
+#include <linux/amba/mmci.h>
 #include <linux/io.h>
 
 #include <mach/hardware.h>
@@ -38,7 +39,6 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
-#include <asm/mach/mmc.h>
 #include <asm/mach/time.h>
 
 #include <mach/board-pb11mp.h>
@@ -172,29 +172,29 @@ static struct pl061_platform_data gpio2_plat_data = {
 #define PB11MP_SSP_DMA         { 9, 8 }
 
 /* FPGA Primecells */
-AMBA_DEVICE(aaci,      "fpga:04",      AACI,           NULL);
-AMBA_DEVICE(mmc0,      "fpga:05",      MMCI0,          &realview_mmc0_plat_data);
-AMBA_DEVICE(kmi0,      "fpga:06",      KMI0,           NULL);
-AMBA_DEVICE(kmi1,      "fpga:07",      KMI1,           NULL);
-AMBA_DEVICE(uart3,     "fpga:09",      PB11MP_UART3,   NULL);
+AMBA_DEVICE(aaci,      "fpga:aaci",    AACI,           NULL);
+AMBA_DEVICE(mmc0,      "fpga:mmc0",    MMCI0,          &realview_mmc0_plat_data);
+AMBA_DEVICE(kmi0,      "fpga:kmi0",    KMI0,           NULL);
+AMBA_DEVICE(kmi1,      "fpga:kmi1",    KMI1,           NULL);
+AMBA_DEVICE(uart3,     "fpga:uart3",   PB11MP_UART3,   NULL);
 
 /* DevChip Primecells */
-AMBA_DEVICE(smc,       "dev:00",       PB11MP_SMC,     NULL);
-AMBA_DEVICE(sctl,      "dev:e0",       SCTL,           NULL);
-AMBA_DEVICE(wdog,      "dev:e1",       PB11MP_WATCHDOG, NULL);
-AMBA_DEVICE(gpio0,     "dev:e4",       PB11MP_GPIO0,   &gpio0_plat_data);
-AMBA_DEVICE(gpio1,     "dev:e5",       GPIO1,          &gpio1_plat_data);
-AMBA_DEVICE(gpio2,     "dev:e6",       GPIO2,          &gpio2_plat_data);
-AMBA_DEVICE(rtc,       "dev:e8",       PB11MP_RTC,     NULL);
-AMBA_DEVICE(sci0,      "dev:f0",       SCI,            NULL);
-AMBA_DEVICE(uart0,     "dev:f1",       PB11MP_UART0,   NULL);
-AMBA_DEVICE(uart1,     "dev:f2",       PB11MP_UART1,   NULL);
-AMBA_DEVICE(uart2,     "dev:f3",       PB11MP_UART2,   NULL);
-AMBA_DEVICE(ssp0,      "dev:f4",       PB11MP_SSP,     NULL);
+AMBA_DEVICE(smc,       "dev:smc",      PB11MP_SMC,     NULL);
+AMBA_DEVICE(sctl,      "dev:sctl",     SCTL,           NULL);
+AMBA_DEVICE(wdog,      "dev:wdog",     PB11MP_WATCHDOG, NULL);
+AMBA_DEVICE(gpio0,     "dev:gpio0",    PB11MP_GPIO0,   &gpio0_plat_data);
+AMBA_DEVICE(gpio1,     "dev:gpio1",    GPIO1,          &gpio1_plat_data);
+AMBA_DEVICE(gpio2,     "dev:gpio2",    GPIO2,          &gpio2_plat_data);
+AMBA_DEVICE(rtc,       "dev:rtc",      PB11MP_RTC,     NULL);
+AMBA_DEVICE(sci0,      "dev:sci0",     SCI,            NULL);
+AMBA_DEVICE(uart0,     "dev:uart0",    PB11MP_UART0,   NULL);
+AMBA_DEVICE(uart1,     "dev:uart1",    PB11MP_UART1,   NULL);
+AMBA_DEVICE(uart2,     "dev:uart2",    PB11MP_UART2,   NULL);
+AMBA_DEVICE(ssp0,      "dev:ssp0",     PB11MP_SSP,     NULL);
 
 /* Primecells on the NEC ISSP chip */
-AMBA_DEVICE(clcd,      "issp:20",      PB11MP_CLCD,    &clcd_plat_data);
-AMBA_DEVICE(dmac,      "issp:30",      DMAC,           NULL);
+AMBA_DEVICE(clcd,      "issp:clcd",    PB11MP_CLCD,    &clcd_plat_data);
+AMBA_DEVICE(dmac,      "issp:dmac",    DMAC,           NULL);
 
 static struct amba_device *amba_devs[] __initdata = {
        &dmac_device,
index 70bba99..941beb2 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/sysdev.h>
 #include <linux/amba/bus.h>
 #include <linux/amba/pl061.h>
+#include <linux/amba/mmci.h>
 #include <linux/io.h>
 
 #include <asm/irq.h>
@@ -34,7 +35,6 @@
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
-#include <asm/mach/mmc.h>
 #include <asm/mach/time.h>
 
 #include <mach/hardware.h>
@@ -162,29 +162,29 @@ static struct pl061_platform_data gpio2_plat_data = {
 #define PBA8_SSP_DMA           { 9, 8 }
 
 /* FPGA Primecells */
-AMBA_DEVICE(aaci,      "fpga:04",      AACI,           NULL);
-AMBA_DEVICE(mmc0,      "fpga:05",      MMCI0,          &realview_mmc0_plat_data);
-AMBA_DEVICE(kmi0,      "fpga:06",      KMI0,           NULL);
-AMBA_DEVICE(kmi1,      "fpga:07",      KMI1,           NULL);
-AMBA_DEVICE(uart3,     "fpga:09",      PBA8_UART3,     NULL);
+AMBA_DEVICE(aaci,      "fpga:aaci",    AACI,           NULL);
+AMBA_DEVICE(mmc0,      "fpga:mmc0",    MMCI0,          &realview_mmc0_plat_data);
+AMBA_DEVICE(kmi0,      "fpga:kmi0",    KMI0,           NULL);
+AMBA_DEVICE(kmi1,      "fpga:kmi1",    KMI1,           NULL);
+AMBA_DEVICE(uart3,     "fpga:uart3",   PBA8_UART3,     NULL);
 
 /* DevChip Primecells */
-AMBA_DEVICE(smc,       "dev:00",       PBA8_SMC,       NULL);
-AMBA_DEVICE(sctl,      "dev:e0",       SCTL,           NULL);
-AMBA_DEVICE(wdog,      "dev:e1",       PBA8_WATCHDOG, NULL);
-AMBA_DEVICE(gpio0,     "dev:e4",       PBA8_GPIO0,     &gpio0_plat_data);
-AMBA_DEVICE(gpio1,     "dev:e5",       GPIO1,          &gpio1_plat_data);
-AMBA_DEVICE(gpio2,     "dev:e6",       GPIO2,          &gpio2_plat_data);
-AMBA_DEVICE(rtc,       "dev:e8",       PBA8_RTC,       NULL);
-AMBA_DEVICE(sci0,      "dev:f0",       SCI,            NULL);
-AMBA_DEVICE(uart0,     "dev:f1",       PBA8_UART0,     NULL);
-AMBA_DEVICE(uart1,     "dev:f2",       PBA8_UART1,     NULL);
-AMBA_DEVICE(uart2,     "dev:f3",       PBA8_UART2,     NULL);
-AMBA_DEVICE(ssp0,      "dev:f4",       PBA8_SSP,       NULL);
+AMBA_DEVICE(smc,       "dev:smc",      PBA8_SMC,       NULL);
+AMBA_DEVICE(sctl,      "dev:sctl",     SCTL,           NULL);
+AMBA_DEVICE(wdog,      "dev:wdog",     PBA8_WATCHDOG, NULL);
+AMBA_DEVICE(gpio0,     "dev:gpio0",    PBA8_GPIO0,     &gpio0_plat_data);
+AMBA_DEVICE(gpio1,     "dev:gpio1",    GPIO1,          &gpio1_plat_data);
+AMBA_DEVICE(gpio2,     "dev:gpio2",    GPIO2,          &gpio2_plat_data);
+AMBA_DEVICE(rtc,       "dev:rtc",      PBA8_RTC,       NULL);
+AMBA_DEVICE(sci0,      "dev:sci0",     SCI,            NULL);
+AMBA_DEVICE(uart0,     "dev:uart0",    PBA8_UART0,     NULL);
+AMBA_DEVICE(uart1,     "dev:uart1",    PBA8_UART1,     NULL);
+AMBA_DEVICE(uart2,     "dev:uart2",    PBA8_UART2,     NULL);
+AMBA_DEVICE(ssp0,      "dev:ssp0",     PBA8_SSP,       NULL);
 
 /* Primecells on the NEC ISSP chip */
-AMBA_DEVICE(clcd,      "issp:20",      PBA8_CLCD,      &clcd_plat_data);
-AMBA_DEVICE(dmac,      "issp:30",      DMAC,           NULL);
+AMBA_DEVICE(clcd,      "issp:clcd",    PBA8_CLCD,      &clcd_plat_data);
+AMBA_DEVICE(dmac,      "issp:dmac",    DMAC,           NULL);
 
 static struct amba_device *amba_devs[] __initdata = {
        &dmac_device,
index ce6c5d2..7e4bc6c 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/sysdev.h>
 #include <linux/amba/bus.h>
 #include <linux/amba/pl061.h>
+#include <linux/amba/mmci.h>
 #include <linux/io.h>
 
 #include <asm/irq.h>
@@ -34,7 +35,6 @@
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
-#include <asm/mach/mmc.h>
 #include <asm/mach/time.h>
 
 #include <mach/hardware.h>
@@ -182,29 +182,29 @@ static struct pl061_platform_data gpio2_plat_data = {
 #define PBX_SSP_DMA            { 9, 8 }
 
 /* FPGA Primecells */
-AMBA_DEVICE(aaci,      "fpga:04",      AACI,           NULL);
-AMBA_DEVICE(mmc0,      "fpga:05",      MMCI0,          &realview_mmc0_plat_data);
-AMBA_DEVICE(kmi0,      "fpga:06",      KMI0,           NULL);
-AMBA_DEVICE(kmi1,      "fpga:07",      KMI1,           NULL);
-AMBA_DEVICE(uart3,     "fpga:09",      PBX_UART3,      NULL);
+AMBA_DEVICE(aaci,      "fpga:aaci",    AACI,           NULL);
+AMBA_DEVICE(mmc0,      "fpga:mmc0",    MMCI0,          &realview_mmc0_plat_data);
+AMBA_DEVICE(kmi0,      "fpga:kmi0",    KMI0,           NULL);
+AMBA_DEVICE(kmi1,      "fpga:kmi1",    KMI1,           NULL);
+AMBA_DEVICE(uart3,     "fpga:uart3",   PBX_UART3,      NULL);
 
 /* DevChip Primecells */
-AMBA_DEVICE(smc,       "dev:00",       PBX_SMC,        NULL);
-AMBA_DEVICE(sctl,      "dev:e0",       SCTL,           NULL);
-AMBA_DEVICE(wdog,      "dev:e1",       PBX_WATCHDOG,   NULL);
-AMBA_DEVICE(gpio0,     "dev:e4",       PBX_GPIO0,      &gpio0_plat_data);
-AMBA_DEVICE(gpio1,     "dev:e5",       GPIO1,          &gpio1_plat_data);
-AMBA_DEVICE(gpio2,     "dev:e6",       GPIO2,          &gpio2_plat_data);
-AMBA_DEVICE(rtc,       "dev:e8",       PBX_RTC,        NULL);
-AMBA_DEVICE(sci0,      "dev:f0",       SCI,            NULL);
-AMBA_DEVICE(uart0,     "dev:f1",       PBX_UART0,      NULL);
-AMBA_DEVICE(uart1,     "dev:f2",       PBX_UART1,      NULL);
-AMBA_DEVICE(uart2,     "dev:f3",       PBX_UART2,      NULL);
-AMBA_DEVICE(ssp0,      "dev:f4",       PBX_SSP,        NULL);
+AMBA_DEVICE(smc,       "dev:smc",      PBX_SMC,        NULL);
+AMBA_DEVICE(sctl,      "dev:sctl",     SCTL,           NULL);
+AMBA_DEVICE(wdog,      "dev:wdog",     PBX_WATCHDOG,   NULL);
+AMBA_DEVICE(gpio0,     "dev:gpio0",    PBX_GPIO0,      &gpio0_plat_data);
+AMBA_DEVICE(gpio1,     "dev:gpio1",    GPIO1,          &gpio1_plat_data);
+AMBA_DEVICE(gpio2,     "dev:gpio2",    GPIO2,          &gpio2_plat_data);
+AMBA_DEVICE(rtc,       "dev:rtc",      PBX_RTC,        NULL);
+AMBA_DEVICE(sci0,      "dev:sci0",     SCI,            NULL);
+AMBA_DEVICE(uart0,     "dev:uart0",    PBX_UART0,      NULL);
+AMBA_DEVICE(uart1,     "dev:uart1",    PBX_UART1,      NULL);
+AMBA_DEVICE(uart2,     "dev:uart2",    PBX_UART2,      NULL);
+AMBA_DEVICE(ssp0,      "dev:ssp0",     PBX_SSP,        NULL);
 
 /* Primecells on the NEC ISSP chip */
-AMBA_DEVICE(clcd,      "issp:20",      PBX_CLCD,       &clcd_plat_data);
-AMBA_DEVICE(dmac,      "issp:30",      DMAC,           NULL);
+AMBA_DEVICE(clcd,      "issp:clcd",    PBX_CLCD,       &clcd_plat_data);
+AMBA_DEVICE(dmac,      "issp:dmac",    DMAC,           NULL);
 
 static struct amba_device *amba_devs[] __initdata = {
        &dmac_device,
index d8c023d..3d4e9da 100644 (file)
@@ -77,6 +77,7 @@ config ARCH_H1940
        select CPU_S3C2410
        select PM_H1940 if PM
        select S3C_DEV_USB_HOST
+       select S3C_DEV_NAND
        help
          Say Y here if you are using the HP IPAQ H1940
 
@@ -89,6 +90,7 @@ config MACH_N30
        bool "Acer N30 family"
        select CPU_S3C2410
        select S3C_DEV_USB_HOST
+       select S3C_DEV_NAND
        help
          Say Y here if you want support for the Acer N30, Acer N35,
          Navman PiN570, Yakumo AlphaX or Airis NC05 PDAs.
@@ -103,6 +105,7 @@ config ARCH_BAST
        select S3C24XX_DCLK
        select ISA
        select S3C_DEV_USB_HOST
+       select S3C_DEV_NAND
        help
          Say Y here if you are using the Simtec Electronics EB2410ITX
          development board (also known as BAST)
@@ -111,6 +114,7 @@ config MACH_OTOM
        bool "NexVision OTOM Board"
        select CPU_S3C2410
        select S3C_DEV_USB_HOST
+       select S3C_DEV_NAND
        help
          Say Y here if you are using the Nex Vision OTOM board
 
@@ -154,6 +158,7 @@ config MACH_QT2410
        bool "QT2410"
        select CPU_S3C2410
        select S3C_DEV_USB_HOST
+       select S3C_DEV_NAND
        help
           Say Y here if you are using the Armzone QT2410
 
index 35c1bde..c2bdc46 100644 (file)
@@ -48,6 +48,7 @@ config MACH_JIVE
        bool "Logitech Jive"
        select CPU_S3C2412
        select S3C_DEV_USB_HOST
+       select S3C_DEV_NAND
        help
          Say Y here if you are using the Logitech Jive.
 
@@ -61,6 +62,7 @@ config MACH_SMDK2413
        select MACH_S3C2413
        select MACH_SMDK
        select S3C_DEV_USB_HOST
+       select S3C_DEV_NAND
        help
          Say Y here if you are using an SMDK2413
 
@@ -84,6 +86,7 @@ config MACH_VSTMS
        bool "VMSTMS"
        select CPU_S3C2412
        select S3C_DEV_USB_HOST
+       select S3C_DEV_NAND
        help
          Say Y here if you are using a VSTMS board
 
index 8ae1b28..d7bba91 100644 (file)
@@ -48,6 +48,7 @@ config MACH_OSIRIS
        select S3C2440_XTAL_12000000
        select S3C2410_IOTIMING if S3C2440_CPUFREQ
        select S3C_DEV_USB_HOST
+       select S3C_DEV_NAND
        help
          Say Y here if you are using the Simtec IM2440D20 module, also
          known as the Osiris.
@@ -57,6 +58,7 @@ config MACH_RX3715
        select CPU_S3C2440
        select S3C2440_XTAL_16934400
        select PM_H1940 if PM
+       select S3C_DEV_NAND
        help
          Say Y here if you are using the HP iPAQ rx3715.
 
@@ -66,6 +68,7 @@ config ARCH_S3C2440
        select S3C2440_XTAL_16934400
        select MACH_SMDK
        select S3C_DEV_USB_HOST
+       select S3C_DEV_NAND
        help
          Say Y here if you are using the SMDK2440.
 
@@ -74,6 +77,7 @@ config MACH_NEXCODER_2440
        select CPU_S3C2440
        select S3C2440_XTAL_12000000
        select S3C_DEV_USB_HOST
+       select S3C_DEV_NAND
        help
          Say Y here if you are using the Nex Vision NEXCODER 2440 Light Board
 
@@ -88,6 +92,7 @@ config MACH_AT2440EVB
        bool "Avantech AT2440EVB development board"
        select CPU_S3C2440
        select S3C_DEV_USB_HOST
+       select S3C_DEV_NAND
        help
          Say Y here if you are using the AT2440EVB development board
 
@@ -97,6 +102,7 @@ config MACH_MINI2440
        select EEPROM_AT24
        select LEDS_TRIGGER_BACKLIGHT
        select SND_S3C24XX_SOC_S3C24XX_UDA134X
+       select S3C_DEV_NAND
        help
          Say Y here to select support for the MINI2440, a 10cm x 10cm board
          available from various sources. It can come with a 3.5" or 7" touch LCD.
index f5af212..770b720 100644 (file)
@@ -26,6 +26,7 @@ config MACH_SMDK6400
        bool "SMDK6400"
        select CPU_S3C6400
        select S3C_DEV_HSMMC
+       select S3C_DEV_NAND
        select S3C6400_SETUP_SDHCI
        help
          Machine support for the Samsung SMDK6400
index f9d0f09..53fc3ff 100644 (file)
@@ -102,6 +102,7 @@ config MACH_HMT
        bool "Airgoo HMT"
        select CPU_S3C6410
        select S3C_DEV_FB
+       select S3C_DEV_NAND
        select S3C_DEV_USB_HOST
        select S3C64XX_SETUP_FB_24BPP
        select HAVE_PWM
index 95f9c5a..cb4521a 100644 (file)
@@ -39,7 +39,7 @@ typedef struct {
 
 static sa1100_dma_t dma_chan[SA1100_DMA_CHANNELS];
 
-static spinlock_t dma_list_lock;
+static DEFINE_SPINLOCK(dma_list_lock);
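+/* DEFINE_SPINLOCK() initializes the lock at compile time; the bare
+ * spinlock_t it replaces was never passed through spin_lock_init(),
+ * which spinlock debugging flags as use of an uninitialized lock.
+ * A dynamically allocated lock would instead need (sketch):
+ *
+ *     spin_lock_init(&dma_list_lock);
+ */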
 
 
 static irqreturn_t dma_irq_handler(int irq, void *dev_id)
index 337b9aa..801b21e 100644 (file)
@@ -81,6 +81,18 @@ config MACH_U300_SEMI_IS_SHARED
                Memory Interface) from both the access and the application
                side.
 
+config MACH_U300_SPIDUMMY
+       bool "SSP/SPI dummy chip"
+       select SPI
+       select SPI_MASTER
+       select SPI_PL022
+       help
+               This creates a small kernel module that registers a dummy
+               SPI device to be used for loopback tests. It is regularly
+               used to test reference designs. If you're not testing SPI,
+               you don't need it. Selecting this will activate the
+               SPI framework and ARM PL022 support.
+
 comment "All the settings below must match the bootloader's settings"
 
 config MACH_U300_ACCESS_MEM_SIZE
index 24950e0..885b5c0 100644 (file)
@@ -9,3 +9,6 @@ obj-            :=
 
 obj-$(CONFIG_ARCH_U300)                  += u300.o
 obj-$(CONFIG_MMC)                 += mmc.o
+obj-$(CONFIG_SPI_PL022)           += spi.o
+obj-$(CONFIG_MACH_U300_SPIDUMMY)  += dummyspichip.o
+obj-$(CONFIG_I2C_STU300)          += i2c.o
index 2e9b8cc..be60d6d 100644 (file)
@@ -32,6 +32,8 @@
 
 #include "clock.h"
 #include "mmc.h"
+#include "spi.h"
+#include "i2c.h"
 
 /*
  * Static I/O mappings that are needed for booting the U300 platforms. The
@@ -378,14 +380,14 @@ static struct platform_device wdog_device = {
 };
 
 static struct platform_device i2c0_device = {
-       .name = "stddci2c",
+       .name = "stu300",
        .id = 0,
        .num_resources = ARRAY_SIZE(i2c0_resources),
        .resource = i2c0_resources,
 };
 
 static struct platform_device i2c1_device = {
-       .name = "stddci2c",
+       .name = "stu300",
        .id = 1,
        .num_resources = ARRAY_SIZE(i2c1_resources),
        .resource = i2c1_resources,
@@ -611,6 +613,8 @@ void __init u300_init_devices(void)
        /* Wait for the PLL208 to lock if not locked in yet */
        while (!(readw(U300_SYSCON_VBASE + U300_SYSCON_CSR) &
                 U300_SYSCON_CSR_PLL208_LOCK_IND));
+       /* Initialize SPI device with some board specifics */
+       u300_spi_init(&pl022_device);
 
        /* Register the AMBA devices in the AMBA bus abstraction layer */
        u300_clock_primecells();
@@ -622,6 +626,12 @@ void __init u300_init_devices(void)
 
        u300_assign_physmem();
 
+       /* Register subdevices on the I2C buses */
+       u300_i2c_register_board_devices();
+
+       /* Register subdevices on the SPI bus */
+       u300_spi_register_board_devices();
+
        /* Register the platform devices */
        platform_add_devices(platform_devs, ARRAY_SIZE(platform_devs));
 
diff --git a/arch/arm/mach-u300/dummyspichip.c b/arch/arm/mach-u300/dummyspichip.c
new file mode 100644 (file)
index 0000000..962f9de
--- /dev/null
@@ -0,0 +1,290 @@
+/*
+ * arch/arm/mach-u300/dummyspichip.c
+ *
+ * Copyright (C) 2007-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * This is a dummy loopback SPI "chip" used for testing SPI.
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/sysfs.h>
+#include <linux/mutex.h>
+#include <linux/spi/spi.h>
+#include <linux/dma-mapping.h>
+/*
+ * WARNING! Do not include this pl022-specific controller header
+ * for any generic driver. It is only done in this dummy chip
+ * because we alter the chip configuration in order to test some
+ * different settings on the loopback device. Normal chip configs
+ * shall be STATIC and not altered by the driver!
+ */
+#include <linux/amba/pl022.h>
+
+struct dummy {
+       struct device *dev;
+       struct mutex lock;
+};
+
+#define DMA_TEST_SIZE 2048
+
+/* When we cat /sys/bus/spi/devices/spi0.0/looptest this will be triggered */
+static ssize_t dummy_looptest(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct spi_device *spi = to_spi_device(dev);
+       struct dummy *p_dummy = dev_get_drvdata(&spi->dev);
+
+       /*
+        * WARNING! Do not dereference the chip-specific data in any normal
+        * driver for a chip. It is usually STATIC and shall not be read
+        * or written to. Your chip driver should NOT depend on fields in this
+        * struct, this is just used here to alter the behaviour of the chip
+        * in order to perform tests.
+        */
+       struct pl022_config_chip *chip_info = spi->controller_data;
+       int status;
+       u8 txbuf[14] = {0xDE, 0xAD, 0xBE, 0xEF, 0x2B, 0xAD,
+                       0xCA, 0xFE, 0xBA, 0xBE, 0xB1, 0x05,
+                       0xF0, 0x0D};
+       u8 rxbuf[14];
+       u8 *bigtxbuf_virtual;
+       u8 *bigrxbuf_virtual;
+
+       if (mutex_lock_interruptible(&p_dummy->lock))
+               return -ERESTARTSYS;
+
+       bigtxbuf_virtual = kmalloc(DMA_TEST_SIZE, GFP_KERNEL);
+       if (bigtxbuf_virtual == NULL) {
+               status = -ENOMEM;
+               goto out;
+       }
+       bigrxbuf_virtual = kmalloc(DMA_TEST_SIZE, GFP_KERNEL);
+       if (bigrxbuf_virtual == NULL) {
+               status = -ENOMEM;
+               kfree(bigtxbuf_virtual);
+               goto out;
+       }
+
+       /* Fill TXBUF with some happy pattern */
+       memset(bigtxbuf_virtual, 0xAA, DMA_TEST_SIZE);
+
+       /*
+        * Force chip to 8 bit mode
+        * WARNING: NEVER DO THIS IN REAL DRIVER CODE, THIS SHOULD BE STATIC!
+        */
+       chip_info->data_size = SSP_DATA_BITS_8;
+       /* You should NOT DO THIS EITHER */
+       spi->master->setup(spi);
+
+       /* Now run the tests for 8bit mode */
+       pr_info("Simple test 1: write 0xAA byte, read back garbage byte "
+               "in 8bit mode\n");
+       status = spi_w8r8(spi, 0xAA);
+       if (status < 0)
+               pr_warning("Siple test 1: FAILURE: spi_write_then_read "
+                          "failed with status %d\n", status);
+       else
+               pr_info("Simple test 1: SUCCESS!\n");
+
+       pr_info("Simple test 2: write 8 bytes, read back 8 bytes garbage "
+               "in 8bit mode (full FIFO)\n");
+       status = spi_write_then_read(spi, &txbuf[0], 8, &rxbuf[0], 8);
+       if (status < 0)
+               pr_warning("Simple test 2: FAILURE: spi_write_then_read() "
+                          "failed with status %d\n", status);
+       else
+               pr_info("Simple test 2: SUCCESS!\n");
+
+       pr_info("Simple test 3: write 14 bytes, read back 14 bytes garbage "
+               "in 8bit mode (see if we overflow FIFO)\n");
+       status = spi_write_then_read(spi, &txbuf[0], 14, &rxbuf[0], 14);
+       if (status < 0)
+               pr_warning("Simple test 3: FAILURE: failed with status %d "
+                          "(probably FIFO overrun)\n", status);
+       else
+               pr_info("Simple test 3: SUCCESS!\n");
+
+       pr_info("Simple test 4: write 8 bytes with spi_write(), read 8 "
+               "bytes garbage with spi_read() in 8bit mode\n");
+       status = spi_write(spi, &txbuf[0], 8);
+       if (status < 0)
+               pr_warning("Simple test 4 step 1: FAILURE: spi_write() "
+                          "failed with status %d\n", status);
+       else
+               pr_info("Simple test 4 step 1: SUCCESS!\n");
+       status = spi_read(spi, &rxbuf[0], 8);
+       if (status < 0)
+               pr_warning("Simple test 4 step 2: FAILURE: spi_read() "
+                          "failed with status %d\n", status);
+       else
+               pr_info("Simple test 4 step 2: SUCCESS!\n");
+
+       pr_info("Simple test 5: write 14 bytes with spi_write(), read "
+               "14 bytes garbage with spi_read() in 8bit mode\n");
+       status = spi_write(spi, &txbuf[0], 14);
+       if (status < 0)
+               pr_warning("Simple test 5 step 1: FAILURE: spi_write() "
+                          "failed with status %d (probably FIFO overrun)\n",
+                          status);
+       else
+               pr_info("Simple test 5 step 1: SUCCESS!\n");
+       status = spi_read(spi, &rxbuf[0], 14);
+       if (status < 0)
+               pr_warning("Simple test 5 step 2: FAILURE: spi_read() "
+                          "failed with status %d (probably FIFO overrun)\n",
+                          status);
+       else
+               pr_info("Simple test 5: SUCCESS!\n");
+
+       pr_info("Simple test 6: write %d bytes with spi_write(), "
+               "read %d bytes garbage with spi_read() in 8bit mode\n",
+               DMA_TEST_SIZE, DMA_TEST_SIZE);
+       status = spi_write(spi, &bigtxbuf_virtual[0], DMA_TEST_SIZE);
+       if (status < 0)
+               pr_warning("Simple test 6 step 1: FAILURE: spi_write() "
+                          "failed with status %d (probably FIFO overrun)\n",
+                          status);
+       else
+               pr_info("Simple test 6 step 1: SUCCESS!\n");
+       status = spi_read(spi, &bigrxbuf_virtual[0], DMA_TEST_SIZE);
+       if (status < 0)
+               pr_warning("Simple test 6 step 2: FAILURE: spi_read() "
+                          "failed with status %d (probably FIFO overrun)\n",
+                          status);
+       else
+               pr_info("Simple test 6: SUCCESS!\n");
+
+
+       /*
+        * Force chip to 16 bit mode
+        * WARNING: NEVER DO THIS IN REAL DRIVER CODE, THIS SHOULD BE STATIC!
+        */
+       chip_info->data_size = SSP_DATA_BITS_16;
+       /* You should NOT DO THIS EITHER */
+       spi->master->setup(spi);
+
+       pr_info("Simple test 7: write 0xAA byte, read back garbage byte "
+               "in 16bit bus mode\n");
+       status = spi_w8r8(spi, 0xAA);
+       if (status == -EIO)
+               pr_info("Simple test 7: SUCCESS! (expected failure with "
+                       "status EIO)\n");
+       else if (status < 0)
+               pr_warning("Siple test 7: FAILURE: spi_write_then_read "
+                          "failed with status %d\n", status);
+       else
+               pr_warning("Siple test 7: FAILURE: spi_write_then_read "
+                          "succeeded but it was expected to fail!\n");
+
+       pr_info("Simple test 8: write 8 bytes, read back 8 bytes garbage "
+               "in 16bit mode (full FIFO)\n");
+       status = spi_write_then_read(spi, &txbuf[0], 8, &rxbuf[0], 8);
+       if (status < 0)
+               pr_warning("Simple test 8: FAILURE: spi_write_then_read() "
+                          "failed with status %d\n", status);
+       else
+               pr_info("Simple test 8: SUCCESS!\n");
+
+       pr_info("Simple test 9: write 14 bytes, read back 14 bytes garbage "
+               "in 16bit mode (see if we overflow FIFO)\n");
+       status = spi_write_then_read(spi, &txbuf[0], 14, &rxbuf[0], 14);
+       if (status < 0)
+               pr_warning("Simple test 9: FAILURE: failed with status %d "
+                          "(probably FIFO overrun)\n", status);
+       else
+               pr_info("Simple test 9: SUCCESS!\n");
+
+       pr_info("Simple test 10: write %d bytes with spi_write(), "
+              "read %d bytes garbage with spi_read() in 16bit mode\n",
+              DMA_TEST_SIZE, DMA_TEST_SIZE);
+       status = spi_write(spi, &bigtxbuf_virtual[0], DMA_TEST_SIZE);
+       if (status < 0)
+               pr_warning("Simple test 10 step 1: FAILURE: spi_write() "
+                          "failed with status %d (probably FIFO overrun)\n",
+                          status);
+       else
+               pr_info("Simple test 10 step 1: SUCCESS!\n");
+
+       status = spi_read(spi, &bigrxbuf_virtual[0], DMA_TEST_SIZE);
+       if (status < 0)
+               pr_warning("Simple test 10 step 2: FAILURE: spi_read() "
+                          "failed with status %d (probably FIFO overrun)\n",
+                          status);
+       else
+               pr_info("Simple test 10: SUCCESS!\n");
+
+       status = sprintf(buf, "loop test complete\n");
+       kfree(bigrxbuf_virtual);
+       kfree(bigtxbuf_virtual);
+ out:
+       mutex_unlock(&p_dummy->lock);
+       return status;
+}
+
+static DEVICE_ATTR(looptest, S_IRUGO, dummy_looptest, NULL);
+
+static int __devinit pl022_dummy_probe(struct spi_device *spi)
+{
+       struct dummy *p_dummy;
+       int status;
+
+       dev_info(&spi->dev, "probing dummy SPI device\n");
+
+       p_dummy = kzalloc(sizeof *p_dummy, GFP_KERNEL);
+       if (!p_dummy)
+               return -ENOMEM;
+
+       dev_set_drvdata(&spi->dev, p_dummy);
+       mutex_init(&p_dummy->lock);
+
+       /* sysfs hook */
+       status = device_create_file(&spi->dev, &dev_attr_looptest);
+       if (status) {
+               dev_dbg(&spi->dev, "device_create_file looptest failure.\n");
+               goto out_dev_create_looptest_failed;
+       }
+
+       return 0;
+
+out_dev_create_looptest_failed:
+       dev_set_drvdata(&spi->dev, NULL);
+       kfree(p_dummy);
+       return status;
+}
+
+static int __devexit pl022_dummy_remove(struct spi_device *spi)
+{
+       struct dummy *p_dummy = dev_get_drvdata(&spi->dev);
+
+       dev_info(&spi->dev, "removing dummy SPI device\n");
+       device_remove_file(&spi->dev, &dev_attr_looptest);
+       dev_set_drvdata(&spi->dev, NULL);
+       kfree(p_dummy);
+
+       return 0;
+}
+
+static struct spi_driver pl022_dummy_driver = {
+       .driver = {
+               .name   = "spi-dummy",
+               .owner  = THIS_MODULE,
+       },
+       .probe  = pl022_dummy_probe,
+       .remove = __devexit_p(pl022_dummy_remove),
+};
+
+static int __init pl022_init_dummy(void)
+{
+       return spi_register_driver(&pl022_dummy_driver);
+}
+
+static void __exit pl022_exit_dummy(void)
+{
+       spi_unregister_driver(&pl022_dummy_driver);
+}
+
+module_init(pl022_init_dummy);
+module_exit(pl022_exit_dummy);
+
+MODULE_AUTHOR("Linus Walleij <linus.walleij@stericsson.com>");
+MODULE_DESCRIPTION("PL022 SSP/SPI DUMMY Linux driver");
+MODULE_LICENSE("GPL");
index 308cdb1..63c8f27 100644 (file)
 #include <linux/platform_device.h>
 #include <linux/gpio.h>
 
-/* Need access to SYSCON registers for PADmuxing */
-#include <mach/syscon.h>
-
-#include "padmux.h"
-
 /* Reference to GPIO block clock */
 static struct clk *clk;
 
@@ -606,14 +601,6 @@ static int __init gpio_probe(struct platform_device *pdev)
        writel(U300_GPIO_CR_BLOCK_CLKRQ_ENABLE, virtbase + U300_GPIO_CR);
 #endif
 
-       /* Set up some padmuxing here */
-#ifdef CONFIG_MMC
-       pmx_set_mission_mode_mmc();
-#endif
-#ifdef CONFIG_SPI_PL022
-       pmx_set_mission_mode_spi();
-#endif
-
        gpio_set_initial_values();
 
        for (num_irqs = 0 ; num_irqs < U300_GPIO_NUM_PORTS; num_irqs++) {
diff --git a/arch/arm/mach-u300/i2c.c b/arch/arm/mach-u300/i2c.c
new file mode 100644 (file)
index 0000000..10be1f8
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * arch/arm/mach-u300/i2c.c
+ *
+ * Copyright (C) 2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ *
+ * Register board i2c devices
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ */
+#include <linux/kernel.h>
+#include <linux/i2c.h>
+#include <mach/irqs.h>
+
+static struct i2c_board_info __initdata bus0_i2c_board_info[] = {
+       {
+               .type = "ab3100",
+               .addr = 0x48,
+               .irq = IRQ_U300_IRQ0_EXT,
+       },
+};
+
+static struct i2c_board_info __initdata bus1_i2c_board_info[] = {
+#ifdef CONFIG_MACH_U300_BS335
+       {
+               .type = "fwcam",
+               .addr = 0x10,
+       },
+       {
+               .type = "fwcam",
+               .addr = 0x5d,
+       },
+#else
+       { },
+#endif
+};
+
+void __init u300_i2c_register_board_devices(void)
+{
+       i2c_register_board_info(0, bus0_i2c_board_info,
+                               ARRAY_SIZE(bus0_i2c_board_info));
+       i2c_register_board_info(1, bus1_i2c_board_info,
+                               ARRAY_SIZE(bus1_i2c_board_info));
+}
diff --git a/arch/arm/mach-u300/i2c.h b/arch/arm/mach-u300/i2c.h
new file mode 100644 (file)
index 0000000..485c02e
--- /dev/null
@@ -0,0 +1,23 @@
+/*
+ * arch/arm/mach-u300/i2c.h
+ *
+ * Copyright (C) 2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ *
+ * Register board i2c devices
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ */
+
+#ifndef MACH_U300_I2C_H
+#define MACH_U300_I2C_H
+
+#ifdef CONFIG_I2C_STU300
+void __init u300_i2c_register_board_devices(void);
+#else
+/* Compile out this stuff if no I2C adapter is available */
+static inline void __init u300_i2c_register_board_devices(void)
+{
+}
+#endif
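+
+/* Thanks to the inline stub above, board code such as u300.c can call
+ * u300_i2c_register_board_devices() unconditionally; the call compiles
+ * away when CONFIG_I2C_STU300 is not set. */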
+
+#endif
index bf134bc..ab000df 100644 (file)
            (CONFIG_MACH_U300_ACCESS_MEM_SIZE & 1))*1024*1024 + 0x100)
 #endif
 
+/*
+ * TCM memory whereabouts
+ */
+#define ITCM_OFFSET    0xffff2000
+#define ITCM_END       0xffff3fff
+#define DTCM_OFFSET    0xffff4000
+#define DTCM_END       0xffff5fff
+
 /*
  * We enable a real big DMA buffer if need be.
  */
index 1c90d1b..7444f5c 100644 (file)
 #define U300_SYSCON_PMC1LR_CDI_MASK                            (0xC000)
 #define U300_SYSCON_PMC1LR_CDI_CDI                             (0x0000)
 #define U300_SYSCON_PMC1LR_CDI_EMIF                            (0x4000)
+#ifdef CONFIG_MACH_U300_BS335
+#define U300_SYSCON_PMC1LR_CDI_CDI2                            (0x8000)
+#define U300_SYSCON_PMC1LR_CDI_WCDMA_APP_GPIO                  (0xC000)
+#elif defined(CONFIG_MACH_U300_BS365)
 #define U300_SYSCON_PMC1LR_CDI_GPIO                            (0x8000)
 #define U300_SYSCON_PMC1LR_CDI_WCDMA                           (0xC000)
+#endif
 #define U300_SYSCON_PMC1LR_PDI_MASK                            (0x3000)
 #define U300_SYSCON_PMC1LR_PDI_PDI                             (0x0000)
 #define U300_SYSCON_PMC1LR_PDI_EGG                             (0x1000)
 #define U300_SYSCON_MMCR_MASK                                  (0x0003)
 #define U300_SYSCON_MMCR_MMC_FB_CLK_SEL_ENABLE                 (0x0002)
 #define U300_SYSCON_MMCR_MSPRO_FREQSEL_ENABLE                  (0x0001)
-
+/* Pull up/down control (R/W) */
+#define U300_SYSCON_PUCR                                       (0x104)
+#define U300_SYSCON_PUCR_EMIF_1_WAIT_N_PU_ENABLE               (0x0200)
+#define U300_SYSCON_PUCR_EMIF_1_NFIF_READY_PU_ENABLE           (0x0100)
+#define U300_SYSCON_PUCR_EMIF_1_16BIT_PU_ENABLE                        (0x0080)
+#define U300_SYSCON_PUCR_EMIF_1_8BIT_PU_ENABLE                 (0x0040)
+#define U300_SYSCON_PUCR_KEY_IN_PU_EN_MASK                     (0x003F)
+/* Padmux 2 control */
+#define U300_SYSCON_PMC2R                                      (0x100)
+#define U300_SYSCON_PMC2R_APP_MISC_0_MASK                      (0x00C0)
+#define U300_SYSCON_PMC2R_APP_MISC_0_APP_GPIO                  (0x0000)
+#define U300_SYSCON_PMC2R_APP_MISC_0_EMIF_SDRAM                        (0x0040)
+#define U300_SYSCON_PMC2R_APP_MISC_0_MMC                       (0x0080)
+#define U300_SYSCON_PMC2R_APP_MISC_0_CDI2                      (0x00C0)
+#define U300_SYSCON_PMC2R_APP_MISC_1_MASK                      (0x0300)
+#define U300_SYSCON_PMC2R_APP_MISC_1_APP_GPIO                  (0x0000)
+#define U300_SYSCON_PMC2R_APP_MISC_1_EMIF_SDRAM                        (0x0100)
+#define U300_SYSCON_PMC2R_APP_MISC_1_MMC                       (0x0200)
+#define U300_SYSCON_PMC2R_APP_MISC_1_CDI2                      (0x0300)
+#define U300_SYSCON_PMC2R_APP_MISC_2_MASK                      (0x0C00)
+#define U300_SYSCON_PMC2R_APP_MISC_2_APP_GPIO                  (0x0000)
+#define U300_SYSCON_PMC2R_APP_MISC_2_EMIF_SDRAM                        (0x0400)
+#define U300_SYSCON_PMC2R_APP_MISC_2_MMC                       (0x0800)
+#define U300_SYSCON_PMC2R_APP_MISC_2_CDI2                      (0x0C00)
+#define U300_SYSCON_PMC2R_APP_MISC_3_MASK                      (0x3000)
+#define U300_SYSCON_PMC2R_APP_MISC_3_APP_GPIO                  (0x0000)
+#define U300_SYSCON_PMC2R_APP_MISC_3_EMIF_SDRAM                        (0x1000)
+#define U300_SYSCON_PMC2R_APP_MISC_3_MMC                       (0x2000)
+#define U300_SYSCON_PMC2R_APP_MISC_3_CDI2                      (0x3000)
+#define U300_SYSCON_PMC2R_APP_MISC_4_MASK                      (0xC000)
+#define U300_SYSCON_PMC2R_APP_MISC_4_APP_GPIO                  (0x0000)
+#define U300_SYSCON_PMC2R_APP_MISC_4_EMIF_SDRAM                        (0x4000)
+#define U300_SYSCON_PMC2R_APP_MISC_4_MMC                       (0x8000)
+#define U300_SYSCON_PMC2R_APP_MISC_4_ACC_GPIO                  (0xC000)
 /* TODO: More SYSCON registers missing */
 #define U300_SYSCON_PMC3R                                      (0x10c)
 #define U300_SYSCON_PMC3R_APP_MISC_11_MASK                     (0xc000)
 #define U300_SYSCON_PMC3R_APP_MISC_11_SPI                      (0x4000)
 #define U300_SYSCON_PMC3R_APP_MISC_10_MASK                     (0x3000)
 #define U300_SYSCON_PMC3R_APP_MISC_10_SPI                      (0x1000)
-/* TODO: Missing other configs, I just added the SPI stuff */
-
+/* TODO: Missing other configs */
+#define U300_SYSCON_PMC4R                                      (0x168)
+#define U300_SYSCON_PMC4R_APP_MISC_12_MASK                     (0x0003)
+#define U300_SYSCON_PMC4R_APP_MISC_12_APP_GPIO                 (0x0000)
+#define U300_SYSCON_PMC4R_APP_MISC_13_MASK                     (0x000C)
+#define U300_SYSCON_PMC4R_APP_MISC_13_CDI                      (0x0000)
+#define U300_SYSCON_PMC4R_APP_MISC_13_SMIA                     (0x0004)
+#define U300_SYSCON_PMC4R_APP_MISC_13_SMIA2                    (0x0008)
+#define U300_SYSCON_PMC4R_APP_MISC_13_APP_GPIO                 (0x000C)
+#define U300_SYSCON_PMC4R_APP_MISC_14_MASK                     (0x0030)
+#define U300_SYSCON_PMC4R_APP_MISC_14_CDI                      (0x0000)
+#define U300_SYSCON_PMC4R_APP_MISC_14_SMIA                     (0x0010)
+#define U300_SYSCON_PMC4R_APP_MISC_14_CDI2                     (0x0020)
+#define U300_SYSCON_PMC4R_APP_MISC_14_APP_GPIO                 (0x0030)
+#define U300_SYSCON_PMC4R_APP_MISC_16_MASK                     (0x0300)
+#define U300_SYSCON_PMC4R_APP_MISC_16_APP_GPIO_13              (0x0000)
+#define U300_SYSCON_PMC4R_APP_MISC_16_APP_UART1_CTS            (0x0100)
+#define U300_SYSCON_PMC4R_APP_MISC_16_EMIF_1_STATIC_CS5_N      (0x0200)
 /* SYS_0_CLK_CONTROL first clock control 16bit (R/W) */
 #define U300_SYSCON_S0CCR                                      (0x120)
 #define U300_SYSCON_S0CCR_FIELD_MASK                           (0x43FF)
 #define U300_SYSCON_S0CCR_CLOCK_REQ                            (0x4000)
+#define U300_SYSCON_S0CCR_CLOCK_REQ_MONITOR                    (0x2000)
 #define U300_SYSCON_S0CCR_CLOCK_INV                            (0x0200)
 #define U300_SYSCON_S0CCR_CLOCK_FREQ_MASK                      (0x01E0)
 #define U300_SYSCON_S0CCR_CLOCK_SELECT_MASK                    (0x001E)
 #define U300_SYSCON_S1CCR                                      (0x124)
 #define U300_SYSCON_S1CCR_FIELD_MASK                           (0x43FF)
 #define U300_SYSCON_S1CCR_CLOCK_REQ                            (0x4000)
+#define U300_SYSCON_S1CCR_CLOCK_REQ_MONITOR                    (0x2000)
 #define U300_SYSCON_S1CCR_CLOCK_INV                            (0x0200)
 #define U300_SYSCON_S1CCR_CLOCK_FREQ_MASK                      (0x01E0)
 #define U300_SYSCON_S1CCR_CLOCK_SELECT_MASK                    (0x001E)
 #define U300_SYSCON_S2CCR_FIELD_MASK                           (0xC3FF)
 #define U300_SYSCON_S2CCR_CLK_STEAL                            (0x8000)
 #define U300_SYSCON_S2CCR_CLOCK_REQ                            (0x4000)
+#define U300_SYSCON_S2CCR_CLOCK_REQ_MONITOR                    (0x2000)
 #define U300_SYSCON_S2CCR_CLOCK_INV                            (0x0200)
 #define U300_SYSCON_S2CCR_CLOCK_FREQ_MASK                      (0x01E0)
 #define U300_SYSCON_S2CCR_CLOCK_SELECT_MASK                    (0x001E)
 #define U300_SYSCON_MCR_PMGEN_CR_0_EMIF_0_SDRAM                        (0x000C)
 #define U300_SYSCON_MCR_PM1G_MODE_ENABLE                       (0x0002)
 #define U300_SYSCON_MCR_PMTG5_MODE_ENABLE                      (0x0001)
+/* SC_PLL_IRQ_CONTROL 16bit (R/W) */
+#define U300_SYSCON_PICR                                       (0x0130)
+#define U300_SYSCON_PICR_MASK                                  (0x00FF)
+#define U300_SYSCON_PICR_FORCE_PLL208_LOCK_LOW_ENABLE          (0x0080)
+#define U300_SYSCON_PICR_FORCE_PLL208_LOCK_HIGH_ENABLE         (0x0040)
+#define U300_SYSCON_PICR_FORCE_PLL13_LOCK_LOW_ENABLE           (0x0020)
+#define U300_SYSCON_PICR_FORCE_PLL13_LOCK_HIGH_ENABLE          (0x0010)
+#define U300_SYSCON_PICR_IRQMASK_PLL13_UNLOCK_ENABLE           (0x0008)
+#define U300_SYSCON_PICR_IRQMASK_PLL13_LOCK_ENABLE             (0x0004)
+#define U300_SYSCON_PICR_IRQMASK_PLL208_UNLOCK_ENABLE          (0x0002)
+#define U300_SYSCON_PICR_IRQMASK_PLL208_LOCK_ENABLE            (0x0001)
+/* SC_PLL_IRQ_STATUS 16 bit (R/-) */
+#define U300_SYSCON_PISR                                       (0x0134)
+#define U300_SYSCON_PISR_MASK                                  (0x000F)
+#define U300_SYSCON_PISR_PLL13_UNLOCK_IND                      (0x0008)
+#define U300_SYSCON_PISR_PLL13_LOCK_IND                                (0x0004)
+#define U300_SYSCON_PISR_PLL208_UNLOCK_IND                     (0x0002)
+#define U300_SYSCON_PISR_PLL208_LOCK_IND                       (0x0001)
+/* SC_PLL_IRQ_CLEAR 16 bit (-/W) */
+#define U300_SYSCON_PICLR                                      (0x0138)
+#define U300_SYSCON_PICLR_MASK                                 (0x000F)
+#define U300_SYSCON_PICLR_RWMASK                               (0x0000)
+#define U300_SYSCON_PICLR_PLL13_UNLOCK_SC                      (0x0008)
+#define U300_SYSCON_PICLR_PLL13_LOCK_SC                                (0x0004)
+#define U300_SYSCON_PICLR_PLL208_UNLOCK_SC                     (0x0002)
+#define U300_SYSCON_PICLR_PLL208_LOCK_SC                       (0x0001)
+/* CAMIF_CONTROL 16 bit (-/W) */
+#define U300_SYSCON_CICR                                       (0x013C)
+#define U300_SYSCON_CICR_MASK                                  (0x0FFF)
+#define U300_SYSCON_CICR_APP_SUBLVDS_TESTMODE_MASK             (0x0F00)
+#define U300_SYSCON_CICR_APP_SUBLVDS_TESTMODE_PORT1            (0x0C00)
+#define U300_SYSCON_CICR_APP_SUBLVDS_TESTMODE_PORT0            (0x0300)
+#define U300_SYSCON_CICR_APP_SUBLVDS_RESCON_MASK               (0x00F0)
+#define U300_SYSCON_CICR_APP_SUBLVDS_RESCON_PORT1              (0x00C0)
+#define U300_SYSCON_CICR_APP_SUBLVDS_RESCON_PORT0              (0x0030)
+#define U300_SYSCON_CICR_APP_SUBLVDS_PWR_DWN_N_MASK            (0x000F)
+#define U300_SYSCON_CICR_APP_SUBLVDS_PWR_DWN_N_PORT1           (0x000C)
+#define U300_SYSCON_CICR_APP_SUBLVDS_PWR_DWN_N_PORT0           (0x0003)
 /* Clock activity observability register 0 */
 #define U300_SYSCON_C0OAR                                      (0x140)
 #define U300_SYSCON_C0OAR_MASK                                 (0xFFFF)
 /**
  * CPU medium frequency in MHz
  */
-#define SYSCON_CPU_CLOCK_MEDIUM  104
+#define SYSCON_CPU_CLOCK_MEDIUM   52
 /**
  * CPU low frequency in MHz
  */
 /**
  * EMIF medium frequency in MHz
  */
-#define SYSCON_EMIF_CLOCK_MEDIUM 104
+#define SYSCON_EMIF_CLOCK_MEDIUM  52
 /**
  * EMIF low frequency in MHz
  */
 /**
  * AHB medium frequency in MHz
  */
-#define SYSCON_AHB_CLOCK_MEDIUM   52
+#define SYSCON_AHB_CLOCK_MEDIUM   26
 /**
  * AHB low frequency in MHz
  */
@@ -553,6 +648,15 @@ enum syscon_busmaster {
   SYSCON_BM_VIDEO_ENC
 };
 
+/* Select a resistor or a set of resistors */
+enum syscon_pull_up_down {
+  SYSCON_PU_KEY_IN_EN,
+  SYSCON_PU_EMIF_1_8_BIT_EN,
+  SYSCON_PU_EMIF_1_16_BIT_EN,
+  SYSCON_PU_EMIF_1_NFIF_READY_EN,
+  SYSCON_PU_EMIF_1_NFIF_WAIT_N_EN,
+};
+
 /*
  * Note that this array must match the order of the array "clk_reg"
  * in syscon.c
@@ -575,6 +679,7 @@ enum syscon_clk {
   SYSCON_CLKCONTROL_SPI,
   SYSCON_CLKCONTROL_I2S0_CORE,
   SYSCON_CLKCONTROL_I2S1_CORE,
+  SYSCON_CLKCONTROL_UART1,
   SYSCON_CLKCONTROL_AAIF,
   SYSCON_CLKCONTROL_AHB,
   SYSCON_CLKCONTROL_APEX,
@@ -604,7 +709,8 @@ enum syscon_sysclk_mode {
 
 enum syscon_sysclk_req {
   SYSCON_SYSCLKREQ_DISABLED,
-  SYSCON_SYSCLKREQ_ACTIVE_LOW
+  SYSCON_SYSCLKREQ_ACTIVE_LOW,
+  SYSCON_SYSCLKREQ_MONITOR
 };
 
 enum syscon_clk_mode {
index 585cc01..7b6b016 100644 (file)
 #include <linux/regulator/consumer.h>
 #include <linux/regulator/machine.h>
 #include <linux/gpio.h>
+#include <linux/amba/mmci.h>
 
-#include <asm/mach/mmc.h>
 #include "mmc.h"
+#include "padmux.h"
 
 struct mmci_card_event {
        struct input_dev *mmc_input;
        int mmc_inserted;
        struct work_struct workq;
-       struct mmc_platform_data mmc0_plat_data;
+       struct mmci_platform_data mmc0_plat_data;
 };
 
 static unsigned int mmc_status(struct device *dev)
@@ -146,6 +147,7 @@ int __devinit mmc_init(struct amba_device *adev)
 {
        struct mmci_card_event *mmci_card;
        struct device *mmcsd_device = &adev->dev;
+       struct pmx *pmx;
        int ret = 0;
 
        mmci_card = kzalloc(sizeof(struct mmci_card_event), GFP_KERNEL);
@@ -158,6 +160,8 @@ int __devinit mmc_init(struct amba_device *adev)
        mmci_card->mmc0_plat_data.status = mmc_status;
        mmci_card->mmc0_plat_data.gpio_wp = -1;
        mmci_card->mmc0_plat_data.gpio_cd = -1;
+       mmci_card->mmc0_plat_data.capabilities = MMC_CAP_MMC_HIGHSPEED |
+               MMC_CAP_SD_HIGHSPEED | MMC_CAP_4_BIT_DATA;
 
        mmcsd_device->platform_data = (void *) &mmci_card->mmc0_plat_data;
 
@@ -207,6 +211,20 @@ int __devinit mmc_init(struct amba_device *adev)
 
        input_set_drvdata(mmci_card->mmc_input, mmci_card);
 
+       /*
+        * Setup padmuxing for MMC. Since this must always be
+        * compiled into the kernel, pmx is never released.
+        */
+       pmx = pmx_get(mmcsd_device, U300_APP_PMX_MMC_SETTING);
+
+       if (IS_ERR(pmx))
+               pr_warning("Could not get padmux handle\n");
+       else {
+               ret = pmx_activate(mmcsd_device, pmx);
+               if (IS_ERR_VALUE(ret))
+                       pr_warning("Could not activate padmuxing\n");
+       }
+
        ret = gpio_register_callback(U300_GPIO_PIN_MMC_CD, mmci_callback,
                                     mmci_card);
 
index f366456..4c93c6c 100644 (file)
  * Copyright (C) 2009 ST-Ericsson AB
  * License terms: GNU General Public License (GPL) version 2
  * U300 PADMUX functions
- * Author: Linus Walleij <linus.walleij@stericsson.com>
- *
+ * Author: Martin Persson <martin.persson@stericsson.com>
  */
-#include <linux/io.h>
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
 #include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/string.h>
+#include <linux/bug.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
 #include <mach/u300-regs.h>
 #include <mach/syscon.h>
-
 #include "padmux.h"
 
-/* Set the PAD MUX to route the MMC reader correctly to GPIO0. */
-void pmx_set_mission_mode_mmc(void)
-{
-       u16 val;
-
-       val = readw(U300_SYSCON_VBASE + U300_SYSCON_PMC1LR);
-       val &= ~U300_SYSCON_PMC1LR_MMCSD_MASK;
-       writew(val, U300_SYSCON_VBASE + U300_SYSCON_PMC1LR);
-       val = readw(U300_SYSCON_VBASE + U300_SYSCON_PMC1HR);
-       val &= ~U300_SYSCON_PMC1HR_APP_GPIO_1_MASK;
-       val |= U300_SYSCON_PMC1HR_APP_GPIO_1_MMC;
-       writew(val, U300_SYSCON_VBASE + U300_SYSCON_PMC1HR);
-}
-
-void pmx_set_mission_mode_spi(void)
-{
-       u16 val;
-
-       /* Set up padmuxing so the SPI port and its chipselects are active */
-       val = readw(U300_SYSCON_VBASE + U300_SYSCON_PMC1HR);
-       /*
-        * Activate the SPI port (disable the use of these pins for generic
-        * GPIO, DSP, AAIF
-        */
-       val &= ~U300_SYSCON_PMC1HR_APP_SPI_2_MASK;
-       val |= U300_SYSCON_PMC1HR_APP_SPI_2_SPI;
-       /*
-        * Use GPIO pin SPI CS1 for CS1 actually (it can be used for other
-        * things also)
-        */
-       val &= ~U300_SYSCON_PMC1HR_APP_SPI_CS_1_MASK;
-       val |= U300_SYSCON_PMC1HR_APP_SPI_CS_1_SPI;
-       /*
-        * Use GPIO pin SPI CS2 for CS2 actually (it can be used for other
-        * things also)
-        */
-       val &= ~U300_SYSCON_PMC1HR_APP_SPI_CS_2_MASK;
-       val |= U300_SYSCON_PMC1HR_APP_SPI_CS_2_SPI;
-       writew(val, U300_SYSCON_VBASE + U300_SYSCON_PMC1HR);
+static DEFINE_MUTEX(pmx_mutex);
+
+const u32 pmx_registers[] = {
+       (U300_SYSCON_VBASE + U300_SYSCON_PMC1LR),
+       (U300_SYSCON_VBASE + U300_SYSCON_PMC1HR),
+       (U300_SYSCON_VBASE + U300_SYSCON_PMC2R),
+       (U300_SYSCON_VBASE + U300_SYSCON_PMC3R),
+       (U300_SYSCON_VBASE + U300_SYSCON_PMC4R)
+};
+
+/* High level functionality */
+
+/* Quick reference for the onmask layout:
+ * onmask = {
+ *   {"PMC1LR" mask, "PMC1LR" value},
+ *   {"PMC1HR" mask, "PMC1HR" value},
+ *   {"PMC2R"  mask, "PMC2R"  value},
+ *   {"PMC3R"  mask, "PMC3R"  value},
+ *   {"PMC4R"  mask, "PMC4R"  value}
+ * }
+ */
+static struct pmx mmc_setting = {
+       .setting = U300_APP_PMX_MMC_SETTING,
+       .default_on = false,
+       .activated = false,
+       .name = "MMC",
+       .onmask = {
+                  {U300_SYSCON_PMC1LR_MMCSD_MASK,
+                   U300_SYSCON_PMC1LR_MMCSD_MMCSD},
+                  {0, 0},
+                  {0, 0},
+                  {0, 0},
+                  {U300_SYSCON_PMC4R_APP_MISC_12_MASK,
+                   U300_SYSCON_PMC4R_APP_MISC_12_APP_GPIO}
+                  },
+};
+
+static struct pmx spi_setting = {
+       .setting = U300_APP_PMX_SPI_SETTING,
+       .default_on = false,
+       .activated = false,
+       .name = "SPI",
+       .onmask = {{0, 0},
+                  {U300_SYSCON_PMC1HR_APP_SPI_2_MASK |
+                   U300_SYSCON_PMC1HR_APP_SPI_CS_1_MASK |
+                   U300_SYSCON_PMC1HR_APP_SPI_CS_2_MASK,
+                   U300_SYSCON_PMC1HR_APP_SPI_2_SPI |
+                   U300_SYSCON_PMC1HR_APP_SPI_CS_1_SPI |
+                   U300_SYSCON_PMC1HR_APP_SPI_CS_2_SPI},
+                  {0, 0},
+                  {0, 0},
+                  {0, 0}
+                  },
+};
+
+/* Available padmux settings */
+static struct pmx *pmx_settings[] = {
+       &mmc_setting,
+       &spi_setting,
+};
+
+static void update_registers(struct pmx *pmx, bool activate)
+{
+       u16 regval, val, mask;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(pmx_registers); i++) {
+               if (activate)
+                       val = pmx->onmask[i].val;
+               else
+                       val = 0;
+
+               mask = pmx->onmask[i].mask;
+               if (mask != 0) {
+                       regval = readw(pmx_registers[i]);
+                       regval &= ~mask;
+                       regval |= val;
+                       writew(regval, pmx_registers[i]);
+               }
+       }
+}
+
+struct pmx *pmx_get(struct device *dev, enum pmx_settings setting)
+{
+       int i;
+       struct pmx *pmx = ERR_PTR(-ENOENT);
+
+       if (dev == NULL)
+               return ERR_PTR(-EINVAL);
+
+       mutex_lock(&pmx_mutex);
+       for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) {
+
+               if (setting == pmx_settings[i]->setting) {
+
+                       if (pmx_settings[i]->dev != NULL) {
+                               WARN(1, "padmux: requested setting "
+                                    "already in use by another consumer\n");
+                       } else {
+                               pmx = pmx_settings[i];
+                               pmx->dev = dev;
+                               dev_dbg(dev, "padmux: setting nr %d is now "
+                                       "bound to %s and ready to use\n",
+                                       setting, dev_name(dev));
+                               break;
+                       }
+               }
+       }
+       mutex_unlock(&pmx_mutex);
+
+       return pmx;
+}
+EXPORT_SYMBOL(pmx_get);
+
+int pmx_put(struct device *dev, struct pmx *pmx)
+{
+       int i;
+       int ret = -ENOENT;
+
+       if (pmx == NULL || dev == NULL)
+               return -EINVAL;
+
+       mutex_lock(&pmx_mutex);
+       for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) {
+
+               if (pmx->setting == pmx_settings[i]->setting) {
+
+                       if (dev != pmx->dev) {
+                               WARN(1, "padmux: cannot release handle as "
+                                       "it is bound to another consumer\n");
+                               ret = -EINVAL;
+                               break;
+                       } else {
+                               pmx_settings[i]->dev = NULL;
+                               ret = 0;
+                               break;
+                       }
+               }
+       }
+       mutex_unlock(&pmx_mutex);
+
+       return ret;
+}
+EXPORT_SYMBOL(pmx_put);
+
+int pmx_activate(struct device *dev, struct pmx *pmx)
+{
+       int i, j, ret;
+       ret = 0;
+
+       if (pmx == NULL || dev == NULL)
+               return -EINVAL;
+
+       mutex_lock(&pmx_mutex);
+
+       /* Make sure the required bits are not used */
+       for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) {
+
+               if (pmx_settings[i]->dev == NULL || pmx_settings[i] == pmx)
+                       continue;
+
+               for (j = 0; j < ARRAY_SIZE(pmx_registers); j++) {
+
+                       if (pmx_settings[i]->onmask[j].mask &
+                           pmx->onmask[j].mask) {
+                               /* More than one entry on the same bits */
+                               WARN(1, "padmux: cannot activate "
+                                       "setting. Bit conflict with "
+                                       "an active setting\n");
+
+                               ret = -EUSERS;
+                               goto exit;
+                       }
+               }
+       }
+       update_registers(pmx, true);
+       pmx->activated = true;
+       dev_dbg(dev, "padmux: setting nr %d is activated\n",
+               pmx->setting);
+
+exit:
+       mutex_unlock(&pmx_mutex);
+       return ret;
+}
+EXPORT_SYMBOL(pmx_activate);
+
+int pmx_deactivate(struct device *dev, struct pmx *pmx)
+{
+       int i;
+       int ret = -ENOENT;
+
+       if (pmx == NULL || dev == NULL)
+               return -EINVAL;
+
+       mutex_lock(&pmx_mutex);
+       for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) {
+
+               if (pmx_settings[i]->dev == NULL)
+                       continue;
+
+               if (pmx->setting == pmx_settings[i]->setting) {
+
+                       if (dev != pmx->dev) {
+                               WARN(1, "padmux: cannot deactivate "
+                                    "pmx setting as it was activated "
+                                    "by another consumer\n");
+
+                               ret = -EBUSY;
+                               continue;
+                       } else {
+                               update_registers(pmx, false);
+                               pmx_settings[i]->dev = NULL;
+                               pmx->activated = false;
+                               ret = 0;
+                               dev_dbg(dev, "padmux: setting nr %d is deactivated\n",
+                                       pmx->setting);
+                               break;
+                       }
+               }
+       }
+       mutex_unlock(&pmx_mutex);
+
+       return ret;
+}
+EXPORT_SYMBOL(pmx_deactivate);
+
+/*
+ * For internal use only. If it is to be exported,
+ * it must be made reentrant. Note that pmx_activate()
+ * (i.e. a runtime setting) always overrides the default settings.
+ */
+static int pmx_set_default(void)
+{
+       /* Used to identify several entries on the same bits */
+       u16 modbits[ARRAY_SIZE(pmx_registers)];
+
+       int i, j;
+
+       memset(modbits, 0, ARRAY_SIZE(pmx_registers) * sizeof(u16));
+
+       for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) {
+
+               if (!pmx_settings[i]->default_on)
+                       continue;
+
+               for (j = 0; j < ARRAY_SIZE(pmx_registers); j++) {
+
+                       /* Make sure there is only one entry on the same bits */
+                       if (modbits[j] & pmx_settings[i]->onmask[j].mask) {
+                               BUG();
+                               return -EUSERS;
+                       }
+                       modbits[j] |= pmx_settings[i]->onmask[j].mask;
+               }
+               update_registers(pmx_settings[i], true);
+       }
+       return 0;
 }
+
+#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_U300_DEBUG))
+static int pmx_show(struct seq_file *s, void *data)
+{
+       int i;
+       seq_printf(s, "-------------------------------------------------\n");
+       seq_printf(s, "SETTING     BOUND TO DEVICE               STATE\n");
+       seq_printf(s, "-------------------------------------------------\n");
+       mutex_lock(&pmx_mutex);
+       for (i = 0; i < ARRAY_SIZE(pmx_settings); i++) {
+               /* Format pmx and device name nicely */
+               char cdp[33];
+               int chars;
+
+               chars = snprintf(&cdp[0], 17, "%s", pmx_settings[i]->name);
+               while (chars < 16) {
+                       cdp[chars] = ' ';
+                       chars++;
+               }
+               chars = snprintf(&cdp[16], 17, "%s", pmx_settings[i]->dev ?
+                               dev_name(pmx_settings[i]->dev) : "N/A");
+               while (chars < 16) {
+                       cdp[chars+16] = ' ';
+                       chars++;
+               }
+               cdp[32] = '\0';
+
+               seq_printf(s,
+                       "%s\t%s\n",
+                       &cdp[0],
+                       pmx_settings[i]->activated ?
+                       "ACTIVATED" : "DEACTIVATED"
+                       );
+
+       }
+       mutex_unlock(&pmx_mutex);
+       return 0;
+}
+
+static int pmx_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, pmx_show, NULL);
+}
+
+static const struct file_operations pmx_operations = {
+       .owner          = THIS_MODULE,
+       .open           = pmx_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+static int __init init_pmx_read_debugfs(void)
+{
+       /* Expose a simple debugfs interface to view pmx settings */
+       (void) debugfs_create_file("padmux", S_IFREG | S_IRUGO,
+                                  NULL, NULL,
+                                  &pmx_operations);
+       return 0;
+}
+
+/*
+ * This needs to come in after the core_initcall(),
+ * because debugfs is not available until
+ * the subsystems come up.
+ */
+module_init(init_pmx_read_debugfs);
+#endif
+
+static int __init pmx_init(void)
+{
+       int ret;
+
+       ret = pmx_set_default();
+
+       if (IS_ERR_VALUE(ret))
+               pr_crit("padmux: default settings could not be set\n");
+
+       return 0;
+}
+
+/* Should be initialized before consumers */
+core_initcall(pmx_init);
index 8c2099a..6e8b860 100644
@@ -6,14 +6,34 @@
  * Copyright (C) 2009 ST-Ericsson AB
  * License terms: GNU General Public License (GPL) version 2
  * U300 PADMUX API
- * Author: Linus Walleij <linus.walleij@stericsson.com>
- *
+ * Author: Martin Persson <martin.persson@stericsson.com>
  */
 
 #ifndef __MACH_U300_PADMUX_H
 #define __MACH_U300_PADMUX_H
 
-void pmx_set_mission_mode_mmc(void);
-void pmx_set_mission_mode_spi(void);
+enum pmx_settings {
+       U300_APP_PMX_MMC_SETTING,
+       U300_APP_PMX_SPI_SETTING
+};
+
+struct pmx_onmask {
+       u16 mask;               /* Mask bits */
+       u16 val;                /* Value when active */
+};
+
+struct pmx {
+       struct device *dev;
+       enum pmx_settings setting;
+       char *name;
+       bool activated;
+       bool default_on;
+       struct pmx_onmask onmask[];
+};
+
+struct pmx *pmx_get(struct device *dev, enum pmx_settings setting);
+int pmx_put(struct device *dev, struct pmx *pmx);
+int pmx_activate(struct device *dev, struct pmx *pmx);
+int pmx_deactivate(struct device *dev, struct pmx *pmx);
 
 #endif
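
The four calls above form a get/activate/deactivate/put lifecycle around the
mutex-protected settings table. A minimal consumer sketch (hypothetical
driver code, not part of this patch; assumes <linux/err.h> and "padmux.h"):

/* Hypothetical consumer of the padmux API declared above. */
static int example_setup_pins(struct device *dev)
{
	struct pmx *pmx;
	int ret;

	pmx = pmx_get(dev, U300_APP_PMX_MMC_SETTING);	/* bind the setting */
	if (IS_ERR(pmx))
		return PTR_ERR(pmx);

	ret = pmx_activate(dev, pmx);	/* write the on-masks to the PMC regs */
	if (ret) {
		pmx_put(dev, pmx);	/* unbind the never-activated handle */
		return ret;
	}

	/* The pins are now muxed; a later pmx_deactivate() both clears the
	 * on-masks and releases the binding. */
	return 0;
}

u300_spi_init() below follows the same pattern, but deliberately never
releases its handle since SPI padmuxing must stay active.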
diff --git a/arch/arm/mach-u300/spi.c b/arch/arm/mach-u300/spi.c
new file mode 100644
index 0000000..f0e887b
--- /dev/null
@@ -0,0 +1,124 @@
+/*
+ * arch/arm/mach-u300/spi.c
+ *
+ * Copyright (C) 2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ *
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ */
+#include <linux/device.h>
+#include <linux/amba/bus.h>
+#include <linux/spi/spi.h>
+#include <linux/amba/pl022.h>
+#include <linux/err.h>
+#include "padmux.h"
+
+/*
+ * The following is for the actual devices on the SSP/SPI bus
+ */
+#ifdef CONFIG_MACH_U300_SPIDUMMY
+static void select_dummy_chip(u32 chipselect)
+{
+       pr_debug("CORE: %s called with CS=0x%x (%s)\n",
+                __func__,
+                chipselect,
+                chipselect ? "unselect chip" : "select chip");
+       /*
+        * Here you would write the chip select value to the GPIO pins if
+        * this was a real chip (but this is a loopback dummy).
+        */
+}
+
+struct pl022_config_chip dummy_chip_info = {
+       /* Nominally this is LOOPBACK_DISABLED, but this is our dummy chip! */
+       .lbm = LOOPBACK_ENABLED,
+       /*
+        * available POLLING_TRANSFER and INTERRUPT_TRANSFER,
+        * DMA_TRANSFER does not work
+        */
+       .com_mode = INTERRUPT_TRANSFER,
+       .iface = SSP_INTERFACE_MOTOROLA_SPI,
+       /* We can only act as master but SSP_SLAVE is possible in theory */
+       .hierarchy = SSP_MASTER,
+       /* 0 = drive TX even as slave, 1 = do not drive TX as slave */
+       .slave_tx_disable = 0,
+       /* LSB first */
+       .endian_tx = SSP_TX_LSB,
+       .endian_rx = SSP_RX_LSB,
+       .data_size = SSP_DATA_BITS_8, /* used to be 12 in some default */
+       .rx_lev_trig = SSP_RX_1_OR_MORE_ELEM,
+       .tx_lev_trig = SSP_TX_1_OR_MORE_EMPTY_LOC,
+       .clk_phase = SSP_CLK_SECOND_EDGE,
+       .clk_pol = SSP_CLK_POL_IDLE_LOW,
+       .ctrl_len = SSP_BITS_12,
+       .wait_state = SSP_MWIRE_WAIT_ZERO,
+       .duplex = SSP_MICROWIRE_CHANNEL_FULL_DUPLEX,
+       /*
+        * This is where you insert a call to a function to enable CS
+        * (usually GPIO) for a certain chip.
+        */
+       .cs_control = select_dummy_chip,
+};
+#endif
+
+static struct spi_board_info u300_spi_devices[] = {
+#ifdef CONFIG_MACH_U300_SPIDUMMY
+       {
+               /* A dummy chip used for loopback tests */
+               .modalias       = "spi-dummy",
+               /* Really dummy, pass in additional chip config here */
+               .platform_data  = NULL,
+               /* This defines how the controller shall handle the device */
+               .controller_data = &dummy_chip_info,
+               /* .irq - no external IRQ routed from this device */
+               .max_speed_hz   = 1000000,
+               .bus_num        = 0, /* Only one bus on this chip */
+               .chip_select    = 0,
+               /* Mode 0 (no flags set); OR in e.g. SPI_CS_HIGH here if needed */
+               .mode           = 0,
+       },
+#endif
+};
+
+static struct pl022_ssp_controller ssp_platform_data = {
+       /* If you have several SPI buses this varies, we have only bus 0 */
+       .bus_id = 0,
+       /* Set this to 1 when we think we got DMA working */
+       .enable_dma = 0,
+       /*
+        * On the APP CPU GPIO 4, 5 and 6 are connected as generic
+        * chip selects for SPI. (Same on U330, U335 and U365.)
+        * TODO: make sure the GPIO driver can select these properly
+        * and do padmuxing accordingly too.
+        */
+       .num_chipselect = 3,
+};
+
+
+void __init u300_spi_init(struct amba_device *adev)
+{
+       struct pmx *pmx;
+
+       adev->dev.platform_data = &ssp_platform_data;
+       /*
+        * Setup padmuxing for SPI. Since this must always be
+        * compiled into the kernel, pmx is never released.
+        */
+       pmx = pmx_get(&adev->dev, U300_APP_PMX_SPI_SETTING);
+
+       if (IS_ERR(pmx))
+               dev_warn(&adev->dev, "Could not get padmux handle\n");
+       else {
+               int ret;
+
+               ret = pmx_activate(&adev->dev, pmx);
+               if (IS_ERR_VALUE(ret))
+                       dev_warn(&adev->dev, "Could not activate padmuxing\n");
+       }
+
+}
+void __init u300_spi_register_board_devices(void)
+{
+       /* Register any SPI devices */
+       spi_register_board_info(u300_spi_devices, ARRAY_SIZE(u300_spi_devices));
+}
diff --git a/arch/arm/mach-u300/spi.h b/arch/arm/mach-u300/spi.h
new file mode 100644
index 0000000..bd3d867
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+ * arch/arm/mach-u300/spi.h
+ *
+ * Copyright (C) 2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ *
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ */
+#ifndef SPI_H
+#define SPI_H
+#include <linux/amba/bus.h>
+
+#ifdef CONFIG_SPI_PL022
+void __init u300_spi_init(struct amba_device *adev);
+void __init u300_spi_register_board_devices(void);
+#else
+/* Compile out SPI support if PL022 is not selected */
+static inline void __init u300_spi_init(struct amba_device *adev)
+{
+}
+static inline void __init u300_spi_register_board_devices(void)
+{
+}
+#endif
+
+#endif
index cce5320..26d26f5 100644 (file)
@@ -346,6 +346,21 @@ static struct clocksource clocksource_u300_1mhz = {
        .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
+/*
+ * Override the global weak sched_clock symbol with this
+ * local implementation which uses the clocksource to get better
+ * resolution when scheduling the kernel. We accept that
+ * this wraps around for now, since it is just a relative time
+ * stamp. (Inspired by OMAP implementation.)
+ */
+unsigned long long notrace sched_clock(void)
+{
+       return clocksource_cyc2ns(clocksource_u300_1mhz.read(
+                                 &clocksource_u300_1mhz),
+                                 clocksource_u300_1mhz.mult,
+                                 clocksource_u300_1mhz.shift);
+}
+
 
 /*
  * This sets up the system timers, clock source and clock event.
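
The conversion inside sched_clock() above is clocksource_cyc2ns(), a
fixed-point multiply-and-shift: ns = (cycles * mult) >> shift. A
self-contained sketch (the mult/shift pair is illustrative, not what the
U300 clocksource actually registers):

/* How counter cycles become nanoseconds for a 1 MHz source (sketch). */
static inline unsigned long long cyc2ns_sketch(unsigned long long cycles)
{
	unsigned int mult = 1000 << 20;	/* assumed: 1000 ns per cycle, scaled */
	unsigned int shift = 20;

	return (cycles * mult) >> shift;	/* e.g. 5 cycles -> 5000 ns */
}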
index 975eae4..e13be7c 100644
@@ -27,6 +27,7 @@
 #include <linux/amba/bus.h>
 #include <linux/amba/clcd.h>
 #include <linux/amba/pl061.h>
+#include <linux/amba/mmci.h>
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
 #include <linux/cnt32_to_63.h>
@@ -47,7 +48,6 @@
 #include <asm/mach/irq.h>
 #include <asm/mach/time.h>
 #include <asm/mach/map.h>
-#include <asm/mach/mmc.h>
 
 #include "core.h"
 #include "clock.h"
@@ -369,7 +369,7 @@ unsigned int mmc_status(struct device *dev)
        return readl(VERSATILE_SYSMCI) & mask;
 }
 
-static struct mmc_platform_data mmc0_plat_data = {
+static struct mmci_platform_data mmc0_plat_data = {
        .ocr_mask       = MMC_VDD_32_33|MMC_VDD_33_34,
        .status         = mmc_status,
        .gpio_wp        = -1,
index 9af8d81..239cd30 100644
@@ -24,6 +24,7 @@
 #include <linux/sysdev.h>
 #include <linux/amba/bus.h>
 #include <linux/amba/pl061.h>
+#include <linux/amba/mmci.h>
 #include <linux/io.h>
 
 #include <mach/hardware.h>
@@ -31,7 +32,6 @@
 #include <asm/mach-types.h>
 
 #include <asm/mach/arch.h>
-#include <asm/mach/mmc.h>
 
 #include "core.h"
 
@@ -41,7 +41,7 @@
 #define IRQ_MMCI1A     IRQ_SIC_MMCI1A
 #endif
 
-static struct mmc_platform_data mmc1_plat_data = {
+static struct mmci_platform_data mmc1_plat_data = {
        .ocr_mask       = MMC_VDD_32_33|MMC_VDD_33_34,
        .status         = mmc_status,
        .gpio_wp        = -1,
index 5fe595a..8d43e58 100644
@@ -771,3 +771,8 @@ config CACHE_XSC3L2
        select OUTER_CACHE
        help
          This option enables the L2 cache on XScale3.
+
+config ARM_L1_CACHE_SHIFT
+       int
+       default 6 if ARCH_OMAP3
+       default 5
index cc8829d..379f785 100644
 
 #include "fault.h"
 
+/*
+ * Fault status register encodings.  We steal bit 31 for our own purposes.
+ */
+#define FSR_LNX_PF             (1 << 31)
+#define FSR_WRITE              (1 << 11)
+#define FSR_FS4                        (1 << 10)
+#define FSR_FS3_0              (15)
+
+static inline int fsr_fs(unsigned int fsr)
+{
+       return (fsr & FSR_FS3_0) | (fsr & FSR_FS4) >> 6;
+}
+
 #ifdef CONFIG_MMU
 
 #ifdef CONFIG_KPROBES
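
fsr_fs() above folds the split fault-status field into a single table
index: FS[3:0] sit in bits 3:0 and FS[4] in bit 10, so shifting bit 10
right by six places drops it into bit 4. A worked example under those
definitions:

/* fsr = 0x405 -> FS4 set, FS[3:0] = 5:
 *   fsr & FSR_FS3_0      = 0x05
 *   (fsr & FSR_FS4) >> 6 = 0x10
 *   fsr_fs(0x405)        = 0x15 = 21, i.e. entry 21 of the fsr_info table.
 */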
@@ -182,18 +195,35 @@ void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 #define VM_FAULT_BADMAP                0x010000
 #define VM_FAULT_BADACCESS     0x020000
 
-static int
+/*
+ * Check that the permissions on the VMA allow for the fault which occurred.
+ * If we encountered a write fault, we must have write permission, otherwise
+ * we allow any permission.
+ */
+static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma)
+{
+       unsigned int mask = VM_READ | VM_WRITE | VM_EXEC;
+
+       if (fsr & FSR_WRITE)
+               mask = VM_WRITE;
+       if (fsr & FSR_LNX_PF)
+               mask = VM_EXEC;
+
+       return vma->vm_flags & mask ? false : true;
+}
+
+static int __kprobes
 __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
                struct task_struct *tsk)
 {
        struct vm_area_struct *vma;
-       int fault, mask;
+       int fault;
 
        vma = find_vma(mm, addr);
        fault = VM_FAULT_BADMAP;
-       if (!vma)
+       if (unlikely(!vma))
                goto out;
-       if (vma->vm_start > addr)
+       if (unlikely(vma->vm_start > addr))
                goto check_stack;
 
        /*
@@ -201,47 +231,24 @@ __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
         * memory access, so we can handle it.
         */
 good_area:
-       if (fsr & (1 << 11)) /* write? */
-               mask = VM_WRITE;
-       else
-               mask = VM_READ|VM_EXEC|VM_WRITE;
-
-       fault = VM_FAULT_BADACCESS;
-       if (!(vma->vm_flags & mask))
+       if (access_error(fsr, vma)) {
+               fault = VM_FAULT_BADACCESS;
                goto out;
+       }
 
        /*
-        * If for any reason at all we couldn't handle
-        * the fault, make sure we exit gracefully rather
-        * than endlessly redo the fault.
+        * If for any reason at all we couldn't handle the fault, make
+        * sure we exit gracefully rather than endlessly redo the fault.
         */
-survive:
-       fault = handle_mm_fault(mm, vma, addr & PAGE_MASK, (fsr & (1 << 11)) ? FAULT_FLAG_WRITE : 0);
-       if (unlikely(fault & VM_FAULT_ERROR)) {
-               if (fault & VM_FAULT_OOM)
-                       goto out_of_memory;
-               else if (fault & VM_FAULT_SIGBUS)
-                       return fault;
-               BUG();
-       }
+       fault = handle_mm_fault(mm, vma, addr & PAGE_MASK, (fsr & FSR_WRITE) ? FAULT_FLAG_WRITE : 0);
+       if (unlikely(fault & VM_FAULT_ERROR))
+               return fault;
        if (fault & VM_FAULT_MAJOR)
                tsk->maj_flt++;
        else
                tsk->min_flt++;
        return fault;
 
-out_of_memory:
-       if (!is_global_init(tsk))
-               goto out;
-
-       /*
-        * If we are out of memory for pid1, sleep for a while and retry
-        */
-       up_read(&mm->mmap_sem);
-       yield();
-       down_read(&mm->mmap_sem);
-       goto survive;
-
 check_stack:
        if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
                goto good_area;
@@ -278,6 +285,13 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
                if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc))
                        goto no_context;
                down_read(&mm->mmap_sem);
+       } else {
+               /*
+                * The above down_read_trylock() might have succeeded in
+                * which case, we'll have missed the might_sleep() from
+                * down_read()
+                */
+               might_sleep();
        }
 
        fault = __do_page_fault(mm, addr, fsr, tsk);
@@ -289,6 +303,16 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
        if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
                return 0;
 
+       if (fault & VM_FAULT_OOM) {
+               /*
+                * We ran out of memory, call the OOM killer, and return to
+                * userspace (which will retry the fault, or kill us if we
+                * got oom-killed)
+                */
+               pagefault_out_of_memory();
+               return 0;
+       }
+
        /*
         * If we are in kernel mode at this point, we
         * have no context to handle this fault with.
@@ -296,16 +320,6 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
        if (!user_mode(regs))
                goto no_context;
 
-       if (fault & VM_FAULT_OOM) {
-               /*
-                * We ran out of memory, or some other thing
-                * happened to us that made us unable to handle
-                * the page fault gracefully.
-                */
-               printk("VM: killing process %s\n", tsk->comm);
-               do_group_exit(SIGKILL);
-               return 0;
-       }
        if (fault & VM_FAULT_SIGBUS) {
                /*
                 * We had some memory, but were unable to
@@ -489,10 +503,10 @@ hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *)
 asmlinkage void __exception
 do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 {
-       const struct fsr_info *inf = fsr_info + (fsr & 15) + ((fsr & (1 << 10)) >> 6);
+       const struct fsr_info *inf = fsr_info + fsr_fs(fsr);
        struct siginfo info;
 
-       if (!inf->fn(addr, fsr, regs))
+       if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
                return;
 
        printk(KERN_ALERT "Unhandled fault: %s (0x%03x) at 0x%08lx\n",
@@ -508,6 +522,6 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 asmlinkage void __exception
 do_PrefetchAbort(unsigned long addr, struct pt_regs *regs)
 {
-       do_translation_fault(addr, 0, regs);
+       do_translation_fault(addr, FSR_LNX_PF, regs);
 }
 
index f982606..877c492 100644
@@ -613,6 +613,14 @@ void __init mem_init(void)
 
 void free_initmem(void)
 {
+#ifdef CONFIG_HAVE_TCM
+       extern char *__tcm_start, *__tcm_end;
+
+       totalram_pages += free_area(__phys_to_pfn(__pa(__tcm_start)),
+                                   __phys_to_pfn(__pa(__tcm_end)),
+                                   "TCM link");
+#endif
+
        if (!machine_is_integrator() && !machine_is_cintegrator())
                totalram_pages += free_area(__phys_to_pfn(__pa(__init_begin)),
                                            __phys_to_pfn(__pa(__init_end)),
index 3c127aa..1ff6a37 100644
@@ -179,7 +179,6 @@ static int __init iop3xx_adma_cap_init(void)
        dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
        #else
        dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask);
-       dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_0_data.cap_mask);
        dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
        #endif
 
@@ -188,7 +187,6 @@ static int __init iop3xx_adma_cap_init(void)
        dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
        #else
        dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask);
-       dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_1_data.cap_mask);
        dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
        #endif
 
@@ -198,7 +196,7 @@ static int __init iop3xx_adma_cap_init(void)
        dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
        #else
        dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask);
-       dma_cap_set(DMA_ZERO_SUM, iop3xx_aau_data.cap_mask);
+       dma_cap_set(DMA_XOR_VAL, iop3xx_aau_data.cap_mask);
        dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask);
        dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
        #endif
index 70aeee4..2975798 100644
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
 #include <linux/errno.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/system.h>
 #include <asm/irq.h>
+#include <asm/memory.h>
 #include <mach/hardware.h>
 #include <mach/dma.h>
 
+#define DMA_DEBUG_NAME         "pxa_dma"
+#define DMA_MAX_REQUESTERS     64
+
 struct dma_channel {
        char *name;
        pxa_dma_prio prio;
        void (*irq_handler)(int, void *);
        void *data;
+       spinlock_t lock;
 };
 
 static struct dma_channel *dma_channels;
 static int num_dma_channels;
 
+/*
+ * Debug fs
+ */
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/seq_file.h>
+
+static struct dentry *dbgfs_root, *dbgfs_state, **dbgfs_chan;
+
+static int dbg_show_requester_chan(struct seq_file *s, void *p)
+{
+       int pos = 0;
+       int chan = (int)s->private;
+       int i;
+       u32 drcmr;
+
+       pos += seq_printf(s, "DMA channel %d requesters list :\n", chan);
+       for (i = 0; i < DMA_MAX_REQUESTERS; i++) {
+               drcmr = DRCMR(i);
+               if ((drcmr & DRCMR_CHLNUM) == chan)
+                       pos += seq_printf(s, "\tRequester %d (MAPVLD=%d)\n", i,
+                                         !!(drcmr & DRCMR_MAPVLD));
+       }
+       return pos;
+}
+
+static inline int dbg_burst_from_dcmd(u32 dcmd)
+{
+       int burst = (dcmd >> 16) & 0x3;
+
+       return burst ? 4 << burst : 0;
+}
+
+static int is_phys_valid(unsigned long addr)
+{
+       return pfn_valid(__phys_to_pfn(addr));
+}
+
+#define DCSR_STR(flag) (dcsr & DCSR_##flag ? #flag" " : "")
+#define DCMD_STR(flag) (dcmd & DCMD_##flag ? #flag" " : "")
+
+static int dbg_show_descriptors(struct seq_file *s, void *p)
+{
+       int pos = 0;
+       int chan = (int)s->private;
+       int i, max_show = 20, burst, width;
+       u32 dcmd;
+       unsigned long phys_desc;
+       struct pxa_dma_desc *desc;
+       unsigned long flags;
+
+       spin_lock_irqsave(&dma_channels[chan].lock, flags);
+       phys_desc = DDADR(chan);
+
+       pos += seq_printf(s, "DMA channel %d descriptors :\n", chan);
+       pos += seq_printf(s, "[%03d] First descriptor unknown\n", 0);
+       for (i = 1; i < max_show && is_phys_valid(phys_desc); i++) {
+               desc = phys_to_virt(phys_desc);
+               dcmd = desc->dcmd;
+               burst = dbg_burst_from_dcmd(dcmd);
+               width = (1 << ((dcmd >> 14) & 0x3)) >> 1;
+
+               pos += seq_printf(s, "[%03d] Desc at %08lx(virt %p)\n",
+                                 i, phys_desc, desc);
+               pos += seq_printf(s, "\tDDADR = %08x\n", desc->ddadr);
+               pos += seq_printf(s, "\tDSADR = %08x\n", desc->dsadr);
+               pos += seq_printf(s, "\tDTADR = %08x\n", desc->dtadr);
+               pos += seq_printf(s, "\tDCMD  = %08x (%s%s%s%s%s%s%sburst=%d"
+                                 " width=%d len=%d)\n",
+                                 dcmd,
+                                 DCMD_STR(INCSRCADDR), DCMD_STR(INCTRGADDR),
+                                 DCMD_STR(FLOWSRC), DCMD_STR(FLOWTRG),
+                                 DCMD_STR(STARTIRQEN), DCMD_STR(ENDIRQEN),
+                                 DCMD_STR(ENDIAN), burst, width,
+                                 dcmd & DCMD_LENGTH);
+               phys_desc = desc->ddadr;
+       }
+       if (i == max_show)
+               pos += seq_printf(s, "[%03d] Desc at %08lx ... max display reached\n",
+                                 i, phys_desc);
+       else
+               pos += seq_printf(s, "[%03d] Desc at %08lx is %s\n",
+                                 i, phys_desc, phys_desc == DDADR_STOP ?
+                                 "DDADR_STOP" : "invalid");
+
+       spin_unlock_irqrestore(&dma_channels[chan].lock, flags);
+       return pos;
+}
+
+static int dbg_show_chan_state(struct seq_file *s, void *p)
+{
+       int pos = 0;
+       int chan = (int)s->private;
+       u32 dcsr, dcmd;
+       int burst, width;
+       static char *str_prio[] = { "high", "normal", "low" };
+
+       dcsr = DCSR(chan);
+       dcmd = DCMD(chan);
+       burst = dbg_burst_from_dcmd(dcmd);
+       width = (1 << ((dcmd >> 14) & 0x3)) >> 1;
+
+       pos += seq_printf(s, "DMA channel %d\n", chan);
+       pos += seq_printf(s, "\tPriority : %s\n",
+                         str_prio[dma_channels[chan].prio]);
+       pos += seq_printf(s, "\tUnaligned transfer bit: %s\n",
+                         DALGN & (1 << chan) ? "yes" : "no");
+       pos += seq_printf(s, "\tDCSR  = %08x (%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
+                         dcsr, DCSR_STR(RUN), DCSR_STR(NODESC),
+                         DCSR_STR(STOPIRQEN), DCSR_STR(EORIRQEN),
+                         DCSR_STR(EORJMPEN), DCSR_STR(EORSTOPEN),
+                         DCSR_STR(SETCMPST), DCSR_STR(CLRCMPST),
+                         DCSR_STR(CMPST), DCSR_STR(EORINTR), DCSR_STR(REQPEND),
+                         DCSR_STR(STOPSTATE), DCSR_STR(ENDINTR),
+                         DCSR_STR(STARTINTR), DCSR_STR(BUSERR));
+
+       pos += seq_printf(s, "\tDCMD  = %08x (%s%s%s%s%s%s%sburst=%d width=%d"
+                         " len=%d)\n",
+                         dcmd,
+                         DCMD_STR(INCSRCADDR), DCMD_STR(INCTRGADDR),
+                         DCMD_STR(FLOWSRC), DCMD_STR(FLOWTRG),
+                         DCMD_STR(STARTIRQEN), DCMD_STR(ENDIRQEN),
+                         DCMD_STR(ENDIAN), burst, width, dcmd & DCMD_LENGTH);
+       pos += seq_printf(s, "\tDSADR = %08x\n", DSADR(chan));
+       pos += seq_printf(s, "\tDTADR = %08x\n", DTADR(chan));
+       pos += seq_printf(s, "\tDDADR = %08x\n", DDADR(chan));
+       return pos;
+}
+
+static int dbg_show_state(struct seq_file *s, void *p)
+{
+       int pos = 0;
+
+       /* basic device status */
+       pos += seq_printf(s, "DMA engine status\n");
+       pos += seq_printf(s, "\tNumber of channels: %d\n", num_dma_channels);
+
+       return pos;
+}
+
+#define DBGFS_FUNC_DECL(name) \
+static int dbg_open_##name(struct inode *inode, struct file *file) \
+{ \
+       return single_open(file, dbg_show_##name, inode->i_private); \
+} \
+static const struct file_operations dbg_fops_##name = { \
+       .owner          = THIS_MODULE, \
+       .open           = dbg_open_##name, \
+       .llseek         = seq_lseek, \
+       .read           = seq_read, \
+       .release        = single_release, \
+}
+
+DBGFS_FUNC_DECL(state);
+DBGFS_FUNC_DECL(chan_state);
+DBGFS_FUNC_DECL(descriptors);
+DBGFS_FUNC_DECL(requester_chan);
+
+static struct dentry *pxa_dma_dbg_alloc_chan(int ch, struct dentry *chandir)
+{
+       char chan_name[11];
+       struct dentry *chan, *chan_state = NULL, *chan_descr = NULL;
+       struct dentry *chan_reqs = NULL;
+       void *dt;
+
+       scnprintf(chan_name, sizeof(chan_name), "%d", ch);
+       chan = debugfs_create_dir(chan_name, chandir);
+       dt = (void *)ch;
+
+       if (chan)
+               chan_state = debugfs_create_file("state", 0400, chan, dt,
+                                                &dbg_fops_chan_state);
+       if (chan_state)
+               chan_descr = debugfs_create_file("descriptors", 0400, chan, dt,
+                                                &dbg_fops_descriptors);
+       if (chan_descr)
+               chan_reqs = debugfs_create_file("requesters", 0400, chan, dt,
+                                               &dbg_fops_requester_chan);
+       if (!chan_reqs)
+               goto err_state;
+
+       return chan;
+
+err_state:
+       debugfs_remove_recursive(chan);
+       return NULL;
+}
+
+static void pxa_dma_init_debugfs(void)
+{
+       int i;
+       struct dentry *chandir;
+
+       dbgfs_root = debugfs_create_dir(DMA_DEBUG_NAME, NULL);
+       if (IS_ERR(dbgfs_root) || !dbgfs_root)
+               goto err_root;
+
+       dbgfs_state = debugfs_create_file("state", 0400, dbgfs_root, NULL,
+                                         &dbg_fops_state);
+       if (!dbgfs_state)
+               goto err_state;
+
+       dbgfs_chan = kmalloc(sizeof(*dbgfs_chan) * num_dma_channels,
+                            GFP_KERNEL);
+       if (!dbgfs_chan)
+               goto err_alloc;
+
+       chandir = debugfs_create_dir("channels", dbgfs_root);
+       if (!chandir)
+               goto err_chandir;
+
+       for (i = 0; i < num_dma_channels; i++) {
+               dbgfs_chan[i] = pxa_dma_dbg_alloc_chan(i, chandir);
+               if (!dbgfs_chan[i])
+                       goto err_chans;
+       }
+
+       return;
+err_chans:
+err_chandir:
+       kfree(dbgfs_chan);
+err_alloc:
+err_state:
+       debugfs_remove_recursive(dbgfs_root);
+err_root:
+       pr_err("pxa_dma: debugfs is not available\n");
+}
+
+static void __exit pxa_dma_cleanup_debugfs(void)
+{
+       debugfs_remove_recursive(dbgfs_root);
+}
+#else
+static inline void pxa_dma_init_debugfs(void) {}
+static inline void pxa_dma_cleanup_debugfs(void) {}
+#endif
+
 int pxa_request_dma (char *name, pxa_dma_prio prio,
                        void (*irq_handler)(int, void *),
                        void *data)
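
Each DBGFS_FUNC_DECL(name) invocation in the debugfs block above stamps out
a single_open() wrapper plus a file_operations instance; for example,
DBGFS_FUNC_DECL(state) expands to roughly:

static int dbg_open_state(struct inode *inode, struct file *file)
{
	return single_open(file, dbg_show_state, inode->i_private);
}
static const struct file_operations dbg_fops_state = {
	.owner		= THIS_MODULE,
	.open		= dbg_open_state,
	.llseek		= seq_lseek,
	.read		= seq_read,
	.release	= single_release,
};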
@@ -71,6 +315,7 @@ int pxa_request_dma (char *name, pxa_dma_prio prio,
        local_irq_restore(flags);
        return i;
 }
+EXPORT_SYMBOL(pxa_request_dma);
 
 void pxa_free_dma (int dma_ch)
 {
@@ -88,24 +333,26 @@ void pxa_free_dma (int dma_ch)
        dma_channels[dma_ch].name = NULL;
        local_irq_restore(flags);
 }
+EXPORT_SYMBOL(pxa_free_dma);
 
 static irqreturn_t dma_irq_handler(int irq, void *dev_id)
 {
        int i, dint = DINT;
+       struct dma_channel *channel;
 
-       for (i = 0; i < num_dma_channels; i++) {
-               if (dint & (1 << i)) {
-                       struct dma_channel *channel = &dma_channels[i];
-                       if (channel->name && channel->irq_handler) {
-                               channel->irq_handler(i, channel->data);
-                       } else {
-                               /*
-                                * IRQ for an unregistered DMA channel:
-                                * let's clear the interrupts and disable it.
-                                */
-                               printk (KERN_WARNING "spurious IRQ for DMA channel %d\n", i);
-                               DCSR(i) = DCSR_STARTINTR|DCSR_ENDINTR|DCSR_BUSERR;
-                       }
+       while (dint) {
+               i = __ffs(dint);
+               dint &= (dint - 1);
+               channel = &dma_channels[i];
+               if (channel->name && channel->irq_handler) {
+                       channel->irq_handler(i, channel->data);
+               } else {
+                       /*
+                        * IRQ for an unregistered DMA channel:
+                        * let's clear the interrupts and disable it.
+                        */
+                       printk (KERN_WARNING "spurious IRQ for DMA channel %d\n", i);
+                       DCSR(i) = DCSR_STARTINTR|DCSR_ENDINTR|DCSR_BUSERR;
                }
        }
        return IRQ_HANDLED;
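
The rewritten handler visits only the channels whose bits are set in DINT:
the kernel's __ffs() returns the index of the lowest set bit, and
dint &= (dint - 1) clears that bit. A trace with a made-up value:

/* dint = 0x29 = 0b101001: channels 0, 3 and 5 pending.
 *   __ffs(0x29) = 0, then 0x29 & 0x28 = 0x28
 *   __ffs(0x28) = 3, then 0x28 & 0x27 = 0x20
 *   __ffs(0x20) = 5, then 0x20 & 0x1f = 0    -> loop ends
 * The handler runs for i = 0, 3, 5 and never scans idle channels.
 */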
@@ -127,6 +374,7 @@ int __init pxa_init_dma(int irq, int num_ch)
        for (i = 0; i < num_ch; i++) {
                DCSR(i) = 0;
                dma_channels[i].prio = min((i & 0xf) >> 2, DMA_PRIO_LOW);
+               spin_lock_init(&dma_channels[i].lock);
        }
 
        ret = request_irq(irq, dma_irq_handler, IRQF_DISABLED, "DMA", NULL);
@@ -135,10 +383,9 @@ int __init pxa_init_dma(int irq, int num_ch)
                kfree(dma_channels);
                return ret;
        }
-
        num_dma_channels = num_ch;
+
+       pxa_dma_init_debugfs();
+
        return 0;
 }
-
-EXPORT_SYMBOL(pxa_request_dma);
-EXPORT_SYMBOL(pxa_free_dma);
index 6401946..22086e6 100644
@@ -150,6 +150,74 @@ enum {
        MFP_PIN_GPIO125,
        MFP_PIN_GPIO126,
        MFP_PIN_GPIO127,
+
+       MFP_PIN_GPIO128,
+       MFP_PIN_GPIO129,
+       MFP_PIN_GPIO130,
+       MFP_PIN_GPIO131,
+       MFP_PIN_GPIO132,
+       MFP_PIN_GPIO133,
+       MFP_PIN_GPIO134,
+       MFP_PIN_GPIO135,
+       MFP_PIN_GPIO136,
+       MFP_PIN_GPIO137,
+       MFP_PIN_GPIO138,
+       MFP_PIN_GPIO139,
+       MFP_PIN_GPIO140,
+       MFP_PIN_GPIO141,
+       MFP_PIN_GPIO142,
+       MFP_PIN_GPIO143,
+       MFP_PIN_GPIO144,
+       MFP_PIN_GPIO145,
+       MFP_PIN_GPIO146,
+       MFP_PIN_GPIO147,
+       MFP_PIN_GPIO148,
+       MFP_PIN_GPIO149,
+       MFP_PIN_GPIO150,
+       MFP_PIN_GPIO151,
+       MFP_PIN_GPIO152,
+       MFP_PIN_GPIO153,
+       MFP_PIN_GPIO154,
+       MFP_PIN_GPIO155,
+       MFP_PIN_GPIO156,
+       MFP_PIN_GPIO157,
+       MFP_PIN_GPIO158,
+       MFP_PIN_GPIO159,
+       MFP_PIN_GPIO160,
+       MFP_PIN_GPIO161,
+       MFP_PIN_GPIO162,
+       MFP_PIN_GPIO163,
+       MFP_PIN_GPIO164,
+       MFP_PIN_GPIO165,
+       MFP_PIN_GPIO166,
+       MFP_PIN_GPIO167,
+       MFP_PIN_GPIO168,
+       MFP_PIN_GPIO169,
+       MFP_PIN_GPIO170,
+       MFP_PIN_GPIO171,
+       MFP_PIN_GPIO172,
+       MFP_PIN_GPIO173,
+       MFP_PIN_GPIO174,
+       MFP_PIN_GPIO175,
+       MFP_PIN_GPIO176,
+       MFP_PIN_GPIO177,
+       MFP_PIN_GPIO178,
+       MFP_PIN_GPIO179,
+       MFP_PIN_GPIO180,
+       MFP_PIN_GPIO181,
+       MFP_PIN_GPIO182,
+       MFP_PIN_GPIO183,
+       MFP_PIN_GPIO184,
+       MFP_PIN_GPIO185,
+       MFP_PIN_GPIO186,
+       MFP_PIN_GPIO187,
+       MFP_PIN_GPIO188,
+       MFP_PIN_GPIO189,
+       MFP_PIN_GPIO190,
+       MFP_PIN_GPIO191,
+
+       MFP_PIN_GPIO255 = 255,
+
        MFP_PIN_GPIO0_2,
        MFP_PIN_GPIO1_2,
        MFP_PIN_GPIO2_2,
@@ -325,8 +393,9 @@ typedef unsigned long mfp_cfg_t;
 #define MFP_PULL_LOW           (0x1 << 21)
 #define MFP_PULL_HIGH          (0x2 << 21)
 #define MFP_PULL_BOTH          (0x3 << 21)
-#define MFP_PULL_MASK          (0x3 << 21)
-#define MFP_PULL(x)            (((x) >> 21) & 0x3)
+#define MFP_PULL_FLOAT         (0x4 << 21)
+#define MFP_PULL_MASK          (0x7 << 21)
+#define MFP_PULL(x)            (((x) >> 21) & 0x7)
 
 #define MFP_CFG_DEFAULT                (MFP_AF0 | MFP_DS03X | MFP_LPM_DEFAULT |\
                                 MFP_LPM_EDGE_NONE | MFP_PULL_NONE)
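
The pull field at bit 21 grows from two bits to three so that the new
FLOAT state (0x4) fits alongside NONE/LOW/HIGH/BOTH; under the old 0x3
mask, FLOAT would have decoded back to MFP_PULL_NONE. A stand-alone check
(macros re-declared locally for illustration):

#include <assert.h>
#define PULL_FLOAT	(0x4UL << 21)	/* mirrors MFP_PULL_FLOAT above */
#define PULL(x)		(((x) >> 21) & 0x7)

int main(void)
{
	assert(PULL(PULL_FLOAT) == 0x4);		/* 3-bit mask: preserved */
	assert(((PULL_FLOAT >> 21) & 0x3) == 0x0);	/* old 2-bit mask: lost */
	return 0;
}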
index e716c62..9405d03 100644
  * MFPR_PULL_LOW        1         0        1
  * MFPR_PULL_HIGH       1         1        0
  * MFPR_PULL_BOTH       1         1        1
+ * MFPR_PULL_FLOAT      1         0        0
  */
 #define MFPR_PULL_NONE         (0)
 #define MFPR_PULL_LOW          (MFPR_PULL_SEL | MFPR_PULLDOWN_EN)
 #define MFPR_PULL_BOTH         (MFPR_PULL_LOW | MFPR_PULLUP_EN)
 #define MFPR_PULL_HIGH         (MFPR_PULL_SEL | MFPR_PULLUP_EN)
+#define MFPR_PULL_FLOAT                (MFPR_PULL_SEL)
 
 /* mfp_spin_lock is used to ensure that MFP register configuration
  * (most likely a read-modify-write operation) is atomic, and that
@@ -116,6 +118,7 @@ static const unsigned long mfpr_pull[] = {
        MFPR_PULL_LOW,
        MFPR_PULL_HIGH,
        MFPR_PULL_BOTH,
+       MFPR_PULL_FLOAT,
 };
 
 /* mapping of MFP_LPM_EDGE_* definitions to MFPR_EDGE_* register bits */
index 260fdc6..5ff24e0 100644
@@ -28,7 +28,7 @@ static __init void s3c_gpiolib_track(struct s3c_gpio_chip *chip)
 
        gpn = chip->chip.base;
        for (i = 0; i < chip->chip.ngpio; i++, gpn++) {
-               BUG_ON(gpn > ARRAY_SIZE(s3c_gpios));
+               BUG_ON(gpn >= ARRAY_SIZE(s3c_gpios));
                s3c_gpios[gpn] = chip;
        }
 }
index 67aa93d..266a107 100644
@@ -345,13 +345,13 @@ int s3c2410_dma_enqueue(unsigned int channel, void *id,
        if (!chan)
                return -EINVAL;
 
-       buff = kzalloc(sizeof(struct s3c64xx_dma_buff), GFP_KERNEL);
+       buff = kzalloc(sizeof(struct s3c64xx_dma_buff), GFP_ATOMIC);
        if (!buff) {
                printk(KERN_ERR "%s: no memory for buffer\n", __func__);
                return -ENOMEM;
        }
 
-       lli = dma_pool_alloc(dma_pool, GFP_KERNEL, &buff->lli_dma);
+       lli = dma_pool_alloc(dma_pool, GFP_ATOMIC, &buff->lli_dma);
        if (!lli) {
                printk(KERN_ERR "%s: no memory for lli\n", __func__);
                ret = -ENOMEM;
@@ -697,7 +697,7 @@ static int __init s3c64xx_dma_init(void)
 
        printk(KERN_INFO "%s: Registering DMA channels\n", __func__);
 
-       dma_pool = dma_pool_create("DMA-LLI", NULL, 32, 16, 0);
+       dma_pool = dma_pool_create("DMA-LLI", NULL, sizeof(struct pl080s_lli), 16, 0);
        if (!dma_pool) {
                printk(KERN_ERR "%s: failed to create pool\n", __func__);
                return -ENOMEM;
index 0c30dd9..8f76a1e 100644
@@ -26,7 +26,7 @@ struct s3c64xx_dma_buff {
        struct s3c64xx_dma_buff *next;
 
        void                    *pw;
-       struct pl080_lli        *lli;
+       struct pl080s_lli       *lli;
        dma_addr_t               lli_dma;
 };
 
index 743a700..7956fd3 100644
 
 #define IRQ_EINT_GROUP(group, no)      (IRQ_EINT_GROUP##group##_BASE + (no))
 
+/* Define a group of interrupts for board-specific use (e.g. for MFD
+ * interrupt controllers). */
+#define IRQ_BOARD_START (IRQ_EINT_GROUP9_BASE + IRQ_EINT_GROUP9_NR + 1)
+
+#define IRQ_BOARD_NR 16
+
+#define IRQ_BOARD_END (IRQ_BOARD_START + IRQ_BOARD_NR)
+
 /* Set the default NR_IRQS */
 
-#define NR_IRQS        (IRQ_EINT_GROUP9_BASE + IRQ_EINT_GROUP9_NR + 1)
+#define NR_IRQS        (IRQ_BOARD_END + 1)
 
 #endif /* __ASM_PLAT_S3C64XX_IRQS_H */
 
index febac19..9745852 100644
@@ -302,8 +302,8 @@ static int s3c64xx_setrate_clksrc(struct clk *clk, unsigned long rate)
                return -EINVAL;
 
        val = __raw_readl(reg);
-       val &= ~(0xf << sclk->shift);
-       val |= (div - 1) << sclk->shift;
+       val &= ~(0xf << sclk->divider_shift);
+       val |= (div - 1) << sclk->divider_shift;
        __raw_writel(val, reg);
 
        return 0;
@@ -328,6 +328,8 @@ static int s3c64xx_setparent_clksrc(struct clk *clk, struct clk *parent)
                clksrc |= src_nr << sclk->shift;
 
                __raw_writel(clksrc, S3C_CLK_SRC);
+
+               clk->parent = parent;
                return 0;
        }
 
@@ -343,7 +345,7 @@ static unsigned long s3c64xx_roundrate_clksrc(struct clk *clk,
        if (rate > parent_rate)
                rate = parent_rate;
        else {
-               div = rate / parent_rate;
+               div = parent_rate / rate;
 
                if (div == 0)
                        div = 1;
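
The one-line fix inverts the ratio: a divider divides the parent rate, so
div = parent_rate / rate. With made-up example rates:

/* parent_rate = 266000000 Hz, requested rate = 66000000 Hz:
 *   old:  div = 66000000 / 266000000 = 0, clamped to 1 -> 266 MHz (wrong)
 *   new:  div = 266000000 / 66000000 = 4 -> 266 MHz / 4 = 66.5 MHz
 */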
index d2f4977..ef88f25 100644
@@ -264,7 +264,7 @@ int stmp3xxx_dma_make_chain(int ch, struct stmp37xx_circ_dma_chain *chain,
                                        stmp3xxx_dma_free_command(ch,
                                                                  &descriptors
                                                                  [i]);
-                               } while (i-- >= 0);
+                               } while (i-- > 0);
                        }
                        return err;
                }
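
The loop-condition change fixes a classic do/while off-by-one: with ">= 0"
the body runs once more after i reaches zero and touches descriptors[-1].
Trace with i starting at 2:

/*   free descriptors[2]; 2 >= 0 -> i = 1
 *   free descriptors[1]; 1 >= 0 -> i = 0
 *   free descriptors[0]; 0 >= 0 -> i = -1
 *   free descriptors[-1]                 <- out-of-bounds with ">= 0"
 * With "i-- > 0" the loop stops after descriptors[0], as intended.
 */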
index c8c55b4..94be7bb 100644
@@ -12,7 +12,7 @@
 #
 #   http://www.arm.linux.org.uk/developer/machines/?action=new
 #
-# Last update: Sat Sep 12 12:00:16 2009
+# Last update: Fri Sep 18 21:42:00 2009
 #
 # machine_is_xxx       CONFIG_xxxx             MACH_TYPE_xxx           number
 #
@@ -1638,7 +1638,7 @@ mx35evb                   MACH_MX35EVB            MX35EVB                 1643
 aml_m8050              MACH_AML_M8050          AML_M8050               1644
 mx35_3ds               MACH_MX35_3DS           MX35_3DS                1645
 mars                   MACH_MARS               MARS                    1646
-ntosd_644xa            MACH_NTOSD_644XA        NTOSD_644XA             1647
+neuros_osd2            MACH_NEUROS_OSD2        NEUROS_OSD2             1647
 badger                 MACH_BADGER             BADGER                  1648
 trizeps4wl             MACH_TRIZEPS4WL         TRIZEPS4WL              1649
 trizeps5               MACH_TRIZEPS5           TRIZEPS5                1650
@@ -1654,7 +1654,7 @@ vf10xx                    MACH_VF10XX             VF10XX                  1659
 zoran43xx              MACH_ZORAN43XX          ZORAN43XX               1660
 sonix926               MACH_SONIX926           SONIX926                1661
 celestialsemi          MACH_CELESTIALSEMI      CELESTIALSEMI           1662
-cc9m2443               MACH_CC9M2443           CC9M2443                1663
+cc9m2443js             MACH_CC9M2443JS         CC9M2443JS              1663
 tw5334                 MACH_TW5334             TW5334                  1664
 omap_htcartemis                MACH_HTCARTEMIS         HTCARTEMIS              1665
 nal_hlite              MACH_NAL_HLITE          NAL_HLITE               1666
@@ -1802,7 +1802,7 @@ ccw9p9215js               MACH_CCW9P9215JS        CCW9P9215JS             1811
 rd88f5181l_ge          MACH_RD88F5181L_GE      RD88F5181L_GE           1812
 sifmain                        MACH_SIFMAIN            SIFMAIN                 1813
 sam9_l9261             MACH_SAM9_L9261         SAM9_L9261              1814
-cc9m2443js             MACH_CC9M2443JS         CC9M2443JS              1815
+cc9m2443               MACH_CC9M2443           CC9M2443                1815
 xaria300               MACH_XARIA300           XARIA300                1816
 it9200                 MACH_IT9200             IT9200                  1817
 rd88f5181l_fxo         MACH_RD88F5181L_FXO     RD88F5181L_FXO          1818
@@ -2409,3 +2409,15 @@ platypus         MACH_PLATYPUS           PLATYPUS                2422
 pss2                   MACH_PSS2               PSS2                    2423
 davinci_apm150         MACH_DAVINCI_APM150     DAVINCI_APM150          2424
 str9100                        MACH_STR9100            STR9100                 2425
+net5big                        MACH_NET5BIG            NET5BIG                 2426
+seabed9263             MACH_SEABED9263         SEABED9263              2427
+mx51_m2id              MACH_MX51_M2ID          MX51_M2ID               2428
+octvocplus_eb          MACH_OCTVOCPLUS_EB      OCTVOCPLUS_EB           2429
+klk_firefox            MACH_KLK_FIREFOX        KLK_FIREFOX             2430
+klk_wirma_module       MACH_KLK_WIRMA_MODULE   KLK_WIRMA_MODULE        2431
+klk_wirma_mmi          MACH_KLK_WIRMA_MMI      KLK_WIRMA_MMI           2432
+supersonic             MACH_SUPERSONIC         SUPERSONIC              2433
+liberty                        MACH_LIBERTY            LIBERTY                 2434
+mh355                  MACH_MH355              MH355                   2435
+pc7802                 MACH_PC7802             PC7802                  2436
+gnet_sgc               MACH_GNET_SGC           GNET_SGC                2437
index 21ac7c2..ffd90fb 100644
@@ -96,8 +96,7 @@ SECTIONS
        {
                __sdata = .;
                /* This gets done first, so the glob doesn't suck it in */
-               . = ALIGN(32);
-               *(.data.cacheline_aligned)
+               CACHELINE_ALIGNED_DATA(32)
 
 #if !L1_DATA_A_LENGTH
                . = ALIGN(32);
@@ -116,12 +115,7 @@ SECTIONS
                DATA_DATA
                CONSTRUCTORS
 
-               /* make sure the init_task is aligned to the
-                * kernel thread size so we can locate the kernel
-                * stack properly and quickly.
-                */
-               . = ALIGN(THREAD_SIZE);
-               *(.init_task.data)
+               INIT_TASK_DATA(THREAD_SIZE)
 
                __edata = .;
        }
@@ -134,39 +128,10 @@ SECTIONS
        . = ALIGN(PAGE_SIZE);
        ___init_begin = .;
 
-       .init.text :
-       {
-               . = ALIGN(PAGE_SIZE);
-               __sinittext = .;
-               INIT_TEXT
-               __einittext = .;
-       }
-       .init.data :
-       {
-               . = ALIGN(16);
-               INIT_DATA
-       }
-       .init.setup :
-       {
-               . = ALIGN(16);
-               ___setup_start = .;
-               *(.init.setup)
-               ___setup_end = .;
-       }
-       .initcall.init :
-       {
-               ___initcall_start = .;
-               INITCALLS
-               ___initcall_end = .;
-       }
-       .con_initcall.init :
-       {
-               ___con_initcall_start = .;
-               *(.con_initcall.init)
-               ___con_initcall_end = .;
-       }
+       INIT_TEXT_SECTION(PAGE_SIZE)
+       . = ALIGN(16);
+       INIT_DATA_SECTION(16)
        PERCPU(4)
-       SECURITY_INIT
 
        /* we have to discard exit text and such at runtime, not link time, to
         * handle embedded cross-section references (alt instructions, bug
@@ -181,18 +146,9 @@ SECTIONS
                EXIT_DATA
        }
 
-       .init.ramfs :
-       {
-               . = ALIGN(4);
-               ___initramfs_start = .;
-               *(.init.ramfs)
-               . = ALIGN(4);
-               ___initramfs_end = .;
-       }
-
        __l1_lma_start = .;
 
-       .text_l1 L1_CODE_START : AT(LOADADDR(.init.ramfs) + SIZEOF(.init.ramfs))
+       .text_l1 L1_CODE_START : AT(LOADADDR(.exit.data) + SIZEOF(.exit.data))
        {
                . = ALIGN(4);
                __stext_l1 = .;
index 2b73c7a..31ca141 100644
@@ -28,7 +28,6 @@
 
 extern void update_xtime_from_cmos(void);
 extern int set_rtc_mmss(unsigned long nowtime);
-extern int setup_irq(int, struct irqaction *);
 extern int have_rtc;
 
 unsigned long get_ns_in_jiffie(void)
index d2a3ff8..058addd 100644
@@ -52,8 +52,6 @@ static struct mm_struct* flush_mm;
 static struct vm_area_struct* flush_vma;
 static unsigned long flush_addr;
 
-extern int setup_irq(int, struct irqaction *);
-
 /* Mode registers */
 static unsigned long irq_regs[NR_CPUS] = {
   regi_irq,
index 65633d0..b1920d8 100644
@@ -46,7 +46,6 @@ unsigned long timer_regs[NR_CPUS] =
 
 extern void update_xtime_from_cmos(void);
 extern int set_rtc_mmss(unsigned long nowtime);
-extern int setup_irq(int, struct irqaction *);
 extern int have_rtc;
 
 #ifdef CONFIG_CPU_FREQ
index c22f67e..090ceb9 100644
@@ -36,7 +36,7 @@ struct crisv32_ioport crisv32_ioports[] = {
        },
 };
 
-#define NBR_OF_PORTS sizeof(crisv32_ioports)/sizeof(struct crisv32_ioport)
+#define NBR_OF_PORTS ARRAY_SIZE(crisv32_ioports)
 
 struct crisv32_iopin crisv32_led_net0_green;
 struct crisv32_iopin crisv32_led_net0_red;
index cb6327b..a695866 100644
@@ -52,7 +52,7 @@ struct crisv32_ioport crisv32_ioports[] = {
        }
 };
 
-#define NBR_OF_PORTS sizeof(crisv32_ioports)/sizeof(struct crisv32_ioport)
+#define NBR_OF_PORTS ARRAY_SIZE(crisv32_ioports)
 
 struct crisv32_iopin crisv32_led_net0_green;
 struct crisv32_iopin crisv32_led_net0_red;
index df84f17..e829e5a 100644
@@ -33,10 +33,10 @@ typedef struct
 
 /* CRIS PTE bits (see R_TLB_LO in the register description)
  *
- *   Bit:  31-13 12-------4    3        2       1       0  
- *         ________________________________________________
- *        | pfn | reserved | global | valid | kernel | we  |
- *        |_____|__________|________|_______|________|_____|
+ *   Bit:  31     30-13 12-------4    3        2       1       0
+ *         _______________________________________________________
+ *        | cache |pfn | reserved | global | valid | kernel | we  |
+ *        |_______|____|__________|________|_______|________|_____|
  *
  * (pfn = physical frame number)
  */
@@ -53,6 +53,7 @@ typedef struct
 #define _PAGE_VALID       (1<<2) /* page is valid */
 #define _PAGE_SILENT_READ  (1<<2) /* synonym */
 #define _PAGE_GLOBAL       (1<<3) /* global page - context is ignored */
+#define _PAGE_NO_CACHE    (1<<31) /* part of the uncached memory map */
 
 /* Bits the HW doesn't care about but the kernel uses them in SW */
 
index 6bcdc3f..c1a13e0 100644
@@ -28,10 +28,10 @@ typedef struct
 /*
  * CRISv32 PTE bits:
  *
- *  Bit:  31-13  12-5     4        3       2        1        0
- *       +-----+------+--------+-------+--------+-------+---------+
- *       | pfn | zero | global | valid | kernel | write | execute |
- *       +-----+------+--------+-------+--------+-------+---------+
+ *  Bit:   31     30-13  12-5     4        3       2        1        0
+ *       +-------+-----+------+--------+-------+--------+-------+---------+
+ *       | cache | pfn | zero | global | valid | kernel | write | execute |
+ *       +-------+-----+------+--------+-------+--------+-------+---------+
  */
 
 /*
@@ -45,6 +45,8 @@ typedef struct
 #define _PAGE_VALID         (1 << 3)   /* Page is valid. */
 #define _PAGE_SILENT_READ   (1 << 3)   /* Same as above. */
 #define _PAGE_GLOBAL        (1 << 4)   /* Global page. */
+#define _PAGE_NO_CACHE     (1 << 31)   /* part of the uncached memory map */
+
 
 /*
  * The hardware doesn't care about these bits, but the kernel uses them in
index 74178ad..17bb12d 100644
@@ -2,16 +2,6 @@
 #define __ASM_HARDIRQ_H
 
 #include <asm/irq.h>
-#include <linux/threads.h>
-#include <linux/cache.h>
-
-typedef struct {
-       unsigned int __softirq_pending;
-} ____cacheline_aligned irq_cpustat_t;
-
-#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
-
-void ack_bad_irq(unsigned int irq);
 
 #define HARDIRQ_BITS   8
 
@@ -24,4 +14,6 @@ void ack_bad_irq(unsigned int irq);
 # error HARDIRQ_BITS is too low!
 #endif
 
+#include <asm-generic/hardirq.h>
+
 #endif /* __ASM_HARDIRQ_H */
index 50aa974..1fcce00 100644
@@ -197,6 +197,8 @@ static inline pte_t __mk_pte(void * page, pgprot_t pgprot)
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 { pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot); return pte; }
 
+#define pgprot_noncached(prot) __pgprot((pgprot_val(prot) | _PAGE_NO_CACHE))
+
 
 /* pte_val refers to a page in the 0x4xxxxxxx physical DRAM interval
  * __pte_page(pte_val) refers to the "virtual" DRAM interval
index 7f642fc..0ca7d98 100644
 
 #include <asm/io.h>
 
-void ack_bad_irq(unsigned int irq)
-{
-       printk("unexpected IRQ trap at vector %02x\n", irq);
-}
-
 int show_interrupts(struct seq_file *p, void *v)
 {
        int i = *(loff_t *) v, j;
index 6c81836..bbfda67 100644
@@ -51,10 +51,7 @@ SECTIONS
        _etext = . ;                    /* End of text section. */
        __etext = .;
 
-       . = ALIGN(4);                   /* Exception table. */
-       __start___ex_table = .;
-       __ex_table : { *(__ex_table) }
-       __stop___ex_table = .;
+       EXCEPTION_TABLE(4)
 
        RODATA
 
@@ -67,36 +64,24 @@ SECTIONS
        __edata = . ;                   /* End of data section. */
        _edata = . ;
 
-       . = ALIGN(PAGE_SIZE);   /* init_task and stack, must be aligned. */
-       .data.init_task : { *(.data.init_task) }
+       INIT_TASK_DATA_SECTION(PAGE_SIZE)
 
        . = ALIGN(PAGE_SIZE);           /* Init code and data. */
        __init_begin = .;
-       .init.text : {
-                  _sinittext = .;
-                  INIT_TEXT
-                  _einittext = .;
-       }
+       INIT_TEXT_SECTION(PAGE_SIZE)
        .init.data : { INIT_DATA }
-       . = ALIGN(16);
-       __setup_start = .;
-       .init.setup : { *(.init.setup) }
-       __setup_end = .;
+       .init.setup : { INIT_SETUP(16) }
 #ifdef CONFIG_ETRAX_ARCH_V32
        __start___param = .;
        __param : { *(__param) }
        __stop___param = .;
 #endif
        .initcall.init : {
-               __initcall_start = .;
-               INITCALLS
-               __initcall_end = .;
+               INIT_CALLS
        }
 
        .con_initcall.init : {
-               __con_initcall_start = .;
-               *(.con_initcall.init)
-               __con_initcall_end = .;
+               CON_INITCALL
        }
        SECURITY_INIT
 
@@ -114,9 +99,7 @@ SECTIONS
        PERCPU(PAGE_SIZE)
 
        .init.ramfs : {
-               __initramfs_start = .;
-               *(.init.ramfs)
-               __initramfs_end = .;
+               INIT_RAM_FS
        }
 #endif
 
@@ -130,11 +113,7 @@ SECTIONS
        __init_end = .;
 
        __data_end = . ;                /* Move to _edata ? */
-       __bss_start = .;                /* BSS. */
-       .bss : {
-               *(COMMON)
-               *(.bss)
-       }
+       BSS_SECTION(0, 0, 0)
 
        . =  ALIGN (0x20);
        _end = .;
index be722fc..0d4d3e3 100644 (file)
@@ -150,7 +150,7 @@ static int user_atoi(char __user *ubuf, size_t len)
 /*
  * Send us to sleep.
  */
-static int sysctl_pm_do_suspend(ctl_table *ctl, int write, struct file *filp,
+static int sysctl_pm_do_suspend(ctl_table *ctl, int write,
                                void __user *buffer, size_t *lenp, loff_t *fpos)
 {
        int retval, mode;
@@ -198,13 +198,13 @@ static int try_set_cmode(int new_cmode)
 }
 
 
-static int cmode_procctl(ctl_table *ctl, int write, struct file *filp,
+static int cmode_procctl(ctl_table *ctl, int write,
                         void __user *buffer, size_t *lenp, loff_t *fpos)
 {
        int new_cmode;
 
        if (!write)
-               return proc_dointvec(ctl, write, filp, buffer, lenp, fpos);
+               return proc_dointvec(ctl, write, buffer, lenp, fpos);
 
        new_cmode = user_atoi(buffer, *lenp);
 
@@ -301,13 +301,13 @@ static int try_set_cm(int new_cm)
        return 0;
 }
 
-static int p0_procctl(ctl_table *ctl, int write, struct file *filp,
+static int p0_procctl(ctl_table *ctl, int write,
                      void __user *buffer, size_t *lenp, loff_t *fpos)
 {
        int new_p0;
 
        if (!write)
-               return proc_dointvec(ctl, write, filp, buffer, lenp, fpos);
+               return proc_dointvec(ctl, write, buffer, lenp, fpos);
 
        new_p0 = user_atoi(buffer, *lenp);
 
@@ -345,13 +345,13 @@ static int p0_sysctl(ctl_table *table,
        return 1;
 }
 
-static int cm_procctl(ctl_table *ctl, int write, struct file *filp,
+static int cm_procctl(ctl_table *ctl, int write,
                      void __user *buffer, size_t *lenp, loff_t *fpos)
 {
        int new_cm;
 
        if (!write)
-               return proc_dointvec(ctl, write, filp, buffer, lenp, fpos);
+               return proc_dointvec(ctl, write, buffer, lenp, fpos);
 
        new_cm = user_atoi(buffer, *lenp);
 
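All four handlers above follow the same pattern under the new interface: the
struct file * argument disappears from the handler prototype and from the
proc_dointvec() call it forwards to. A minimal handler of the new shape, with
hypothetical names:

    #include <linux/sysctl.h>

    static int my_value;

    static int my_proc_handler(ctl_table *ctl, int write,
                               void __user *buffer, size_t *lenp, loff_t *fpos)
    {
            /* forward to the generic integer handler, now without filp */
            return proc_dointvec(ctl, write, buffer, lenp, fpos);
    }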
index 76595e8..b73b542 100644 (file)
@@ -11,3 +11,5 @@ else
 obj-y += pci-dma-nommu.o
 endif
 endif
+
+obj-$(CONFIG_MTD) += flash.o
diff --git a/arch/frv/mb93090-mb00/flash.c b/arch/frv/mb93090-mb00/flash.c
new file mode 100644 (file)
index 0000000..c0e3707
--- /dev/null
@@ -0,0 +1,90 @@
+/* Flash mappings for the MB93090-MB00 motherboard
+ *
+ * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/physmap.h>
+
+#define MB93090_BOOTROM_ADDR   0xFF000000      /* Boot ROM */
+#define MB93090_BOOTROM_SIZE   (2 * 1024 * 1024)
+#define MB93090_USERROM_ADDR   0xFF200000      /* User ROM */
+#define MB93090_USERROM_SIZE   (2 * 1024 * 1024)
+
+/*
+ * default MTD partition table for both main flash devices, expected to be
+ * overridden by RedBoot
+ */
+static struct mtd_partition mb93090_partitions[] = {
+       {
+               .name           = "Filesystem",
+               .size           = MTDPART_SIZ_FULL,
+               .offset         = 0,
+       }
+};
+
+/*
+ * Definition of the MB93090 Boot ROM (on the CPU card)
+ */
+static struct physmap_flash_data mb93090_bootrom_data = {
+       .width          = 2,
+       .nr_parts       = ARRAY_SIZE(mb93090_partitions),
+       .parts          = mb93090_partitions,
+};
+
+static struct resource mb93090_bootrom_resource = {
+       .start          = MB93090_BOOTROM_ADDR,
+       .end            = MB93090_BOOTROM_ADDR + MB93090_BOOTROM_SIZE - 1,
+       .flags          = IORESOURCE_MEM,
+};
+
+static struct platform_device mb93090_bootrom = {
+       .name           = "physmap-flash",
+       .id             = 0,
+       .dev.platform_data = &mb93090_bootrom_data,
+       .num_resources  = 1,
+       .resource       = &mb93090_bootrom_resource,
+};
+
+/*
+ * Definition of the MB93090 User ROM (on the motherboard)
+ */
+static struct physmap_flash_data mb93090_userrom_data = {
+       .width          = 2,
+       .nr_parts       = ARRAY_SIZE(mb93090_partitions),
+       .parts          = mb93090_partitions,
+};
+
+static struct resource mb93090_userrom_resource = {
+       .start          = MB93090_USERROM_ADDR,
+       .end            = MB93090_USERROM_ADDR + MB93090_USERROM_SIZE - 1,
+       .flags          = IORESOURCE_MEM,
+};
+
+static struct platform_device mb93090_userrom = {
+       .name           = "physmap-flash",
+       .id             = 1,
+       .dev.platform_data = &mb93090_userrom_data,
+       .num_resources  = 1,
+       .resource       = &mb93090_userrom_resource,
+};
+
+/*
+ * register the MB93090 flashes
+ */
+static int __init mb93090_mtd_init(void)
+{
+       platform_device_register(&mb93090_bootrom);
+       platform_device_register(&mb93090_userrom);
+       return 0;
+}
+
+module_init(mb93090_mtd_init);
index 662b02e..b9e2490 100644 (file)
@@ -1,5 +1,6 @@
 #define VMLINUX_SYMBOL(_sym_) _##_sym_
 #include <asm-generic/vmlinux.lds.h>
+#include <asm/page.h>
 
 /* target memory map */
 #ifdef CONFIG_H8300H_GENERIC
@@ -79,11 +80,8 @@ SECTIONS
        SCHED_TEXT
        LOCK_TEXT
        __etext = . ;
-       . = ALIGN(16);          /* Exception table              */
-       ___start___ex_table = .;
-               *(__ex_table)
-       ___stop___ex_table = .;
        }
+       EXCEPTION_TABLE(16)
 
        RODATA
 #if defined(CONFIG_ROMKERNEL)
@@ -100,8 +98,7 @@ SECTIONS
        __sdata = . ;
        ___data_start = . ;
 
-       . = ALIGN(0x2000) ;
-               *(.data.init_task)
+       INIT_TASK_DATA(0x2000)
        . = ALIGN(0x4) ;
                DATA_DATA
        . = ALIGN(0x4) ;
@@ -114,24 +111,16 @@ SECTIONS
        __einittext = .; 
                INIT_DATA
        . = ALIGN(0x4) ;
+       INIT_SETUP(0x4)
        ___setup_start = .;
                *(.init.setup)
        . = ALIGN(0x4) ;
        ___setup_end = .;
-       ___initcall_start = .;
-               INITCALLS
-       ___initcall_end = .;
-       ___con_initcall_start = .;
-               *(.con_initcall.init)
-       ___con_initcall_end = .;
+       INIT_CALLS
+       CON_INITCALL
                EXIT_TEXT
                EXIT_DATA
-#if defined(CONFIG_BLK_DEV_INITRD)
-               . = ALIGN(4);
-       ___initramfs_start = .;
-               *(.init.ramfs)
-       ___initramfs_end = .;
-#endif
+       INIT_RAM_FS
        . = ALIGN(0x4) ;
        ___init_end = .;
        __edata = . ;
index 11777f7..725ede8 100644 (file)
@@ -1,9 +1,11 @@
 #ifndef _ASM_M32R_PAGE_H
 #define _ASM_M32R_PAGE_H
 
+#include <linux/const.h>
+
 /* PAGE_SHIFT determines the page size */
 #define PAGE_SHIFT     12
-#define PAGE_SIZE      (1UL << PAGE_SHIFT)
+#define PAGE_SIZE      (_AC(1, UL) << PAGE_SHIFT)
 #define PAGE_MASK      (~(PAGE_SIZE-1))
 
 #ifndef __ASSEMBLY__
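For reference, _AC() from <linux/const.h> attaches the type suffix only when
compiled as C, which is what lets this PAGE_SIZE definition also be included
from assembly; the header reads essentially:

    #ifdef __ASSEMBLY__
    #define _AC(X, Y)   X               /* PAGE_SIZE -> (1 << 12)   */
    #else
    #define __AC(X, Y)  (X##Y)
    #define _AC(X, Y)   __AC(X, Y)      /* PAGE_SIZE -> (1UL << 12) */
    #endif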
index 1a997fc..8397c24 100644 (file)
@@ -140,8 +140,6 @@ unsigned long get_wchan(struct task_struct *p);
 #define KSTK_EIP(tsk)  ((tsk)->thread.lr)
 #define KSTK_ESP(tsk)  ((tsk)->thread.sp)
 
-#define THREAD_SIZE (2*PAGE_SIZE)
-
 #define cpu_relax()    barrier()
 
 #endif /* _ASM_M32R_PROCESSOR_H */
index 7157815..ed240b6 100644 (file)
@@ -55,6 +55,8 @@ struct thread_info {
 
 #define PREEMPT_ACTIVE         0x10000000
 
+#define THREAD_SIZE (PAGE_SIZE << 1)
+
 /*
  * macros/functions for gaining access to the thread information structure
  */
@@ -76,8 +78,6 @@ struct thread_info {
 #define init_thread_info       (init_thread_union.thread_info)
 #define init_stack             (init_thread_union.stack)
 
-#define THREAD_SIZE (2*PAGE_SIZE)
-
 /* how to get the thread information struct from C */
 static inline struct thread_info *current_thread_info(void)
 {
@@ -125,17 +125,6 @@ static inline unsigned int get_thread_fault_code(void)
        return ti->flags >> TI_FLAG_FAULT_CODE_SHIFT;
 }
 
-#else /* !__ASSEMBLY__ */
-
-#define THREAD_SIZE    8192
-
-/* how to get the thread information struct from ASM */
-#define GET_THREAD_INFO(reg)   GET_THREAD_INFO reg
-       .macro GET_THREAD_INFO reg
-       ldi     \reg, #-THREAD_SIZE
-       and     \reg, sp
-       .endm
-
 #endif
 
 /*
index 612d35b..4038698 100644 (file)
 #define resume_kernel          restore_all
 #endif
 
+/* how to get the thread information struct from ASM */
+#define GET_THREAD_INFO(reg)   GET_THREAD_INFO reg
+       .macro GET_THREAD_INFO reg
+       ldi     \reg, #-THREAD_SIZE
+       and     \reg, sp
+       .endm
+
 ENTRY(ret_from_fork)
        pop     r0
        bl      schedule_tail
index 0a71944..a46652d 100644 (file)
@@ -268,13 +268,13 @@ ENTRY(empty_zero_page)
 /*------------------------------------------------------------------------
  * Stack area
  */
-       .section .spi
+       .section .init.data, "aw"
        ALIGN
        .global spi_stack_top
        .zero   1024
 spi_stack_top:
 
-       .section .spu
+       .section .init.data, "aw"
        ALIGN
        .global spu_stack_top
        .zero   1024
index de5e21c..8ceb618 100644 (file)
@@ -4,6 +4,7 @@
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/addrspace.h>
 #include <asm/page.h>
+#include <asm/thread_info.h>
 
 OUTPUT_ARCH(m32r)
 #if defined(__LITTLE_ENDIAN__)
@@ -40,83 +41,22 @@ SECTIONS
 #endif
   _etext = .;                  /* End of text section */
 
-  . = ALIGN(16);               /* Exception table */
-  __start___ex_table = .;
-  __ex_table : { *(__ex_table) }
-  __stop___ex_table = .;
-
+  EXCEPTION_TABLE(16)
   RODATA
-
-  /* writeable */
-  .data : {                    /* Data */
-       *(.spu)
-       *(.spi)
-       DATA_DATA
-       CONSTRUCTORS
-       }
-
-  . = ALIGN(4096);
-  __nosave_begin = .;
-  .data_nosave : { *(.data.nosave) }
-  . = ALIGN(4096);
-  __nosave_end = .;
-
-  . = ALIGN(32);
-  .data.cacheline_aligned : { *(.data.cacheline_aligned) }
-
+  RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE)
   _edata = .;                  /* End of data section */
 
-  . = ALIGN(8192);             /* init_task */
-  .data.init_task : { *(.data.init_task) }
-
   /* will be freed after init */
-  . = ALIGN(4096);             /* Init code and data */
+  . = ALIGN(PAGE_SIZE);                /* Init code and data */
   __init_begin = .;
-  .init.text : {
-       _sinittext = .;
-       INIT_TEXT
-       _einittext = .;
-  }
-  .init.data : { INIT_DATA }
-  . = ALIGN(16);
-  __setup_start = .;
-  .init.setup : { *(.init.setup) }
-  __setup_end = .;
-  __initcall_start = .;
-  .initcall.init : {
-       INITCALLS
-  }
-  __initcall_end = .;
-  __con_initcall_start = .;
-  .con_initcall.init : { *(.con_initcall.init) }
-  __con_initcall_end = .;
-  SECURITY_INIT
-  . = ALIGN(4);
-  __alt_instructions = .;
-  .altinstructions : { *(.altinstructions) }
-  __alt_instructions_end = .;
-  .altinstr_replacement : { *(.altinstr_replacement) }
-  /* .exit.text is discard at runtime, not link time, to deal with references
-     from .altinstructions and .eh_frame */
-  .exit.text : { EXIT_TEXT }
-  .exit.data : { EXIT_DATA }
-
-#ifdef CONFIG_BLK_DEV_INITRD
-  . = ALIGN(4096);
-  __initramfs_start = .;
-  .init.ramfs : { *(.init.ramfs) }
-  __initramfs_end = .;
-#endif
-
-  PERCPU(4096)
-  . = ALIGN(4096);
+  INIT_TEXT_SECTION(PAGE_SIZE)
+  INIT_DATA_SECTION(16)
+  PERCPU(PAGE_SIZE)
+  . = ALIGN(PAGE_SIZE);
   __init_end = .;
   /* freed after init ends here */
 
-  __bss_start = .;             /* BSS */
-  .bss : { *(.bss) }
-  . = ALIGN(4);
-  __bss_stop = .;
+  BSS_SECTION(0, 0, 4)
 
   _end = . ;
 
index 2db722d..bbd8327 100644 (file)
@@ -6,6 +6,7 @@ mainmenu "Linux/Microblaze Kernel Configuration"
 config MICROBLAZE
        def_bool y
        select HAVE_LMB
+       select USB_ARCH_HAS_EHCI
        select ARCH_WANT_OPTIONAL_GPIOLIB
 
 config SWAP
index 8439598..3418735 100644 (file)
@@ -37,12 +37,12 @@ CPUFLAGS-$(CONFIG_XILINX_MICROBLAZE0_USE_PCMP_INSTR) += -mxl-pattern-compare
 CPUFLAGS-1 += $(call cc-option,-mcpu=v$(CPU_VER))
 
 # r31 holds current when in kernel mode
-KBUILD_KERNEL += -ffixed-r31 $(CPUFLAGS-1) $(CPUFLAGS-2)
+KBUILD_CFLAGS += -ffixed-r31 $(CPUFLAGS-1) $(CPUFLAGS-2)
 
 LDFLAGS                :=
 LDFLAGS_vmlinux        :=
 
-LIBGCC := $(shell $(CC) $(KBUILD_KERNEL) -print-libgcc-file-name)
+LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
 
 head-y := arch/microblaze/kernel/head.o
 libs-y += arch/microblaze/lib/
@@ -53,22 +53,41 @@ core-y += arch/microblaze/platform/
 
 boot := arch/microblaze/boot
 
+# Are we making a simpleImage.<boardname> target? If so, crack out the boardname
+DTB:=$(subst simpleImage.,,$(filter simpleImage.%, $(MAKECMDGOALS)))
+
+ifneq ($(DTB),)
+       core-y  += $(boot)/
+endif
+
 # defines filename extension depending memory management type
 ifeq ($(CONFIG_MMU),)
 MMU := -nommu
 endif
 
-export MMU
+export MMU DTB
 
 all: linux.bin
 
+BOOT_TARGETS = linux.bin linux.bin.gz simpleImage.%
+
 archclean:
        $(Q)$(MAKE) $(clean)=$(boot)
 
-linux.bin linux.bin.gz: vmlinux
+$(BOOT_TARGETS): vmlinux
        $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
 define archhelp
-  echo  '* linux.bin    - Create raw binary'
-  echo  '  linux.bin.gz - Create compressed raw binary'
+  echo '* linux.bin    - Create raw binary'
+  echo '  linux.bin.gz - Create compressed raw binary'
+  echo '  simpleImage.<dt> - ELF image with $(arch)/boot/dts/<dt>.dts linked in'
+  echo '                   - stripped elf with fdt blob'
+  echo '  simpleImage.<dt>.unstrip - full ELF image with fdt blob'
+  echo '  *_defconfig      - Select default config from arch/microblaze/configs'
+  echo ''
+  echo '  Targets with <dt> embed a device tree blob inside the image'
+  echo '  These targets support board with firmware that does not'
+  echo '  support passing a device tree directly. Replace <dt> with the'
+  echo '  name of a dts file from the arch/microblaze/boot/dts/ directory'
+  echo '  (minus the .dts extension).'
 endef
index c2bb043..21f1332 100644 (file)
@@ -2,10 +2,24 @@
 # arch/microblaze/boot/Makefile
 #
 
-targets := linux.bin linux.bin.gz
+obj-y += linked_dtb.o
+
+targets := linux.bin linux.bin.gz simpleImage.%
 
 OBJCOPYFLAGS_linux.bin  := -O binary
 
+# Where the DTS files live
+dtstree         := $(srctree)/$(src)/dts
+
+# Ensure system.dtb exists
+$(obj)/linked_dtb.o: $(obj)/system.dtb
+
+# Generate system.dtb from $(DTB).dtb
+ifneq ($(DTB),system)
+$(obj)/system.dtb: $(obj)/$(DTB).dtb
+       $(call if_changed,cp)
+endif
+
 $(obj)/linux.bin: vmlinux FORCE
        [ -n $(CONFIG_INITRAMFS_SOURCE) ] && [ ! -e $(CONFIG_INITRAMFS_SOURCE) ] && \
        touch $(CONFIG_INITRAMFS_SOURCE) || echo "No CPIO image"
@@ -16,4 +30,27 @@ $(obj)/linux.bin.gz: $(obj)/linux.bin FORCE
        $(call if_changed,gzip)
        @echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
 
-clean-kernel += linux.bin linux.bin.gz
+quiet_cmd_cp = CP      $< $@$2
+       cmd_cp = cat $< >$@$2 || (rm -f $@ && echo false)
+
+quiet_cmd_strip = STRIP   $@
+      cmd_strip = $(STRIP) -K _start -K _end -K __log_buf -K _fdt_start vmlinux -o $@
+
+$(obj)/simpleImage.%: vmlinux FORCE
+       $(call if_changed,cp,.unstrip)
+       $(call if_changed,strip)
+       @echo 'Kernel: $@ is ready' ' (#'`cat .version`')'
+
+# Path to the device tree compiler
+DTC = $(objtree)/scripts/dtc/dtc
+
+# Rule to build device tree blobs
+quiet_cmd_dtc = DTC     $@
+       cmd_dtc = $(DTC) -O dtb -o $(obj)/$*.dtb -b 0 -p 1024 $(dtstree)/$*.dts
+
+$(obj)/%.dtb: $(dtstree)/%.dts FORCE
+       $(call if_changed,dtc)
+
+clean-kernel += linux.bin linux.bin.gz simpleImage.*
+
+clean-files += *.dtb
diff --git a/arch/microblaze/boot/dts/system.dts b/arch/microblaze/boot/dts/system.dts
new file mode 120000 (symlink)
index 0000000..7cb6578
--- /dev/null
@@ -0,0 +1 @@
+../../platform/generic/system.dts
\ No newline at end of file
diff --git a/arch/microblaze/boot/linked_dtb.S b/arch/microblaze/boot/linked_dtb.S
new file mode 100644 (file)
index 0000000..cb2b537
--- /dev/null
@@ -0,0 +1,3 @@
+.section __fdt_blob,"a"
+.incbin "arch/microblaze/boot/system.dtb"
+
index 09c3296..bb7c374 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.31-rc6
-# Tue Aug 18 11:00:02 2009
+# Linux kernel version: 2.6.31
+# Thu Sep 24 10:28:50 2009
 #
 CONFIG_MICROBLAZE=y
 # CONFIG_SWAP is not set
@@ -42,11 +42,12 @@ CONFIG_SYSVIPC_SYSCTL=y
 #
 # RCU Subsystem
 #
-CONFIG_CLASSIC_RCU=y
-# CONFIG_TREE_RCU is not set
-# CONFIG_PREEMPT_RCU is not set
+CONFIG_TREE_RCU=y
+# CONFIG_TREE_PREEMPT_RCU is not set
+# CONFIG_RCU_TRACE is not set
+CONFIG_RCU_FANOUT=32
+# CONFIG_RCU_FANOUT_EXACT is not set
 # CONFIG_TREE_RCU_TRACE is not set
-# CONFIG_PREEMPT_RCU_TRACE is not set
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=17
@@ -260,6 +261,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
 # CONFIG_NETFILTER is not set
 # CONFIG_IP_DCCP is not set
 # CONFIG_IP_SCTP is not set
+# CONFIG_RDS is not set
 # CONFIG_TIPC is not set
 # CONFIG_ATM is not set
 # CONFIG_BRIDGE is not set
@@ -357,12 +359,10 @@ CONFIG_NET_ETHERNET=y
 # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
 # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_KS8842 is not set
+CONFIG_XILINX_EMACLITE=y
 CONFIG_NETDEV_1000=y
 CONFIG_NETDEV_10000=y
-
-#
-# Wireless LAN
-#
+CONFIG_WLAN=y
 # CONFIG_WLAN_PRE80211 is not set
 # CONFIG_WLAN_80211 is not set
 
@@ -460,6 +460,7 @@ CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
 # CONFIG_DISPLAY_SUPPORT is not set
 # CONFIG_SOUND is not set
 # CONFIG_USB_SUPPORT is not set
+CONFIG_USB_ARCH_HAS_EHCI=y
 # CONFIG_MMC is not set
 # CONFIG_MEMSTICK is not set
 # CONFIG_NEW_LEDS is not set
@@ -488,6 +489,7 @@ CONFIG_EXT2_FS=y
 # CONFIG_GFS2_FS is not set
 # CONFIG_OCFS2_FS is not set
 # CONFIG_BTRFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_FILE_LOCKING=y
 CONFIG_FSNOTIFY=y
 # CONFIG_DNOTIFY is not set
@@ -546,7 +548,6 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
-# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -671,18 +672,20 @@ CONFIG_DEBUG_INFO=y
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_DEBUG_SG is not set
 # CONFIG_DEBUG_NOTIFIERS is not set
+# CONFIG_DEBUG_CREDENTIALS is not set
 # CONFIG_BOOT_PRINTK_DELAY is not set
 # CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
 # CONFIG_FAULT_INJECTION is not set
 # CONFIG_SYSCTL_SYSCALL_CHECK is not set
 # CONFIG_PAGE_POISONING is not set
 # CONFIG_SAMPLES is not set
 # CONFIG_KMEMCHECK is not set
 CONFIG_EARLY_PRINTK=y
-CONFIG_HEART_BEAT=y
+# CONFIG_HEART_BEAT is not set
 CONFIG_DEBUG_BOOTMEM=y
 
 #
@@ -697,7 +700,6 @@ CONFIG_CRYPTO=y
 #
 # Crypto core or helper
 #
-# CONFIG_CRYPTO_FIPS is not set
 # CONFIG_CRYPTO_MANAGER is not set
 # CONFIG_CRYPTO_MANAGER2 is not set
 # CONFIG_CRYPTO_GF128MUL is not set
@@ -729,11 +731,13 @@ CONFIG_CRYPTO=y
 #
 # CONFIG_CRYPTO_HMAC is not set
 # CONFIG_CRYPTO_XCBC is not set
+# CONFIG_CRYPTO_VMAC is not set
 
 #
 # Digest
 #
 # CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_GHASH is not set
 # CONFIG_CRYPTO_MD4 is not set
 # CONFIG_CRYPTO_MD5 is not set
 # CONFIG_CRYPTO_MICHAEL_MIC is not set
index 8b63861..adb839b 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.31-rc6
-# Tue Aug 18 10:35:30 2009
+# Linux kernel version: 2.6.31
+# Thu Sep 24 10:29:43 2009
 #
 CONFIG_MICROBLAZE=y
 # CONFIG_SWAP is not set
@@ -44,11 +44,12 @@ CONFIG_BSD_PROCESS_ACCT_V3=y
 #
 # RCU Subsystem
 #
-CONFIG_CLASSIC_RCU=y
-# CONFIG_TREE_RCU is not set
-# CONFIG_PREEMPT_RCU is not set
+CONFIG_TREE_RCU=y
+# CONFIG_TREE_PREEMPT_RCU is not set
+# CONFIG_RCU_TRACE is not set
+CONFIG_RCU_FANOUT=32
+# CONFIG_RCU_FANOUT_EXACT is not set
 # CONFIG_TREE_RCU_TRACE is not set
-# CONFIG_PREEMPT_RCU_TRACE is not set
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=17
@@ -243,6 +244,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
 # CONFIG_NETFILTER is not set
 # CONFIG_IP_DCCP is not set
 # CONFIG_IP_SCTP is not set
+# CONFIG_RDS is not set
 # CONFIG_TIPC is not set
 # CONFIG_ATM is not set
 # CONFIG_BRIDGE is not set
@@ -272,6 +274,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
 # CONFIG_AF_RXRPC is not set
 CONFIG_WIRELESS=y
 # CONFIG_CFG80211 is not set
+CONFIG_CFG80211_DEFAULT_PS_VALUE=0
 CONFIG_WIRELESS_OLD_REGULATORY=y
 # CONFIG_WIRELESS_EXT is not set
 # CONFIG_LIB80211 is not set
@@ -279,7 +282,6 @@ CONFIG_WIRELESS_OLD_REGULATORY=y
 #
 # CFG80211 needs to be enabled for MAC80211
 #
-CONFIG_MAC80211_DEFAULT_PS_VALUE=0
 # CONFIG_WIMAX is not set
 # CONFIG_RFKILL is not set
 # CONFIG_NET_9P is not set
@@ -304,6 +306,7 @@ CONFIG_MTD_PARTITIONS=y
 # CONFIG_MTD_TESTS is not set
 # CONFIG_MTD_REDBOOT_PARTS is not set
 CONFIG_MTD_CMDLINE_PARTS=y
+# CONFIG_MTD_OF_PARTS is not set
 # CONFIG_MTD_AR7_PARTS is not set
 
 #
@@ -349,6 +352,7 @@ CONFIG_MTD_RAM=y
 #
 # CONFIG_MTD_COMPLEX_MAPPINGS is not set
 # CONFIG_MTD_PHYSMAP is not set
+# CONFIG_MTD_PHYSMAP_OF is not set
 CONFIG_MTD_UCLINUX=y
 # CONFIG_MTD_PLATRAM is not set
 
@@ -429,12 +433,10 @@ CONFIG_NET_ETHERNET=y
 # CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set
 # CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set
 # CONFIG_KS8842 is not set
+# CONFIG_XILINX_EMACLITE is not set
 CONFIG_NETDEV_1000=y
 CONFIG_NETDEV_10000=y
-
-#
-# Wireless LAN
-#
+CONFIG_WLAN=y
 # CONFIG_WLAN_PRE80211 is not set
 # CONFIG_WLAN_80211 is not set
 
@@ -535,7 +537,7 @@ CONFIG_VIDEO_OUTPUT_CONTROL=y
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 # CONFIG_USB_ARCH_HAS_OHCI is not set
-# CONFIG_USB_ARCH_HAS_EHCI is not set
+CONFIG_USB_ARCH_HAS_EHCI=y
 # CONFIG_USB is not set
 # CONFIG_USB_OTG_WHITELIST is not set
 # CONFIG_USB_OTG_BLACKLIST_HUB is not set
@@ -579,6 +581,7 @@ CONFIG_FS_POSIX_ACL=y
 # CONFIG_GFS2_FS is not set
 # CONFIG_OCFS2_FS is not set
 # CONFIG_BTRFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_FILE_LOCKING=y
 CONFIG_FSNOTIFY=y
 # CONFIG_DNOTIFY is not set
@@ -639,7 +642,6 @@ CONFIG_ROMFS_BACKED_BY_BLOCK=y
 CONFIG_ROMFS_ON_BLOCK=y
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
-# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -710,18 +712,20 @@ CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_LIST=y
 CONFIG_DEBUG_SG=y
 # CONFIG_DEBUG_NOTIFIERS is not set
+# CONFIG_DEBUG_CREDENTIALS is not set
 # CONFIG_BOOT_PRINTK_DELAY is not set
 # CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
 # CONFIG_BACKTRACE_SELF_TEST is not set
 # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set
 # CONFIG_FAULT_INJECTION is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
 # CONFIG_PAGE_POISONING is not set
 # CONFIG_DYNAMIC_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_EARLY_PRINTK=y
-CONFIG_HEART_BEAT=y
+# CONFIG_HEART_BEAT is not set
 # CONFIG_DEBUG_BOOTMEM is not set
 
 #
@@ -736,7 +740,6 @@ CONFIG_CRYPTO=y
 #
 # Crypto core or helper
 #
-# CONFIG_CRYPTO_FIPS is not set
 # CONFIG_CRYPTO_MANAGER is not set
 # CONFIG_CRYPTO_MANAGER2 is not set
 # CONFIG_CRYPTO_GF128MUL is not set
@@ -768,11 +771,13 @@ CONFIG_CRYPTO=y
 #
 # CONFIG_CRYPTO_HMAC is not set
 # CONFIG_CRYPTO_XCBC is not set
+# CONFIG_CRYPTO_VMAC is not set
 
 #
 # Digest
 #
 # CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_GHASH is not set
 # CONFIG_CRYPTO_MD4 is not set
 # CONFIG_CRYPTO_MD5 is not set
 # CONFIG_CRYPTO_MICHAEL_MIC is not set
diff --git a/arch/microblaze/include/asm/asm-compat.h b/arch/microblaze/include/asm/asm-compat.h
new file mode 100644 (file)
index 0000000..e7bc9dc
--- /dev/null
@@ -0,0 +1,17 @@
+#ifndef _ASM_MICROBLAZE_ASM_COMPAT_H
+#define _ASM_MICROBLAZE_ASM_COMPAT_H
+
+#include <asm/types.h>
+
+#ifdef __ASSEMBLY__
+#  define stringify_in_c(...)  __VA_ARGS__
+#  define ASM_CONST(x)         x
+#else
+/* This version of stringify will deal with commas... */
+#  define __stringify_in_c(...)        #__VA_ARGS__
+#  define stringify_in_c(...)  __stringify_in_c(__VA_ARGS__) " "
+#  define __ASM_CONST(x)       x##UL
+#  define ASM_CONST(x)         __ASM_CONST(x)
+#endif
+
+#endif /* _ASM_MICROBLAZE_ASM_COMPAT_H */
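ASM_CONST() lets one definition serve C, assembly and the linker script: in C
the constant is token-pasted with a UL suffix, elsewhere it stays a bare
token. The LOAD_OFFSET definition added to asm/page.h below depends on exactly
this. A hypothetical use:

    #include <asm/asm-compat.h>

    #define MY_BASE ASM_CONST(0xc0000000)   /* hypothetical constant */

    /* 0xc0000000UL when compiled as C; bare 0xc0000000 in .S files */
    static unsigned long my_base = MY_BASE;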
index 7c3ec13..fc9997b 100644 (file)
@@ -210,6 +210,9 @@ static inline void __iomem *__ioremap(phys_addr_t address, unsigned long size,
 #define in_be32(a) __raw_readl((const void __iomem __force *)(a))
 #define in_be16(a) __raw_readw(a)
 
+#define writel_be(v, a)        out_be32((__force unsigned *)a, v)
+#define readl_be(a)    in_be32((__force unsigned *)a)
+
 /*
  * Little endian
  */
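A usage sketch for the new big-endian accessors; the device, register offset
and function are hypothetical:

    #include <linux/types.h>
    #include <asm/io.h>

    static void toggle_enable(void __iomem *regs)
    {
            u32 v = readl_be(regs + 0x10);  /* 32-bit big-endian read */

            writel_be(v | 1, regs + 0x10);  /* set bit 0 and write back */
    }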
diff --git a/arch/microblaze/include/asm/ipc.h b/arch/microblaze/include/asm/ipc.h
deleted file mode 100644 (file)
index a46e3d9..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ipc.h>
index 72aceae..880c988 100644 (file)
@@ -17,6 +17,7 @@
 
 #include <linux/pfn.h>
 #include <asm/setup.h>
+#include <asm/asm-compat.h>
 #include <linux/const.h>
 
 #ifdef __KERNEL__
@@ -26,6 +27,8 @@
 #define PAGE_SIZE      (_AC(1, UL) << PAGE_SHIFT)
 #define PAGE_MASK      (~(PAGE_SIZE-1))
 
+#define LOAD_OFFSET    ASM_CONST((CONFIG_KERNEL_START-CONFIG_KERNEL_BASE_ADDR))
+
 #ifndef __ASSEMBLY__
 
 #define PAGE_UP(addr)  (((addr)+((PAGE_SIZE)-1))&(~((PAGE_SIZE)-1)))
index 27f8daf..ed67c9e 100644 (file)
@@ -38,7 +38,7 @@ extern void early_console_reg_tlb_alloc(unsigned int addr);
 void time_init(void);
 void init_IRQ(void);
 void machine_early_init(const char *cmdline, unsigned int ram,
-                                               unsigned int fdt);
+                       unsigned int fdt, unsigned int msr);
 
 void machine_restart(char *cmd);
 void machine_shutdown(void);
diff --git a/arch/microblaze/include/asm/syscall.h b/arch/microblaze/include/asm/syscall.h
new file mode 100644 (file)
index 0000000..048dfcd
--- /dev/null
@@ -0,0 +1,99 @@
+#ifndef __ASM_MICROBLAZE_SYSCALL_H
+#define __ASM_MICROBLAZE_SYSCALL_H
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <asm/ptrace.h>
+
+/* The system call number is given by the user in R12 */
+static inline long syscall_get_nr(struct task_struct *task,
+                                 struct pt_regs *regs)
+{
+       return regs->r12;
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+                                   struct pt_regs *regs)
+{
+       /* TODO.  */
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+                                    struct pt_regs *regs)
+{
+       return IS_ERR_VALUE(regs->r3) ? regs->r3 : 0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+                                           struct pt_regs *regs)
+{
+       return regs->r3;
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+                                           struct pt_regs *regs,
+                                           int error, long val)
+{
+       if (error)
+               regs->r3 = -error;
+       else
+               regs->r3 = val;
+}
+
+static inline microblaze_reg_t microblaze_get_syscall_arg(struct pt_regs *regs,
+                                                         unsigned int n)
+{
+       switch (n) {
+       case 5: return regs->r10;
+       case 4: return regs->r9;
+       case 3: return regs->r8;
+       case 2: return regs->r7;
+       case 1: return regs->r6;
+       case 0: return regs->r5;
+       default:
+               BUG();
+       }
+       return ~0;
+}
+
+static inline void microblaze_set_syscall_arg(struct pt_regs *regs,
+                                             unsigned int n,
+                                             unsigned long val)
+{
+       switch (n) {
+       case 5:
+               regs->r10 = val;
+       case 4:
+               regs->r9 = val;
+       case 3:
+               regs->r8 = val;
+       case 2:
+               regs->r7 = val;
+       case 1:
+               regs->r6 = val;
+       case 0:
+               regs->r5 = val;
+       default:
+               BUG();
+       }
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+                                        struct pt_regs *regs,
+                                        unsigned int i, unsigned int n,
+                                        unsigned long *args)
+{
+       while (n--)
+               *args++ = microblaze_get_syscall_arg(regs, i++);
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+                                        struct pt_regs *regs,
+                                        unsigned int i, unsigned int n,
+                                        const unsigned long *args)
+{
+       while (n--)
+               microblaze_set_syscall_arg(regs, i++, *args++);
+}
+
+#endif /* __ASM_MICROBLAZE_SYSCALL_H */
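A hypothetical tracer-side helper shows how these accessors compose; the
function and its message are illustrative only:

    #include <asm/syscall.h>

    static void dump_syscall(struct task_struct *task, struct pt_regs *regs)
    {
            unsigned long args[6];

            syscall_get_arguments(task, regs, 0, 6, args);
            pr_debug("syscall %ld(%lx, %lx, ...) = %ld\n",
                     syscall_get_nr(task, regs), args[0], args[1],
                     syscall_get_return_value(task, regs));
    }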
index c411c67..3539bab 100644 (file)
@@ -28,6 +28,7 @@ const struct cpu_ver_key cpu_ver_lookup[] = {
        {"7.10.d", 0x0b},
        {"7.20.a", 0x0c},
        {"7.20.b", 0x0d},
+       {"7.20.c", 0x0e},
        /* FIXME There is no keycode defined in MBV for these versions */
        {"2.10.a", 0x10},
        {"3.00.a", 0x20},
@@ -49,6 +50,8 @@ const struct family_string_key family_string_lookup[] = {
        {"spartan3a", 0xa},
        {"spartan3an", 0xb},
        {"spartan3adsp", 0xc},
+       {"spartan6", 0xd},
+       {"virtex6", 0xe},
        /* FIXME There is no key code defined for spartan2 */
        {"spartan2", 0xf0},
        {NULL, 0},
index c7353e7..acc1f05 100644 (file)
@@ -308,38 +308,69 @@ C_ENTRY(_user_exception):
        swi     r12, r1, PTO+PT_R0;
        tovirt(r1,r1)
 
-       la      r15, r0, ret_from_trap-8
 /* where the trap should return need -8 to adjust for rtsd r15, 8*/
 /* Jump to the appropriate function for the system call number in r12
  * (r12 is not preserved), or return an error if r12 is not valid. The LP
  * register should point to the location where
  * the called function should return.  [note that MAKE_SYS_CALL uses label 1] */
-       /* See if the system call number is valid.  */
+
+       # Step into virtual mode.
+       set_vms;
+       addik   r11, r0, 3f
+       rtid    r11, 0
+       nop
+3:
+       add     r11, r0, CURRENT_TASK    /* Get current task ptr into r11 */
+       lwi     r11, r11, TS_THREAD_INFO /* get thread info */
+       lwi     r11, r11, TI_FLAGS       /* get flags in thread info */
+       andi    r11, r11, _TIF_WORK_SYSCALL_MASK
+       beqi    r11, 4f
+
+       addik   r3, r0, -ENOSYS
+       swi     r3, r1, PTO + PT_R3
+       brlid   r15, do_syscall_trace_enter
+       addik   r5, r1, PTO + PT_R0
+
+       # do_syscall_trace_enter returns the new syscall nr.
+       addk    r12, r0, r3
+       lwi     r5, r1, PTO+PT_R5;
+       lwi     r6, r1, PTO+PT_R6;
+       lwi     r7, r1, PTO+PT_R7;
+       lwi     r8, r1, PTO+PT_R8;
+       lwi     r9, r1, PTO+PT_R9;
+       lwi     r10, r1, PTO+PT_R10;
+4:
+/* Jump to the appropriate function for the system call number in r12
+ * (r12 is not preserved), or return an error if r12 is not valid.
+ * The LP register should point to the location where the called function
+ * should return.  [note that MAKE_SYS_CALL uses label 1] */
+       /* See if the system call number is valid */
        addi    r11, r12, -__NR_syscalls;
-       bgei    r11,1f;
+       bgei    r11,5f;
        /* Figure out which function to use for this system call.  */
        /* Note Microblaze barrel shift is optional, so don't rely on it */
        add     r12, r12, r12;                  /* convert num -> ptr */
        add     r12, r12, r12;
 
        /* Track syscalls and store them to r0_ram */
-       lwi     r3, r12, 0x400 + TOPHYS(r0_ram)
+       lwi     r3, r12, 0x400 + r0_ram
        addi    r3, r3, 1
-       swi     r3, r12, 0x400 + TOPHYS(r0_ram)
+       swi     r3, r12, 0x400 + r0_ram
+
+       # Find and jump into the syscall handler.
+       lwi     r12, r12, sys_call_table
+       /* the trap return address needs -8 to adjust for rtsd r15, 8 */
+       la      r15, r0, ret_from_trap-8
+       bra     r12
 
-       lwi     r12, r12, TOPHYS(sys_call_table); /* Function ptr */
-       /* Make the system call.  to r12*/
-       set_vms;
-       rtid    r12, 0;
-       nop;
        /* The syscall number is invalid, return an error.  */
-1:     VM_ON;  /* RETURN() expects virtual mode*/
+5:
        addi    r3, r0, -ENOSYS;
        rtsd    r15,8;          /* looks like a normal subroutine return */
        or      r0, r0, r0
 
 
-/* Entry point used to return from a syscall/trap */
+/* Entry point used to return from a syscall/trap */
 /* We re-enable BIP bit before state restore */
 C_ENTRY(ret_from_trap):
        set_bip;                        /*  Ints masked for state restore*/
@@ -347,6 +378,23 @@ C_ENTRY(ret_from_trap):
 /* See if returning to kernel mode, if so, skip resched &c.  */
        bnei    r11, 2f;
 
+       /* We're returning to user mode, so check for various conditions that
+        * trigger rescheduling. */
+       # FIXME: Restructure all these flag checks.
+       add     r11, r0, CURRENT_TASK;  /* Get current task ptr into r11 */
+       lwi     r11, r11, TS_THREAD_INFO;       /* get thread info */
+       lwi     r11, r11, TI_FLAGS;             /* get flags in thread info */
+       andi    r11, r11, _TIF_WORK_SYSCALL_MASK
+       beqi    r11, 1f
+
+       swi     r3, r1, PTO + PT_R3
+       swi     r4, r1, PTO + PT_R4
+       brlid   r15, do_syscall_trace_leave
+       addik   r5, r1, PTO + PT_R0
+       lwi     r3, r1, PTO + PT_R3
+       lwi     r4, r1, PTO + PT_R4
+1:
+
        /* We're returning to user mode, so check for various conditions that
         * trigger rescheduling. */
        /* Get current task ptr into r11 */
index 0cb64a3..d9f70f8 100644 (file)
@@ -72,7 +72,8 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
 #endif
 
 #if 0
-       printk(KERN_WARNING "Exception %02x in %s mode, FSR=%08x PC=%08x ESR=%08x\n",
+       printk(KERN_WARNING "Exception %02x in %s mode, FSR=%08x PC=%08x " \
+                                                       "ESR=%08x\n",
                        type, user_mode(regs) ? "user" : "kernel", fsr,
                        (unsigned int) regs->pc, (unsigned int) regs->esr);
 #endif
@@ -80,42 +81,50 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
        switch (type & 0x1F) {
        case MICROBLAZE_ILL_OPCODE_EXCEPTION:
                if (user_mode(regs)) {
-                       printk(KERN_WARNING "Illegal opcode exception in user mode.\n");
+                       pr_debug(KERN_WARNING "Illegal opcode exception " \
+                                                       "in user mode.\n");
                        _exception(SIGILL, regs, ILL_ILLOPC, addr);
                        return;
                }
-               printk(KERN_WARNING "Illegal opcode exception in kernel mode.\n");
+               printk(KERN_WARNING "Illegal opcode exception " \
+                                                       "in kernel mode.\n");
                die("opcode exception", regs, SIGBUS);
                break;
        case MICROBLAZE_IBUS_EXCEPTION:
                if (user_mode(regs)) {
-                       printk(KERN_WARNING "Instruction bus error exception in user mode.\n");
+                       pr_debug(KERN_WARNING "Instruction bus error " \
+                                               "exception in user mode.\n");
                        _exception(SIGBUS, regs, BUS_ADRERR, addr);
                        return;
                }
-               printk(KERN_WARNING "Instruction bus error exception in kernel mode.\n");
+               printk(KERN_WARNING "Instruction bus error exception " \
+                                                       "in kernel mode.\n");
                die("bus exception", regs, SIGBUS);
                break;
        case MICROBLAZE_DBUS_EXCEPTION:
                if (user_mode(regs)) {
-                       printk(KERN_WARNING "Data bus error exception in user mode.\n");
+                       pr_debug(KERN_WARNING "Data bus error exception " \
+                                                       "in user mode.\n");
                        _exception(SIGBUS, regs, BUS_ADRERR, addr);
                        return;
                }
-               printk(KERN_WARNING "Data bus error exception in kernel mode.\n");
+               printk(KERN_WARNING "Data bus error exception " \
+                                                       "in kernel mode.\n");
                die("bus exception", regs, SIGBUS);
                break;
        case MICROBLAZE_DIV_ZERO_EXCEPTION:
                if (user_mode(regs)) {
-                       printk(KERN_WARNING "Divide by zero exception in user mode\n");
-                       _exception(SIGILL, regs, ILL_ILLOPC, addr);
+                       pr_debug(KERN_WARNING "Divide by zero exception " \
+                                                       "in user mode\n");
+                       _exception(SIGILL, regs, FPE_INTDIV, addr);
                        return;
                }
-               printk(KERN_WARNING "Divide by zero exception in kernel mode.\n");
+               printk(KERN_WARNING "Divide by zero exception " \
+                                                       "in kernel mode.\n");
                die("Divide by exception", regs, SIGBUS);
                break;
        case MICROBLAZE_FPU_EXCEPTION:
-               printk(KERN_WARNING "FPU exception\n");
+               pr_debug(KERN_WARNING "FPU exception\n");
                /* IEEE FP exception */
                /* I removed fsr variable and use code var for storing fsr */
                if (fsr & FSR_IO)
@@ -133,7 +142,7 @@ asmlinkage void full_exception(struct pt_regs *regs, unsigned int type,
 
 #ifdef CONFIG_MMU
        case MICROBLAZE_PRIVILEGED_EXCEPTION:
-               printk(KERN_WARNING "Privileged exception\n");
+               pr_debug(KERN_WARNING "Privileged exception\n");
                /* "brk r0,r0" - used as debug breakpoint */
                if (get_user(code, (unsigned long *)regs->pc) == 0
                        && code == 0x980c0000) {
index e41c6ce..697ce30 100644 (file)
@@ -54,6 +54,16 @@ ENTRY(_start)
        mfs     r1, rmsr
        andi    r1, r1, ~2
        mts     rmsr, r1
+/*
+ * Check whether this MicroBlaze actually implements the msr
+ * instructions: set the carry bit, try to clear it with msrclr and
+ * read it back - if msrclr worked, the CPU has the instructions.
+ */
+       /* r8 = 0: CPU has the msr instructions, 1: it does not */
+       rsubi   r0, r0, 1       /* set the carry bit */
+       msrclr  r0, 0x4         /* try to clear it */
+       /* read the carry bit, r8 will be '0' if msrclr exists */
+       addik   r8, r0, 0
 
 /* r7 may point to an FDT, or there may be one linked in.
    if it's in r7, we've got to save it away ASAP.
@@ -209,8 +219,8 @@ start_here:
         * Please see $(ARCH)/mach-$(SUBARCH)/setup.c for
         * the function.
         */
-       la      r8, r0, machine_early_init
-       brald   r15, r8
+       la      r9, r0, machine_early_init
+       brald   r15, r9
        nop
 
 #ifndef CONFIG_MMU
index 3288c97..6b0288e 100644 (file)
 #define NUM_TO_REG(num)                r ## num
 
 #ifdef CONFIG_MMU
-/* FIXME you can't change first load of MSR because there is
- * hardcoded jump bri 4 */
        #define RESTORE_STATE                   \
+               lwi     r5, r1, 0;              \
+               mts     rmsr, r5;               \
+               nop;                            \
                lwi     r3, r1, PT_R3;          \
                lwi     r4, r1, PT_R4;          \
                lwi     r5, r1, PT_R5;          \
@@ -309,6 +310,9 @@ _hw_exception_handler:
        lwi     r31, r0, TOPHYS(PER_CPU(CURRENT_SAVE)) /* get saved current */
 #endif
 
+       mfs     r5, rmsr;
+       nop
+       swi     r5, r1, 0;
        mfs     r3, resr
        nop
        mfs     r4, rear;
@@ -380,6 +384,8 @@ handle_other_ex: /* Handle Other exceptions here */
        addk    r8, r17, r0; /* Load exception address */
        bralid  r15, full_exception; /* Branch to the handler */
        nop;
+       mts     r0, rfsr;       /* Clear sticky fsr */
+       nop
 
        /*
         * Trigger execution of the signal handler by enabling
index 00b12c6..4201c74 100644 (file)
@@ -235,6 +235,7 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp)
        regs->pc = pc;
        regs->r1 = usp;
        regs->pt_mode = 0;
+       regs->msr |= MSR_UMS;
 }
 
 #ifdef CONFIG_MMU
index 53ff39a..4b3ac32 100644 (file)
 #include <linux/sched.h>
 #include <linux/ptrace.h>
 #include <linux/signal.h>
+#include <linux/elf.h>
+#include <linux/audit.h>
+#include <linux/seccomp.h>
+#include <linux/tracehook.h>
 
 #include <linux/errno.h>
 #include <asm/processor.h>
@@ -174,6 +178,64 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
        return rval;
 }
 
+asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
+{
+       long ret = 0;
+
+       secure_computing(regs->r12);
+
+       if (test_thread_flag(TIF_SYSCALL_TRACE) &&
+           tracehook_report_syscall_entry(regs))
+               /*
+                * Tracing decided this syscall should not happen.
+                * We'll return a bogus call number to get an ENOSYS
+                * error, but leave the original number in regs->r12.
+                */
+               ret = -1L;
+
+       if (unlikely(current->audit_context))
+               audit_syscall_entry(EM_XILINX_MICROBLAZE, regs->r12,
+                                   regs->r5, regs->r6,
+                                   regs->r7, regs->r8);
+
+       return ret ?: regs->r12;
+}
+
+asmlinkage void do_syscall_trace_leave(struct pt_regs *regs)
+{
+       int step;
+
+       if (unlikely(current->audit_context))
+               audit_syscall_exit(AUDITSC_RESULT(regs->r3), regs->r3);
+
+       step = test_thread_flag(TIF_SINGLESTEP);
+       if (step || test_thread_flag(TIF_SYSCALL_TRACE))
+               tracehook_report_syscall_exit(regs, step);
+}
+
+#if 0
+static asmlinkage void syscall_trace(void)
+{
+       if (!test_thread_flag(TIF_SYSCALL_TRACE))
+               return;
+       if (!(current->ptrace & PT_PTRACED))
+               return;
+       /* The 0x80 provides a way for the tracing parent to distinguish
+        between a syscall stop and SIGTRAP delivery */
+       ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
+                               ? 0x80 : 0));
+       /*
+        * this isn't the same as continuing with a signal, but it will do
+        * for normal use. strace only continues with a signal if the
+        * stopping signal is not SIGTRAP. -brl
+        */
+       if (current->exit_code) {
+               send_sig(current->exit_code, current, 1);
+               current->exit_code = 0;
+       }
+}
+#endif
+
 void ptrace_disable(struct task_struct *child)
 {
        /* nothing to do */
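From user space these hooks are what make PTRACE_SYSCALL useful on
microblaze. A minimal, illustrative tracer that stops a child at every
syscall entry and exit:

    #include <stdio.h>
    #include <sys/ptrace.h>
    #include <sys/wait.h>
    #include <unistd.h>

    int main(void)
    {
            int status;
            pid_t pid = fork();

            if (pid == 0) {
                    ptrace(PTRACE_TRACEME, 0, NULL, NULL);
                    execlp("true", "true", (char *)NULL);
                    return 1;
            }
            waitpid(pid, &status, 0);       /* child stops after exec */
            while (ptrace(PTRACE_SYSCALL, pid, NULL, NULL) == 0) {
                    if (waitpid(pid, &status, 0) < 0 || WIFEXITED(status))
                            break;
                    puts("syscall entry or exit");
            }
            return 0;
    }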
index 2a97bf5..8c1e0f4 100644 (file)
@@ -94,7 +94,7 @@ inline unsigned get_romfs_len(unsigned *addr)
 #endif /* CONFIG_MTD_UCLINUX_EBSS */
 
 void __init machine_early_init(const char *cmdline, unsigned int ram,
-               unsigned int fdt)
+               unsigned int fdt, unsigned int msr)
 {
        unsigned long *src, *dst = (unsigned long *)0x0;
 
@@ -157,6 +157,16 @@ void __init machine_early_init(const char *cmdline, unsigned int ram,
        early_printk("New klimit: 0x%08x\n", (unsigned)klimit);
 #endif
 
+#if CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR
+       if (msr)
+               early_printk("!!!Your kernel has setup MSR instruction but "
+                               "CPU don't have it %d\n", msr);
+#else
+       if (!msr)
+               early_printk("!!!Your kernel not setup MSR instruction but "
+                               "CPU have it %d\n", msr);
+#endif
+
        for (src = __ivt_start; src < __ivt_end; src++, dst++)
                *dst = *src;
 
index ec5fa91..e704188 100644 (file)
@@ -12,13 +12,16 @@ OUTPUT_FORMAT("elf32-microblaze", "elf32-microblaze", "elf32-microblaze")
 OUTPUT_ARCH(microblaze)
 ENTRY(_start)
 
+#include <asm/page.h>
 #include <asm-generic/vmlinux.lds.h>
+#include <asm/thread_info.h>
 
 jiffies = jiffies_64 + 4;
 
 SECTIONS {
        . = CONFIG_KERNEL_START;
-       .text : {
+       _start = CONFIG_KERNEL_BASE_ADDR;
+       .text : AT(ADDR(.text) - LOAD_OFFSET) {
                _text = . ;
                _stext = . ;
                *(.text .text.*)
@@ -33,24 +36,22 @@ SECTIONS {
        }
 
        . = ALIGN (4) ;
-       _fdt_start = . ; /* place for fdt blob */
-       . = . + 0x4000;
-       _fdt_end = . ;
+       __fdt_blob : AT(ADDR(__fdt_blob) - LOAD_OFFSET) {
+               _fdt_start = . ;                /* place for fdt blob */
+               *(__fdt_blob) ;                 /* Any link-placed DTB */
+               . = _fdt_start + 0x4000;        /* Pad up to 16kbyte */
+               _fdt_end = . ;
+       }
 
        . = ALIGN(16);
        RODATA
-       . = ALIGN(16);
-       __ex_table : {
-               __start___ex_table = .;
-               *(__ex_table)
-               __stop___ex_table = .;
-       }
+       EXCEPTION_TABLE(16)
 
        /*
         * sdata2 section can go anywhere, but must be word aligned
         * and SDA2_BASE must point to the middle of it
         */
-       .sdata2 : {
+       .sdata2 : AT(ADDR(.sdata2) - LOAD_OFFSET) {
                _ssrw = .;
                . = ALIGN(4096); /* page aligned when MMU used - origin 0x8 */
                *(.sdata2)
@@ -61,12 +62,7 @@ SECTIONS {
        }
 
        _sdata = . ;
-       .data ALIGN (4096) : { /* page aligned when MMU used - origin 0x4 */
-               DATA_DATA
-               CONSTRUCTORS
-       }
-       . = ALIGN(32);
-       .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+       RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE)
        _edata = . ;
 
        /* Reserve some low RAM for r0 based memory references */
@@ -74,18 +70,14 @@ SECTIONS {
        r0_ram = . ;
        . = . +  4096;  /* a page should be enough */
 
-       /* The initial task */
-       . = ALIGN(8192);
-       .data.init_task : { *(.data.init_task) }
-
        /* Under the microblaze ABI, .sdata and .sbss must be contiguous */
        . = ALIGN(8);
-       .sdata : {
+       .sdata : AT(ADDR(.sdata) - LOAD_OFFSET) {
                _ssro = .;
                *(.sdata)
        }
 
-       .sbss : {
+       .sbss : AT(ADDR(.sbss) - LOAD_OFFSET) {
                _ssbss = .;
                *(.sbss)
                _esbss = .;
@@ -96,47 +88,36 @@ SECTIONS {
 
        __init_begin = .;
 
-       . = ALIGN(4096);
-       .init.text : {
-               _sinittext = . ;
-               INIT_TEXT
-               _einittext = .;
-       }
+       INIT_TEXT_SECTION(PAGE_SIZE)
 
-       .init.data : {
+       .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
                INIT_DATA
        }
 
        . = ALIGN(4);
-       .init.ivt : {
+       .init.ivt : AT(ADDR(.init.ivt) - LOAD_OFFSET) {
                __ivt_start = .;
                *(.init.ivt)
                __ivt_end = .;
        }
 
-       .init.setup : {
-               __setup_start = .;
-               *(.init.setup)
-               __setup_end = .;
+       .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
+               INIT_SETUP(0)
        }
 
-       .initcall.init : {
-               __initcall_start = .;
-               INITCALLS
-               __initcall_end = .;
+       .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET ) {
+               INIT_CALLS
        }
 
-       .con_initcall.init : {
-               __con_initcall_start = .;
-               *(.con_initcall.init)
-               __con_initcall_end = .;
+       .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
+               CON_INITCALL
        }
 
        SECURITY_INIT
 
        __init_end_before_initramfs = .;
 
-       .init.ramfs ALIGN(4096) : {
+       .init.ramfs ALIGN(4096) : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
                __initramfs_start = .;
                *(.init.ramfs)
                __initramfs_end = .;
@@ -152,7 +133,8 @@ SECTIONS {
        }
        __init_end = .;
 
-       .bss ALIGN (4096) : { /* page aligned when MMU used */
+       .bss ALIGN (4096) : AT(ADDR(.bss) - LOAD_OFFSET) {
+               /* page aligned when MMU used */
                __bss_start = . ;
                        *(.bss*)
                        *(COMMON)
index 1110784..a44892e 100644 (file)
@@ -180,7 +180,8 @@ void free_initrd_mem(unsigned long start, unsigned long end)
                totalram_pages++;
                pages++;
        }
-       printk(KERN_NOTICE "Freeing initrd memory: %dk freed\n", pages);
+       printk(KERN_NOTICE "Freeing initrd memory: %dk freed\n",
+                                       (int)(pages * (PAGE_SIZE / 1024)));
 }
 #endif
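With 4 KiB pages the message is now in kilobytes rather than a raw page
count: freeing a 256-page initrd prints "1024k freed" where the old code
printed "256k freed".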
 
index 3f04d4c..b3deed8 100644 (file)
@@ -56,12 +56,12 @@ int sysctl_lasatstring(ctl_table *table,
 
 
 /* And the same for proc */
-int proc_dolasatstring(ctl_table *table, int write, struct file *filp,
+int proc_dolasatstring(ctl_table *table, int write,
                       void *buffer, size_t *lenp, loff_t *ppos)
 {
        int r;
 
-       r = proc_dostring(table, write, filp, buffer, lenp, ppos);
+       r = proc_dostring(table, write, buffer, lenp, ppos);
        if ((!write) || r)
                return r;
 
@@ -71,12 +71,12 @@ int proc_dolasatstring(ctl_table *table, int write, struct file *filp,
 }
 
 /* proc function to write EEPROM after changing int entry */
-int proc_dolasatint(ctl_table *table, int write, struct file *filp,
+int proc_dolasatint(ctl_table *table, int write,
                       void *buffer, size_t *lenp, loff_t *ppos)
 {
        int r;
 
-       r = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+       r = proc_dointvec(table, write, buffer, lenp, ppos);
        if ((!write) || r)
                return r;
 
@@ -89,7 +89,7 @@ int proc_dolasatint(ctl_table *table, int write, struct file *filp,
 static int rtctmp;
 
 /* proc function to read/write RealTime Clock */
-int proc_dolasatrtc(ctl_table *table, int write, struct file *filp,
+int proc_dolasatrtc(ctl_table *table, int write,
                       void *buffer, size_t *lenp, loff_t *ppos)
 {
        struct timespec ts;
@@ -102,7 +102,7 @@ int proc_dolasatrtc(ctl_table *table, int write, struct file *filp,
                if (rtctmp < 0)
                        rtctmp = 0;
        }
-       r = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+       r = proc_dointvec(table, write, buffer, lenp, ppos);
        if (r)
                return r;
 
@@ -154,7 +154,7 @@ int sysctl_lasat_rtc(ctl_table *table,
 #endif
 
 #ifdef CONFIG_INET
-int proc_lasat_ip(ctl_table *table, int write, struct file *filp,
+int proc_lasat_ip(ctl_table *table, int write,
                       void *buffer, size_t *lenp, loff_t *ppos)
 {
        unsigned int ip;
@@ -231,12 +231,12 @@ static int sysctl_lasat_prid(ctl_table *table,
        return 0;
 }
 
-int proc_lasat_prid(ctl_table *table, int write, struct file *filp,
+int proc_lasat_prid(ctl_table *table, int write,
                       void *buffer, size_t *lenp, loff_t *ppos)
 {
        int r;
 
-       r = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+       r = proc_dointvec(table, write, buffer, lenp, ppos);
        if (r < 0)
                return r;
        if (write) {
index 76f41bd..10549dc 100644 (file)
@@ -44,24 +44,8 @@ SECTIONS
   RO_DATA(PAGE_SIZE)
 
   /* writeable */
-  .data : {                    /* Data */
-       DATA_DATA
-       CONSTRUCTORS
-       }
-
-  .data_nosave : { NOSAVE_DATA; }
-
-  .data.page_aligned : { PAGE_ALIGNED_DATA(PAGE_SIZE); }
-  .data.cacheline_aligned : { CACHELINE_ALIGNED_DATA(32); }
-
-  /* rarely changed data like cpu maps */
-  . = ALIGN(32);
-  .data.read_mostly : AT(ADDR(.data.read_mostly)) {
-       READ_MOSTLY_DATA(32);
-       _edata = .;             /* End of data section */
-  }
-
-  .data.init_task : { INIT_TASK_DATA(THREAD_SIZE); }
+  RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE)
+  _edata = .;
 
   /* might get freed after init */
   . = ALIGN(PAGE_SIZE);
@@ -74,22 +58,8 @@ SECTIONS
   /* will be freed after init */
   . = ALIGN(PAGE_SIZE);                /* Init code and data */
   __init_begin = .;
-  .init.text : {
-       _sinittext = .;
-       INIT_TEXT;
-       _einittext = .;
-  }
-  .init.data : { INIT_DATA; }
-  .setup.init : { INIT_SETUP(16); }
-
-  __initcall_start = .;
-  .initcall.init : {
-       INITCALLS
-  }
-  __initcall_end = .;
-  .con_initcall.init : { CON_INITCALL; }
-
-  SECURITY_INIT
+  INIT_TEXT_SECTION(PAGE_SIZE)
+  INIT_DATA_SECTION(16)
   . = ALIGN(4);
   __alt_instructions = .;
   .altinstructions : { *(.altinstructions) }
@@ -100,8 +70,6 @@ SECTIONS
        .exit.text : { EXIT_TEXT; }
        .exit.data : { EXIT_DATA; }
 
-  .init.ramfs : { INIT_RAM_FS; }
-
   PERCPU(32)
   . = ALIGN(PAGE_SIZE);
   __init_end = .;
index 1e1c824..5f39d55 100644 (file)
@@ -28,6 +28,8 @@
 #define F_SETOWN       12      /*  for sockets. */
 #define F_SETSIG       13      /*  for sockets. */
 #define F_GETSIG       14      /*  for sockets. */
+#define F_GETOWN_EX    15
+#define F_SETOWN_EX    16
 
 /* for posix fcntl() and lockf() */
 #define F_RDLCK                01
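F_SETOWN_EX extends F_SETOWN so that SIGIO/SIGURG can be directed at one
specific thread instead of the whole process. A hypothetical user-space
sketch:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <sys/types.h>

    static int own_fd_by_thread(int fd, pid_t tid)
    {
            struct f_owner_ex owner = {
                    .type = F_OWNER_TID,    /* or F_OWNER_PID, F_OWNER_PGRP */
                    .pid  = tid,
            };

            return fcntl(fd, F_SETOWN_EX, &owner);
    }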
index aea1784..775be27 100644 (file)
@@ -77,13 +77,7 @@ SECTIONS
         */
        . = ALIGN(PAGE_SIZE);
        data_start = .;
-       . = ALIGN(16);
-       /* Exception table */
-       __ex_table : {
-               __start___ex_table = .;
-               *(__ex_table)
-               __stop___ex_table = .;
-       }
+       EXCEPTION_TABLE(16)
 
        NOTES
 
@@ -94,23 +88,8 @@ SECTIONS
                __stop___unwind = .;
        }
 
-       /* rarely changed data like cpu maps */
-       . = ALIGN(16);
-       .data.read_mostly : {
-               *(.data.read_mostly)
-       }
-
-       . = ALIGN(L1_CACHE_BYTES);
        /* Data */
-       .data : {
-               DATA_DATA
-               CONSTRUCTORS
-       }
-
-       . = ALIGN(L1_CACHE_BYTES);
-       .data.cacheline_aligned : {
-               *(.data.cacheline_aligned)
-       }
+       RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
 
        /* PA-RISC locks requires 16-byte alignment */
        . = ALIGN(16);
@@ -118,17 +97,6 @@ SECTIONS
                *(.data.lock_aligned)
        }
 
-       /* nosave data is really only used for software suspend...it's here
-        * just in case we ever implement it
-        */
-       . = ALIGN(PAGE_SIZE);
-       __nosave_begin = .;
-       .data_nosave : {
-               *(.data.nosave)
-       }
-       . = ALIGN(PAGE_SIZE);
-       __nosave_end = .;
-
        /* End of data section */
        _edata = .;
 
@@ -147,14 +115,6 @@ SECTIONS
        }
        __bss_stop = .;
 
-
-       /* assembler code expects init_task to be 16k aligned */
-       . = ALIGN(16384);
-       /* init_task */
-       .data.init_task : {
-               *(.data.init_task)
-       }
-
 #ifdef CONFIG_64BIT
        . = ALIGN(16);
        /* Linkage tables */
@@ -172,64 +132,8 @@ SECTIONS
        /* reserve space for interrupt stack by aligning __init* to 16k */
        . = ALIGN(16384);
        __init_begin = .;
-       .init.text : { 
-               _sinittext = .;
-               INIT_TEXT
-               _einittext = .;
-       }
-       .init.data : {
-               INIT_DATA
-       }
-       . = ALIGN(16);
-       .init.setup : {
-               __setup_start = .;
-               *(.init.setup)
-               __setup_end = .;
-       }
-       .initcall.init : {
-               __initcall_start = .;
-               INITCALLS
-               __initcall_end = .;
-       }
-       .con_initcall.init : {
-               __con_initcall_start = .;
-               *(.con_initcall.init)
-               __con_initcall_end = .;
-       }
-       SECURITY_INIT
-
-       /* alternate instruction replacement.  This is a mechanism x86 uses
-        * to detect the CPU type and replace generic instruction sequences
-        * with CPU specific ones.  We don't currently do this in PA, but
-        * it seems like a good idea...
-        */
-       . = ALIGN(4);
-       .altinstructions : {
-               __alt_instructions = .;
-               *(.altinstructions)
-               __alt_instructions_end = .; 
-       } 
-       .altinstr_replacement : {
-               *(.altinstr_replacement)
-       } 
-
-       /* .exit.text is discard at runtime, not link time, to deal with references
-        *  from .altinstructions and .eh_frame
-        */
-       .exit.text : {
-               EXIT_TEXT
-       }
-       .exit.data : {
-               EXIT_DATA
-       }
-#ifdef CONFIG_BLK_DEV_INITRD
-       . = ALIGN(PAGE_SIZE);
-       .init.ramfs : {
-               __initramfs_start = .;
-               *(.init.ramfs)
-               __initramfs_end = .;
-       }
-#endif
+       INIT_TEXT_SECTION(16384)
+       INIT_DATA_SECTION(16)
 
        PERCPU(PAGE_SIZE)
        . = ALIGN(PAGE_SIZE);
index 4fd4790..10a0a54 100644 (file)
@@ -385,9 +385,15 @@ config NUMA
 
 config NODES_SHIFT
        int
+       default "8" if PPC64
        default "4"
        depends on NEED_MULTIPLE_NODES
 
+config MAX_ACTIVE_REGIONS
+       int
+       default "256" if PPC64
+       default "32"
+
 config ARCH_SELECT_MEMORY_MODEL
        def_bool y
        depends on PPC64
index aacf629..1a54a3b 100644 (file)
@@ -164,6 +164,17 @@ PHONY += $(BOOT_TARGETS)
 
 boot := arch/$(ARCH)/boot
 
+ifeq ($(CONFIG_RELOCATABLE),y)
+quiet_cmd_relocs_check = CALL    $<
+      cmd_relocs_check = perl $< "$(OBJDUMP)" "$(obj)/vmlinux"
+
+PHONY += relocs_check
+relocs_check: arch/powerpc/relocs_check.pl vmlinux
+       $(call cmd,relocs_check)
+
+zImage: relocs_check
+endif
+
 $(BOOT_TARGETS): vmlinux
        $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
 
index 9dade15..6d94d27 100644 (file)
@@ -15,7 +15,16 @@ struct dev_archdata {
 
        /* DMA operations on that device */
        struct dma_map_ops      *dma_ops;
-       void                    *dma_data;
+
+       /*
+        * When an iommu is in use, dma_data is used as a ptr to the base of the
+        * iommu_table.  Otherwise, it is a simple numerical offset.
+        */
+       union {
+               dma_addr_t      dma_offset;
+               void            *iommu_table_base;
+       } dma_data;
+
 #ifdef CONFIG_SWIOTLB
        dma_addr_t              max_direct_dma_addr;
 #endif
index cb2ca41..e281dae 100644 (file)
@@ -26,7 +26,6 @@ extern void *dma_direct_alloc_coherent(struct device *dev, size_t size,
 extern void dma_direct_free_coherent(struct device *dev, size_t size,
                                     void *vaddr, dma_addr_t dma_handle);
 
-extern unsigned long get_dma_direct_offset(struct device *dev);
 
 #ifdef CONFIG_NOT_COHERENT_CACHE
 /*
@@ -90,6 +89,28 @@ static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
        dev->archdata.dma_ops = ops;
 }
 
+/*
+ * get_dma_offset()
+ *
+ * Get the dma offset on configurations where the dma address can be determined
+ * from the physical address by adding a simple offset.  Direct DMA and
+ * swiotlb use this function, but it is typically not used by implementations
+ * with an iommu.
+ */
+static inline dma_addr_t get_dma_offset(struct device *dev)
+{
+       if (dev)
+               return dev->archdata.dma_data.dma_offset;
+
+       return PCI_DRAM_OFFSET;
+}
+
+static inline void set_dma_offset(struct device *dev, dma_addr_t off)
+{
+       if (dev)
+               dev->archdata.dma_data.dma_offset = off;
+}
+
 /* this will be removed soon */
 #define flush_write_buffers()
 
@@ -181,12 +202,12 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 
 static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
-       return paddr + get_dma_direct_offset(dev);
+       return paddr + get_dma_offset(dev);
 }
 
 static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
 {
-       return daddr - get_dma_direct_offset(dev);
+       return daddr - get_dma_offset(dev);
 }
 
 #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
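To make the offset arithmetic concrete (the offset value is hypothetical):

/* After set_dma_offset(dev, 0x80000000ull):
 *   phys_to_dma(dev, 0x1000)     == 0x80001000
 *   dma_to_phys(dev, 0x80001000) == 0x1000
 */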
diff --git a/arch/powerpc/include/asm/fsldma.h b/arch/powerpc/include/asm/fsldma.h
new file mode 100644 (file)
index 0000000..a67aeed
--- /dev/null
@@ -0,0 +1,136 @@
+/*
+ * Freescale MPC83XX / MPC85XX DMA Controller
+ *
+ * Copyright (c) 2009 Ira W. Snyder <iws@ovro.caltech.edu>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __ARCH_POWERPC_ASM_FSLDMA_H__
+#define __ARCH_POWERPC_ASM_FSLDMA_H__
+
+#include <linux/dmaengine.h>
+
+/*
+ * Definitions for the Freescale DMA controller's DMA_SLAVE implementation
+ *
+ * The Freescale DMA_SLAVE implementation was designed to handle many-to-many
+ * transfers. An example usage would be an accelerated copy between two
+ * scatterlists. Another example use would be an accelerated copy from
+ * multiple non-contiguous device buffers into a single scatterlist.
+ *
+ * A DMA_SLAVE transaction is defined by a struct fsl_dma_slave. This
+ * structure contains a list of hardware addresses that should be copied
+ * to/from the scatterlist passed into device_prep_slave_sg(). The structure
+ * also has some fields to enable hardware-specific features.
+ */
+
+/**
+ * struct fsl_dma_hw_addr
+ * @entry: linked list entry
+ * @address: the hardware address
+ * @length: length to transfer
+ *
+ * Holds a single physical hardware address / length pair for use
+ * with the DMAEngine DMA_SLAVE API.
+ */
+struct fsl_dma_hw_addr {
+       struct list_head entry;
+
+       dma_addr_t address;
+       size_t length;
+};
+
+/**
+ * struct fsl_dma_slave
+ * @addresses: a linked list of struct fsl_dma_hw_addr structures
+ * @request_count: value for DMA request count
+ * @src_loop_size: setup and enable constant source-address DMA transfers
+ * @dst_loop_size: setup and enable constant destination address DMA transfers
+ * @external_start: enable externally started DMA transfers
+ * @external_pause: enable externally paused DMA transfers
+ *
+ * Holds a list of address / length pairs for use with the DMAEngine
+ * DMA_SLAVE API implementation for the Freescale DMA controller.
+ */
+struct fsl_dma_slave {
+
+       /* List of hardware address/length pairs */
+       struct list_head addresses;
+
+       /* Support for extra controller features */
+       unsigned int request_count;
+       unsigned int src_loop_size;
+       unsigned int dst_loop_size;
+       bool external_start;
+       bool external_pause;
+};
+
+/**
+ * fsl_dma_slave_append - add an address/length pair to a struct fsl_dma_slave
+ * @slave: the &struct fsl_dma_slave to add to
+ * @address: the hardware address to add
+ * @length: the length of bytes to transfer from @address
+ *
+ * Add a hardware address/length pair to a struct fsl_dma_slave. Returns 0 on
+ * success, -ERRNO otherwise.
+ */
+static inline int fsl_dma_slave_append(struct fsl_dma_slave *slave,
+                                      dma_addr_t address, size_t length)
+{
+       struct fsl_dma_hw_addr *addr;
+
+       addr = kzalloc(sizeof(*addr), GFP_ATOMIC);
+       if (!addr)
+               return -ENOMEM;
+
+       INIT_LIST_HEAD(&addr->entry);
+       addr->address = address;
+       addr->length = length;
+
+       list_add_tail(&addr->entry, &slave->addresses);
+       return 0;
+}
+
+/**
+ * fsl_dma_slave_free - free a struct fsl_dma_slave
+ * @slave: the struct fsl_dma_slave to free
+ *
+ * Free a struct fsl_dma_slave and all associated address/length pairs
+ */
+static inline void fsl_dma_slave_free(struct fsl_dma_slave *slave)
+{
+       struct fsl_dma_hw_addr *addr, *tmp;
+
+       if (slave) {
+               list_for_each_entry_safe(addr, tmp, &slave->addresses, entry) {
+                       list_del(&addr->entry);
+                       kfree(addr);
+               }
+
+               kfree(slave);
+       }
+}
+
+/**
+ * fsl_dma_slave_alloc - allocate a struct fsl_dma_slave
+ * @gfp: the flags to pass to kzalloc when allocating this structure
+ *
+ * Allocate a struct fsl_dma_slave for use by the DMA_SLAVE API. Returns a new
+ * struct fsl_dma_slave on success, or NULL on failure.
+ */
+static inline struct fsl_dma_slave *fsl_dma_slave_alloc(gfp_t gfp)
+{
+       struct fsl_dma_slave *slave;
+
+       slave = kzalloc(sizeof(*slave), gfp);
+       if (!slave)
+               return NULL;
+
+       INIT_LIST_HEAD(&slave->addresses);
+       return slave;
+}
+
+#endif /* __ARCH_POWERPC_ASM_FSLDMA_H__ */
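A minimal usage sketch for the helpers above (not part of the patch; the addresses and lengths are made up, and the list would normally be handed to device_prep_slave_sg() through the channel before being freed):

static int example_build_slave_list(void)
{
	struct fsl_dma_slave *slave;

	slave = fsl_dma_slave_alloc(GFP_KERNEL);
	if (!slave)
		return -ENOMEM;

	/* two non-contiguous hardware windows */
	if (fsl_dma_slave_append(slave, 0xf0000000, 4096) ||
	    fsl_dma_slave_append(slave, 0xf0010000, 4096)) {
		fsl_dma_slave_free(slave);
		return -ENOMEM;
	}

	/* ... pass to device_prep_slave_sg() here ... */

	fsl_dma_slave_free(slave);
	return 0;
}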
index 7464c0d..edfc980 100644 (file)
@@ -70,6 +70,16 @@ struct iommu_table {
 
 struct scatterlist;
 
+static inline void set_iommu_table_base(struct device *dev, void *base)
+{
+       dev->archdata.dma_data.iommu_table_base = base;
+}
+
+static inline void *get_iommu_table_base(struct device *dev)
+{
+       return dev->archdata.dma_data.iommu_table_base;
+}
+
 /* Frees table for an individual device node */
 extern void iommu_free_table(struct iommu_table *tbl, const char *node_name);
 
index ccc68b5..5a9ede4 100644 (file)
@@ -29,7 +29,7 @@ int reserve_pmc_hardware(perf_irq_t new_perf_irq);
 void release_pmc_hardware(void);
 void ppc_enable_pmcs(void);
 
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S_64
 #include <asm/lppaca.h>
 
 static inline void ppc_set_pmu_inuse(int inuse)
index 6c3e1f4..ec0b0b0 100644 (file)
@@ -43,6 +43,7 @@
 #define        _PAGE_NO_CACHE  0x004   /* I: caching is inhibited */
 #define        _PAGE_WRITETHRU 0x008   /* W: caching is write-through */
 #define        _PAGE_USER      0x010   /* matches one of the zone permission bits */
+#define        _PAGE_SPECIAL   0x020   /* software: Special page */
 #define        _PAGE_RW        0x040   /* software: Writes permitted */
 #define        _PAGE_DIRTY     0x080   /* software: dirty page */
 #define _PAGE_HWWRITE  0x100   /* hardware: Dirty & RW, set in exception */
index 94e9797..dd5ea95 100644 (file)
@@ -32,6 +32,7 @@
 #define _PAGE_FILE     0x0002  /* when !present: nonlinear file mapping */
 #define _PAGE_NO_CACHE 0x0002  /* I: cache inhibit */
 #define _PAGE_SHARED   0x0004  /* No ASID (context) compare */
+#define _PAGE_SPECIAL  0x0008  /* SW entry, forced to 0 by the TLB miss */
 
 /* These five software bits must be masked out when the entry is loaded
  * into the TLB.
index c3b6507..f2b3701 100644 (file)
@@ -25,9 +25,6 @@
 #ifndef _PAGE_WRITETHRU
 #define _PAGE_WRITETHRU        0
 #endif
-#ifndef _PAGE_SPECIAL
-#define _PAGE_SPECIAL  0
-#endif
 #ifndef _PAGE_4K_PFN
 #define _PAGE_4K_PFN           0
 #endif
@@ -179,7 +176,5 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void);
 #define HAVE_PAGE_AGP
 
 /* Advertise support for _PAGE_SPECIAL */
-#ifdef _PAGE_SPECIAL
 #define __HAVE_ARCH_PTE_SPECIAL
-#endif
 
index 87ddb3f..37771a5 100644 (file)
@@ -18,7 +18,7 @@
 static void *dma_iommu_alloc_coherent(struct device *dev, size_t size,
                                      dma_addr_t *dma_handle, gfp_t flag)
 {
-       return iommu_alloc_coherent(dev, dev->archdata.dma_data, size,
+       return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
                                    dma_handle, device_to_mask(dev), flag,
                                    dev_to_node(dev));
 }
@@ -26,7 +26,7 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size,
 static void dma_iommu_free_coherent(struct device *dev, size_t size,
                                    void *vaddr, dma_addr_t dma_handle)
 {
-       iommu_free_coherent(dev->archdata.dma_data, size, vaddr, dma_handle);
+       iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle);
 }
 
 /* Creates TCEs for a user provided buffer.  The user buffer must be
@@ -39,8 +39,8 @@ static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page,
                                     enum dma_data_direction direction,
                                     struct dma_attrs *attrs)
 {
-       return iommu_map_page(dev, dev->archdata.dma_data, page, offset, size,
-                             device_to_mask(dev), direction, attrs);
+       return iommu_map_page(dev, get_iommu_table_base(dev), page, offset,
+                             size, device_to_mask(dev), direction, attrs);
 }
 
 
@@ -48,7 +48,7 @@ static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
                                 size_t size, enum dma_data_direction direction,
                                 struct dma_attrs *attrs)
 {
-       iommu_unmap_page(dev->archdata.dma_data, dma_handle, size, direction,
+       iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction,
                         attrs);
 }
 
@@ -57,7 +57,7 @@ static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
                            int nelems, enum dma_data_direction direction,
                            struct dma_attrs *attrs)
 {
-       return iommu_map_sg(dev, dev->archdata.dma_data, sglist, nelems,
+       return iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems,
                            device_to_mask(dev), direction, attrs);
 }
 
@@ -65,14 +65,14 @@ static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist,
                int nelems, enum dma_data_direction direction,
                struct dma_attrs *attrs)
 {
-       iommu_unmap_sg(dev->archdata.dma_data, sglist, nelems, direction,
+       iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems, direction,
                       attrs);
 }
 
 /* We support DMA to/from any memory page via the iommu */
 static int dma_iommu_dma_supported(struct device *dev, u64 mask)
 {
-       struct iommu_table *tbl = dev->archdata.dma_data;
+       struct iommu_table *tbl = get_iommu_table_base(dev);
 
        if (!tbl || tbl->it_offset > mask) {
                printk(KERN_INFO
index 21b784d..6215062 100644 (file)
  * default the offset is PCI_DRAM_OFFSET.
  */
 
-unsigned long get_dma_direct_offset(struct device *dev)
-{
-       if (dev)
-               return (unsigned long)dev->archdata.dma_data;
-
-       return PCI_DRAM_OFFSET;
-}
 
 void *dma_direct_alloc_coherent(struct device *dev, size_t size,
                                dma_addr_t *dma_handle, gfp_t flag)
@@ -37,7 +30,7 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size,
        ret = __dma_alloc_coherent(dev, size, dma_handle, flag);
        if (ret == NULL)
                return NULL;
-       *dma_handle += get_dma_direct_offset(dev);
+       *dma_handle += get_dma_offset(dev);
        return ret;
 #else
        struct page *page;
@@ -51,7 +44,7 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size,
                return NULL;
        ret = page_address(page);
        memset(ret, 0, size);
-       *dma_handle = virt_to_abs(ret) + get_dma_direct_offset(dev);
+       *dma_handle = virt_to_abs(ret) + get_dma_offset(dev);
 
        return ret;
 #endif
@@ -75,7 +68,7 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
        int i;
 
        for_each_sg(sgl, sg, nents, i) {
-               sg->dma_address = sg_phys(sg) + get_dma_direct_offset(dev);
+               sg->dma_address = sg_phys(sg) + get_dma_offset(dev);
                sg->dma_length = sg->length;
                __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
        }
@@ -110,7 +103,7 @@ static inline dma_addr_t dma_direct_map_page(struct device *dev,
 {
        BUG_ON(dir == DMA_NONE);
        __dma_sync_page(page, offset, size, dir);
-       return page_to_phys(page) + offset + get_dma_direct_offset(dev);
+       return page_to_phys(page) + offset + get_dma_offset(dev);
 }
 
 static inline void dma_direct_unmap_page(struct device *dev,
index 9048f96..24dcc0e 100644 (file)
@@ -17,7 +17,6 @@
 #include <asm/cputable.h>
 #include <asm/setup.h>
 #include <asm/thread_info.h>
-#include <asm/reg.h>
 #include <asm/exception-64e.h>
 #include <asm/bug.h>
 #include <asm/irqflags.h>
index e9f4840..bb8209e 100644 (file)
@@ -1117,7 +1117,7 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus)
 
                /* Hook up default DMA ops */
                sd->dma_ops = pci_dma_ops;
-               sd->dma_data = (void *)PCI_DRAM_OFFSET;
+               set_dma_offset(&dev->dev, PCI_DRAM_OFFSET);
 
                /* Additional platform DMA/iommu setup */
                if (ppc_md.pci_dma_dev_setup)
index 0a32164..1168c5f 100644 (file)
@@ -1165,7 +1165,22 @@ static inline unsigned long brk_rnd(void)
 
 unsigned long arch_randomize_brk(struct mm_struct *mm)
 {
-       unsigned long ret = PAGE_ALIGN(mm->brk + brk_rnd());
+       unsigned long base = mm->brk;
+       unsigned long ret;
+
+#ifdef CONFIG_PPC64
+       /*
+        * If we are using 1TB segments and we are allowed to randomise
+        * the heap, we can put it above 1TB so it is backed by a 1TB
+        * segment. Otherwise the heap will be in the bottom 1TB
+        * which always uses 256MB segments and this may result in a
+        * performance penalty.
+        */
+       if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T))
+               base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T);
+#endif
+
+       ret = PAGE_ALIGN(base + brk_rnd());
 
        if (ret < mm->brk)
                return mm->brk;
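A quick check of the arithmetic in the hunk above, assuming SID_SHIFT_1T is 40 as on the 64-bit hash MMUs:

/* 1UL << SID_SHIFT_1T == 1UL << 40 == 0x10000000000 (1 TiB), so a
 * 64-bit task's heap base is clamped up to the first 1TB segment
 * boundary before the brk_rnd() randomisation is added.
 */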
index 864334b..bafac2e 100644 (file)
@@ -800,7 +800,7 @@ static void __init prom_send_capabilities(void)
        root = call_prom("open", 1, 1, ADDR("/"));
        if (root != 0) {
                /* try calling the ibm,client-architecture-support method */
-               prom_printf("Calling ibm,client-architecture...");
+               prom_printf("Calling ibm,client-architecture-support...");
                if (call_prom_ret("call-method", 3, 2, &ret,
                                  ADDR("ibm,client-architecture-support"),
                                  root,
@@ -814,6 +814,7 @@ static void __init prom_send_capabilities(void)
                        return;
                }
                call_prom("close", 1, 0, root);
+               prom_printf(" not implemented\n");
        }
 
        /* no ibm,client-architecture-support call, try the old way */
index 3faaf29..94e2df3 100644 (file)
@@ -240,6 +240,13 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
                goto fail_mmapsem;
        }
 
+       /*
+        * Put vDSO base into mm struct. We need to do this before calling
+        * install_special_mapping or the perf counter mmap tracking code
+        * will fail to recognise it as a vDSO (since arch_vma_name fails).
+        */
+       current->mm->context.vdso_base = vdso_base;
+
        /*
         * our vma flags don't have VM_WRITE so by default, the process isn't
         * allowed to write those pages.
@@ -260,11 +267,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
                                     VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
                                     VM_ALWAYSDUMP,
                                     vdso_pagelist);
-       if (rc)
+       if (rc) {
+               current->mm->context.vdso_base = 0;
                goto fail_mmapsem;
-
-       /* Put vDSO base into mm struct */
-       current->mm->context.vdso_base = vdso_base;
+       }
 
        up_write(&mm->mmap_sem);
        return 0;
index bc7b41e..77f6421 100644 (file)
@@ -1054,6 +1054,8 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
                return NULL;
 
        tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
+       if (tbl == NULL)
+               return NULL;
 
        of_parse_dma_window(dev->dev.archdata.of_node, dma_window,
                            &tbl->it_index, &offset, &size);
@@ -1233,7 +1235,7 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
                vio_cmo_set_dma_ops(viodev);
        else
                viodev->dev.archdata.dma_ops = &dma_iommu_ops;
-       viodev->dev.archdata.dma_data = vio_build_iommu_table(viodev);
+       set_iommu_table_base(&viodev->dev, vio_build_iommu_table(viodev));
        set_dev_node(&viodev->dev, of_node_to_nid(of_node));
 
        /* init generic 'struct device' fields: */
index 58da407..f564293 100644 (file)
@@ -6,6 +6,7 @@
 #include <asm/page.h>
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/cache.h>
+#include <asm/thread_info.h>
 
 ENTRY(_stext)
 
@@ -71,12 +72,7 @@ SECTIONS
        /* Read-only data */
        RODATA
 
-       /* Exception & bug tables */
-       __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
-               __start___ex_table = .;
-               *(__ex_table)
-               __stop___ex_table = .;
-       }
+       EXCEPTION_TABLE(0)
 
        NOTES :kernel :notes
 
@@ -93,12 +89,7 @@ SECTIONS
  */
        . = ALIGN(PAGE_SIZE);
        __init_begin = .;
-
-       .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
-               _sinittext = .;
-               INIT_TEXT
-               _einittext = .;
-       } :kernel
+       INIT_TEXT_SECTION(PAGE_SIZE) :kernel
 
        /* .exit.text is discarded at runtime, not link time,
         * to deal with references from __bug_table
@@ -122,23 +113,16 @@ SECTIONS
 #endif
        }
 
-       . = ALIGN(16);
        .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
-               __setup_start = .;
-               *(.init.setup)
-               __setup_end = .;
+               INIT_SETUP(16)
        }
 
        .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
-               __initcall_start = .;
-               INITCALLS
-               __initcall_end = .;
-               }
+               INIT_CALLS
+       }
 
        .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
-               __con_initcall_start = .;
-               *(.con_initcall.init)
-               __con_initcall_end = .;
+               CON_INITCALL
        }
 
        SECURITY_INIT
@@ -169,14 +153,10 @@ SECTIONS
                __stop___fw_ftr_fixup = .;
        }
 #endif
-#ifdef CONFIG_BLK_DEV_INITRD
-       . = ALIGN(PAGE_SIZE);
        .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
-               __initramfs_start = .;
-               *(.init.ramfs)
-               __initramfs_end = .;
+               INIT_RAM_FS
        }
-#endif
+
        PERCPU(PAGE_SIZE)
 
        . = ALIGN(8);
@@ -240,36 +220,24 @@ SECTIONS
 #endif
 
        /* The initial task and kernel stack */
-#ifdef CONFIG_PPC32
-       . = ALIGN(8192);
-#else
-       . = ALIGN(16384);
-#endif
        .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
-               *(.data.init_task)
+               INIT_TASK_DATA(THREAD_SIZE)
        }
 
-       . = ALIGN(PAGE_SIZE);
        .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
-               *(.data.page_aligned)
+               PAGE_ALIGNED_DATA(PAGE_SIZE)
        }
 
-       . = ALIGN(L1_CACHE_BYTES);
        .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
-               *(.data.cacheline_aligned)
+               CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
        }
 
-       . = ALIGN(L1_CACHE_BYTES);
        .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
-               *(.data.read_mostly)
+               READ_MOSTLY_DATA(L1_CACHE_BYTES)
        }
 
-       . = ALIGN(PAGE_SIZE);
        .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
-               __nosave_begin = .;
-               *(.data.nosave)
-               . = ALIGN(PAGE_SIZE);
-               __nosave_end = .;
+               NOSAVE_DATA
        }
 
        . = ALIGN(PAGE_SIZE);
@@ -280,14 +248,7 @@ SECTIONS
  * And finally the bss
  */
 
-       .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
-               __bss_start = .;
-               *(.sbss) *(.scommon)
-               *(.dynbss)
-               *(.bss)
-               *(COMMON)
-               __bss_stop = .;
-       }
+       BSS_SECTION(0, 0, 0)
 
        . = ALIGN(PAGE_SIZE);
        _end = . ;
index 83f1551..5304093 100644 (file)
@@ -30,6 +30,8 @@
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
 
+#include "mmu_decl.h"
+
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
 #ifdef CONFIG_SMP
@@ -166,7 +168,7 @@ struct page * maybe_pte_to_page(pte_t pte)
  * support falls into the same category.
  */
 
-static pte_t set_pte_filter(pte_t pte)
+static pte_t set_pte_filter(pte_t pte, unsigned long addr)
 {
        pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
        if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
@@ -175,6 +177,17 @@ static pte_t set_pte_filter(pte_t pte)
                if (!pg)
                        return pte;
                if (!test_bit(PG_arch_1, &pg->flags)) {
+#ifdef CONFIG_8xx
+                       /* On 8xx, cache control instructions (particularly
+                        * "dcbst" from flush_dcache_icache) fault as a write
+                        * operation if there is an unpopulated TLB entry
+                        * for the address in question. To work around that,
+                        * we invalidate the TLB here, thus avoiding dcbst
+                        * misbehaviour.
+                        */
+                       /* 8xx doesn't care about PID, size or ind args */
+                       _tlbil_va(addr, 0, 0, 0);
+#endif /* CONFIG_8xx */
                        flush_dcache_icache_page(pg);
                        set_bit(PG_arch_1, &pg->flags);
                }
@@ -194,7 +207,7 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
  * as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so
  * instead we "filter out" the exec permission for non clean pages.
  */
-static pte_t set_pte_filter(pte_t pte)
+static pte_t set_pte_filter(pte_t pte, unsigned long addr)
 {
        struct page *pg;
 
@@ -276,7 +289,7 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
         * this context might not have been activated yet when this
         * is called.
         */
-       pte = set_pte_filter(pte);
+       pte = set_pte_filter(pte, addr);
 
        /* Perform the setting of the PTE */
        __set_pte_at(mm, addr, ptep, pte, 0);
index ef1cccf..f288279 100644 (file)
@@ -18,7 +18,6 @@
 #include <asm/asm-offsets.h>
 #include <asm/cputable.h>
 #include <asm/pgtable.h>
-#include <asm/reg.h>
 #include <asm/exception-64e.h>
 #include <asm/ppc-opcode.h>
 
index 93b0efd..39d361c 100644 (file)
@@ -77,7 +77,7 @@ static void __init celleb_init_direct_mapping(void)
 static void celleb_dma_dev_setup(struct device *dev)
 {
        dev->archdata.dma_ops = get_pci_dma_ops();
-       dev->archdata.dma_data = (void *)celleb_dma_direct_offset;
+       set_dma_offset(dev, celleb_dma_direct_offset);
 }
 
 static void celleb_pci_dma_dev_setup(struct pci_dev *pdev)
index 416db17..ca5bfdf 100644 (file)
@@ -657,15 +657,13 @@ static void cell_dma_dev_setup_fixed(struct device *dev);
 
 static void cell_dma_dev_setup(struct device *dev)
 {
-       struct dev_archdata *archdata = &dev->archdata;
-
        /* Order is important here, these are not mutually exclusive */
        if (get_dma_ops(dev) == &dma_iommu_fixed_ops)
                cell_dma_dev_setup_fixed(dev);
        else if (get_pci_dma_ops() == &dma_iommu_ops)
-               archdata->dma_data = cell_get_iommu_table(dev);
+               set_iommu_table_base(dev, cell_get_iommu_table(dev));
        else if (get_pci_dma_ops() == &dma_direct_ops)
-               archdata->dma_data = (void *)cell_dma_direct_offset;
+               set_dma_offset(dev, cell_dma_direct_offset);
        else
                BUG();
 }
@@ -973,11 +971,10 @@ static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask)
 
 static void cell_dma_dev_setup_fixed(struct device *dev)
 {
-       struct dev_archdata *archdata = &dev->archdata;
        u64 addr;
 
        addr = cell_iommu_get_fixed_address(dev) + dma_iommu_fixed_base;
-       archdata->dma_data = (void *)addr;
+       set_dma_offset(dev, addr);
 
        dev_dbg(dev, "iommu: fixed addr = %llx\n", addr);
 }
index 6c1e101..9d53cb4 100644 (file)
@@ -193,7 +193,7 @@ static void pci_dma_dev_setup_iseries(struct pci_dev *pdev)
                pdn->iommu_table = iommu_init_table(tbl, -1);
        else
                kfree(tbl);
-       pdev->dev.archdata.dma_data = pdn->iommu_table;
+       set_iommu_table_base(&pdev->dev, pdn->iommu_table);
 }
 #else
 #define pci_dma_dev_setup_iseries      NULL
index a0ff03a..7b1d608 100644 (file)
@@ -189,7 +189,7 @@ static void pci_dma_dev_setup_pasemi(struct pci_dev *dev)
        }
 #endif
 
-       dev->dev.archdata.dma_data = &iommu_table_iobmap;
+       set_iommu_table_base(&dev->dev, &iommu_table_iobmap);
 }
 
 static void pci_dma_bus_setup_null(struct pci_bus *b) { }
index 661c8e0..1a0000a 100644 (file)
@@ -482,7 +482,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
                                   phb->node);
                iommu_table_setparms(phb, dn, tbl);
                PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node);
-               dev->dev.archdata.dma_data = PCI_DN(dn)->iommu_table;
+               set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table);
                return;
        }
 
@@ -494,7 +494,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
                dn = dn->parent;
 
        if (dn && PCI_DN(dn))
-               dev->dev.archdata.dma_data = PCI_DN(dn)->iommu_table;
+               set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table);
        else
                printk(KERN_WARNING "iommu: Device %s has no iommu table\n",
                       pci_name(dev));
@@ -538,7 +538,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
         */
        if (dma_window == NULL || pdn->parent == NULL) {
                pr_debug("  no dma window for device, linking to parent\n");
-               dev->dev.archdata.dma_data = PCI_DN(pdn)->iommu_table;
+               set_iommu_table_base(&dev->dev, PCI_DN(pdn)->iommu_table);
                return;
        }
 
@@ -554,7 +554,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
                pr_debug("  found DMA window, table: %p\n", pci->iommu_table);
        }
 
-       dev->dev.archdata.dma_data = pci->iommu_table;
+       set_iommu_table_base(&dev->dev, pci->iommu_table);
 }
 #else  /* CONFIG_PCI */
 #define pci_dma_bus_setup_pSeries      NULL
diff --git a/arch/powerpc/relocs_check.pl b/arch/powerpc/relocs_check.pl
new file mode 100755 (executable)
index 0000000..d257109
--- /dev/null
@@ -0,0 +1,56 @@
+#!/usr/bin/perl
+
+# Copyright © 2009 IBM Corporation
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version
+# 2 of the License, or (at your option) any later version.
+
+# This script checks the relocations of a vmlinux for "suspicious"
+# relocations.
+
+use strict;
+use warnings;
+
+if ($#ARGV != 1) {
+       die "$0 [path to objdump] [path to vmlinux]\n";
+}
+
+# Have Kbuild supply the path to objdump so we handle cross compilation.
+my $objdump = shift;
+my $vmlinux = shift;
+my $bad_relocs_count = 0;
+my $bad_relocs = "";
+my $old_binutils = 0;
+
+open(FD, "$objdump -R $vmlinux|") or die;
+while (<FD>) {
+       study $_;
+
+       # Only look at relocation lines.
+       next if (!/\s+R_/);
+
+       # These relocations are okay
+       next if (/R_PPC64_RELATIVE/ or /R_PPC64_NONE/ or
+                /R_PPC64_ADDR64\s+mach_/);
+
+       # If we see this type of relocation it's an indication that
+       # we /may/ be using an old version of binutils.
+       if (/R_PPC64_UADDR64/) {
+               $old_binutils++;
+       }
+
+       $bad_relocs_count++;
+       $bad_relocs .= $_;
+}
+
+if ($bad_relocs_count) {
+       print "WARNING: $bad_relocs_count bad relocations\n";
+       print $bad_relocs;
+}
+
+if ($old_binutils) {
+       print "WARNING: You need binutils >= 2.19 to build a ".
+             "CONFIG_RELOCATABLE kernel\n";
+}
index 89639ec..ae3c4db 100644 (file)
@@ -297,7 +297,7 @@ static void pci_dma_dev_setup_dart(struct pci_dev *dev)
        /* We only have one iommu table on the mac for now, which makes
         * things simple. Setup all PCI devices to point to this table
         */
-       dev->dev.archdata.dma_data = &iommu_table_dart;
+       set_iommu_table_base(&dev->dev, &iommu_table_dart);
 }
 
 static void pci_dma_bus_setup_dart(struct pci_bus *bus)
index 0e09a45..c6f0a71 100644 (file)
@@ -335,6 +335,16 @@ int cpus_are_in_xmon(void)
 }
 #endif
 
+static inline int unrecoverable_excp(struct pt_regs *regs)
+{
+#ifdef CONFIG_4xx
+       /* We have no MSR_RI bit on 4xx, so we simply return false */
+       return 0;
+#else
+       return ((regs->msr & MSR_RI) == 0);
+#endif
+}
+
 static int xmon_core(struct pt_regs *regs, int fromipi)
 {
        int cmd = 0;
@@ -388,7 +398,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
        bp = NULL;
        if ((regs->msr & (MSR_IR|MSR_PR|MSR_SF)) == (MSR_IR|MSR_SF))
                bp = at_breakpoint(regs->nip);
-       if (bp || (regs->msr & MSR_RI) == 0)
+       if (bp || unrecoverable_excp(regs))
                fromipi = 0;
 
        if (!fromipi) {
@@ -399,7 +409,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
                               cpu, BP_NUM(bp));
                        xmon_print_symbol(regs->nip, " ", ")\n");
                }
-               if ((regs->msr & MSR_RI) == 0)
+               if (unrecoverable_excp(regs))
                        printf("WARNING: exception is not recoverable, "
                               "can't continue\n");
                release_output_lock();
@@ -490,7 +500,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
                        printf("Stopped at breakpoint %x (", BP_NUM(bp));
                        xmon_print_symbol(regs->nip, " ", ")\n");
                }
-               if ((regs->msr & MSR_RI) == 0)
+               if (unrecoverable_excp(regs))
                        printf("WARNING: exception is not recoverable, "
                               "can't continue\n");
                remove_bpts();
index 264528e..b55fd7e 100644 (file)
@@ -50,10 +50,9 @@ static struct platform_device *appldata_pdev;
  * /proc entries (sysctl)
  */
 static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata";
-static int appldata_timer_handler(ctl_table *ctl, int write, struct file *filp,
+static int appldata_timer_handler(ctl_table *ctl, int write,
                                  void __user *buffer, size_t *lenp, loff_t *ppos);
 static int appldata_interval_handler(ctl_table *ctl, int write,
-                                        struct file *filp,
                                         void __user *buffer,
                                         size_t *lenp, loff_t *ppos);
 
@@ -247,7 +246,7 @@ __appldata_vtimer_setup(int cmd)
  * Start/Stop timer, show status of timer (0 = not active, 1 = active)
  */
 static int
-appldata_timer_handler(ctl_table *ctl, int write, struct file *filp,
+appldata_timer_handler(ctl_table *ctl, int write,
                           void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int len;
@@ -289,7 +288,7 @@ out:
  * current timer interval.
  */
 static int
-appldata_interval_handler(ctl_table *ctl, int write, struct file *filp,
+appldata_interval_handler(ctl_table *ctl, int write,
                           void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int len, interval;
@@ -335,7 +334,7 @@ out:
  * monitoring (0 = not in process, 1 = in process)
  */
 static int
-appldata_generic_handler(ctl_table *ctl, int write, struct file *filp,
+appldata_generic_handler(ctl_table *ctl, int write,
                           void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct appldata_ops *ops = NULL, *tmp_ops;
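This is the first of many hunks in this merge making the same mechanical change: sysctl proc handlers lose their struct file * argument. A sketch of the new handler shape (handler name hypothetical):

static int example_handler(ctl_table *ctl, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	/* no struct file * between 'write' and 'buffer' any more */
	return proc_dointvec(ctl, write, buffer, lenp, ppos);
}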
index 4c51256..20f282c 100644 (file)
@@ -881,11 +881,11 @@ static int debug_active=1;
  * if debug_active is already off
  */
 static int
-s390dbf_procactive(ctl_table *table, int write, struct file *filp,
+s390dbf_procactive(ctl_table *table, int write,
                      void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        if (!write || debug_stoppable || !debug_active)
-               return proc_dointvec(table, write, filp, buffer, lenp, ppos);
+               return proc_dointvec(table, write, buffer, lenp, ppos);
        else
                return 0;
 }
index 413c240..b201135 100644 (file)
@@ -262,7 +262,7 @@ cmm_skip_blanks(char *cp, char **endp)
 static struct ctl_table cmm_table[];
 
 static int
-cmm_pages_handler(ctl_table *ctl, int write, struct file *filp,
+cmm_pages_handler(ctl_table *ctl, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        char buf[16], *p;
@@ -303,7 +303,7 @@ cmm_pages_handler(ctl_table *ctl, int write, struct file *filp,
 }
 
 static int
-cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp,
+cmm_timeout_handler(ctl_table *ctl, int write,
                    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        char buf[64], *p;
index b91fa8d..4d58eb0 100644 (file)
@@ -1,12 +1,9 @@
 menu "DMA support"
 
-config SH_DMA_API
-       bool
 
 config SH_DMA
        bool "SuperH on-chip DMA controller (DMAC) support"
        depends on CPU_SH3 || CPU_SH4
-       select SH_DMA_API
        default n
 
 config SH_DMA_IRQ_MULTI
@@ -19,6 +16,15 @@ config SH_DMA_IRQ_MULTI
                     CPU_SUBTYPE_SH7780  || CPU_SUBTYPE_SH7785  || \
                     CPU_SUBTYPE_SH7760
 
+config SH_DMA_API
+       depends on SH_DMA
+       bool "SuperH DMA API support"
+       default n
+       help
+         This enables the SuperH DMA API. If you want to use the
+         generic DMA engine framework instead, say N here and
+         enable DMA_ENGINE and SH_DMAE.
+
 config NR_ONCHIP_DMA_CHANNELS
        int
        depends on SH_DMA
index c606813..d88c948 100644 (file)
@@ -2,8 +2,7 @@
 # Makefile for the SuperH DMA specific kernel interface routines under Linux.
 #
 
-obj-$(CONFIG_SH_DMA_API)       += dma-api.o dma-sysfs.o
-obj-$(CONFIG_SH_DMA)           += dma-sh.o
+obj-$(CONFIG_SH_DMA_API)       += dma-sh.o dma-api.o dma-sysfs.o
 obj-$(CONFIG_PVR2_DMA)         += dma-pvr2.o
 obj-$(CONFIG_G2_DMA)           += dma-g2.o
 obj-$(CONFIG_SH_DMABRG)                += dmabrg.o
index 68a5f4c..78eed3e 100644 (file)
@@ -116,4 +116,17 @@ static u32 dma_base_addr[] __maybe_unused = {
 #define CHCR    0x0C
 #define DMAOR  0x40
 
+/*
+ * SuperH DMA mode flags for the DMA engine driver
+ */
+#define SHDMA_MIX_IRQ  (1 << 1)
+#define SHDMA_DMAOR1   (1 << 2)
+#define SHDMA_DMAE1    (1 << 3)
+
+struct sh_dmae_pdata {
+       unsigned int mode;
+};
+
 #endif /* __DMA_SH_H */
index 37ecc55..ac55b9e 100644 (file)
 
   . = ALIGN(4096);
   .note : { *(.note.*) }
-  __ex_table : {
-       __start___ex_table = .;
-       *(__ex_table)
-       __stop___ex_table = .;
-  }
+  EXCEPTION_TABLE(0)
 
   BUG_TABLE
 
   }
        
   .init.setup : {
-       __setup_start = .;
-       *(.init.setup)
-       __setup_end = .;
+       INIT_SETUP(0)
   }
 
-  . = ALIGN(32);
-  .data.percpu : {
-       __per_cpu_start = . ;
-       *(.data.percpu)
-       __per_cpu_end = . ;
-  }
+  PERCPU(32)
        
   .initcall.init : {
-       __initcall_start = .;
-       INITCALLS
-       __initcall_end = .;
+       INIT_CALLS
   }
 
   .con_initcall.init : {
-       __con_initcall_start = .;
-       *(.con_initcall.init)
-       __con_initcall_end = .;
+       CON_INITCALL
   }
 
   .uml.initcall.init : {
 
    . = ALIGN(4096);
   .init.ramfs : {
-       __initramfs_start = .;
-       *(.init.ramfs)
-       __initramfs_end = .;
+       INIT_RAM_FS
   }
 
index 715a188..7fcad58 100644 (file)
@@ -16,11 +16,7 @@ SECTIONS
   _text = .;
   _stext = .;
   __init_begin = .;
-  .init.text : {
-       _sinittext = .;
-       INIT_TEXT
-       _einittext = .;
-  }
+  INIT_TEXT_SECTION(PAGE_SIZE)
 
   . = ALIGN(PAGE_SIZE);
 
@@ -96,8 +92,7 @@ SECTIONS
   .init_array     : { *(.init_array) }
   .fini_array     : { *(.fini_array) }
   .data           : {
-    . = ALIGN(KERNEL_STACK_SIZE);              /* init_task */
-    *(.data.init_task)
+    INIT_TASK_DATA(KERNEL_STACK_SIZE)
     . = ALIGN(KERNEL_STACK_SIZE);
     *(.data.init_irqstack)
     DATA_DATA
index 2ebd397..e7a6cca 100644 (file)
@@ -22,11 +22,7 @@ SECTIONS
   _text = .;
   _stext = .;
   __init_begin = .;
-  .init.text : {
-       _sinittext = .;
-       INIT_TEXT
-       _einittext = .;
-  }
+  INIT_TEXT_SECTION(PAGE_SIZE)
   . = ALIGN(PAGE_SIZE);
 
   .text      :
@@ -52,8 +48,7 @@ SECTIONS
   init.data : { INIT_DATA }
   .data    :
   {
-    . = ALIGN(KERNEL_STACK_SIZE);              /* init_task */
-    *(.data.init_task)
+    INIT_TASK_DATA(KERNEL_STACK_SIZE)
     . = ALIGN(KERNEL_STACK_SIZE);
     *(.data.init_irqstack)
     DATA_DATA
@@ -81,19 +76,10 @@ SECTIONS
   _edata  =  .;
   PROVIDE (edata = .);
   . = ALIGN(PAGE_SIZE);
-  .sbss      :
-  {
-   __bss_start = .;
-   PROVIDE(_bss_start = .);
-   *(.sbss)
-   *(.scommon)
-  }
-  .bss       :
-  {
-   *(.dynbss)
-   *(.bss)
-   *(COMMON)
-  }
+  __bss_start = .;
+  PROVIDE(_bss_start = .);
+  SBSS(0)
+  BSS(0)
   _end = .;
   PROVIDE (end = .);
 
index e63cf7d..139d4c1 100644 (file)
@@ -40,8 +40,7 @@ extern unsigned int nmi_watchdog;
 #define NMI_INVALID    3
 
 struct ctl_table;
-struct file;
-extern int proc_nmi_enabled(struct ctl_table *, int , struct file *,
+extern int proc_nmi_enabled(struct ctl_table *, int ,
                        void __user *, size_t *, loff_t *);
 extern int unknown_nmi_panic;
 
index f76a162..ada8c20 100644 (file)
@@ -143,7 +143,11 @@ static inline int __pcibus_to_node(const struct pci_bus *bus)
 static inline const struct cpumask *
 cpumask_of_pcibus(const struct pci_bus *bus)
 {
-       return cpumask_of_node(__pcibus_to_node(bus));
+       int node;
+
+       node = __pcibus_to_node(bus);
+       return (node == -1) ? cpu_online_mask :
+                             cpumask_of_node(node);
 }
 #endif
 
index cb66a22..7ff61d6 100644 (file)
@@ -508,14 +508,14 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
 /*
  * proc handler for /proc/sys/kernel/nmi
  */
-int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
+int proc_nmi_enabled(struct ctl_table *table, int write,
                        void __user *buffer, size_t *length, loff_t *ppos)
 {
        int old_state;
 
        nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
        old_state = nmi_watchdog_enabled;
-       proc_dointvec(table, write, file, buffer, length, ppos);
+       proc_dointvec(table, write, buffer, length, ppos);
        if (!!old_state == !!nmi_watchdog_enabled)
                return 0;
 
index cf53a78..8cb4974 100644 (file)
@@ -228,19 +228,11 @@ static long __vsyscall(3) venosys_1(void)
 }
 
 #ifdef CONFIG_SYSCTL
-
-static int
-vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
-                      void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-       return proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-}
-
 static ctl_table kernel_table2[] = {
        { .procname = "vsyscall64",
          .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
          .mode = 0644,
-         .proc_handler = vsyscall_sysctl_change },
+         .proc_handler = proc_dointvec },
        {}
 };
 
index 82728f2..f4cee90 100644 (file)
@@ -167,6 +167,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
        info.si_errno   = 0;
        info.si_code    = si_code;
        info.si_addr    = (void __user *)address;
+       info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0;
 
        force_sig_info(si_signo, &info, tsk);
 }
@@ -790,10 +791,12 @@ out_of_memory(struct pt_regs *regs, unsigned long error_code,
 }
 
 static void
-do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
+do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
+         unsigned int fault)
 {
        struct task_struct *tsk = current;
        struct mm_struct *mm = tsk->mm;
+       int code = BUS_ADRERR;
 
        up_read(&mm->mmap_sem);
 
@@ -809,7 +812,15 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
        tsk->thread.error_code  = error_code;
        tsk->thread.trap_no     = 14;
 
-       force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
+#ifdef CONFIG_MEMORY_FAILURE
+       if (fault & VM_FAULT_HWPOISON) {
+               printk(KERN_ERR
+       "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
+                       tsk->comm, tsk->pid, address);
+               code = BUS_MCEERR_AR;
+       }
+#endif
+       force_sig_info_fault(SIGBUS, code, address, tsk);
 }
 
 static noinline void
@@ -819,8 +830,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
        if (fault & VM_FAULT_OOM) {
                out_of_memory(regs, error_code, address);
        } else {
-               if (fault & VM_FAULT_SIGBUS)
-                       do_sigbus(regs, error_code, address);
+               if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON))
+                       do_sigbus(regs, error_code, address, fault);
                else
                        BUG();
        }
index 24952fd..dd38bfb 100644 (file)
@@ -144,6 +144,7 @@ void clflush_cache_range(void *vaddr, unsigned int size)
 
        mb();
 }
+EXPORT_SYMBOL_GPL(clflush_cache_range);
 
 static void __cpa_flush_all(void *arg)
 {
index 5db96d4..1331fcf 100644 (file)
@@ -646,7 +646,7 @@ int get_mp_bus_to_node(int busnum)
 
 #else /* CONFIG_X86_32 */
 
-static unsigned char mp_bus_to_node[BUS_NR] = {
+static int mp_bus_to_node[BUS_NR] = {
        [0 ... BUS_NR - 1] = -1
 };
 
index 921b6ff..9b52615 100644 (file)
@@ -15,6 +15,8 @@
  */
 
 #include <asm-generic/vmlinux.lds.h>
+#include <asm/page.h>
+#include <asm/thread_info.h>
 
 #include <variant/core.h>
 #include <platform/hardware.h>
@@ -107,41 +109,18 @@ SECTIONS
 
   .fixup   : { *(.fixup) }
 
-  . = ALIGN(16);
-
-  __ex_table : {
-    __start___ex_table = .;
-    *(__ex_table)
-    __stop___ex_table = .;
-  }
-
+  EXCEPTION_TABLE(16)
   /* Data section */
 
-  . = ALIGN(XCHAL_ICACHE_LINESIZE);
   _fdata = .;
-  .data :
-  {
-    DATA_DATA
-    CONSTRUCTORS
-    . = ALIGN(XCHAL_ICACHE_LINESIZE);
-    *(.data.cacheline_aligned)
-  }
-
+  RW_DATA_SECTION(XCHAL_ICACHE_LINESIZE, PAGE_SIZE, THREAD_SIZE)
   _edata = .;
 
-  /* The initial task */
-  . = ALIGN(8192);
-  .data.init_task : { *(.data.init_task) }
-
   /* Initialization code and data: */
 
-  . = ALIGN(1 << 12);
+  . = ALIGN(PAGE_SIZE);
   __init_begin = .;
-  .init.text : {
-       _sinittext = .;
-       INIT_TEXT
-       _einittext = .;
-  }
+  INIT_TEXT_SECTION(PAGE_SIZE)
 
   .init.data :
   {
@@ -168,36 +147,15 @@ SECTIONS
                   .DebugInterruptVector.text);
   
     __boot_reloc_table_end = ABSOLUTE(.) ;
-  }
 
-  . = ALIGN(XCHAL_ICACHE_LINESIZE);
-
-  __setup_start = .;
-  .init.setup : { *(.init.setup) }
-  __setup_end = .;
-
-  __initcall_start = .;
-  .initcall.init : {
-       INITCALLS
+    INIT_SETUP(XCHAL_ICACHE_LINESIZE)
+    INIT_CALLS
+    CON_INITCALL
+    SECURITY_INITCALL
+    INIT_RAM_FS
   }
-  __initcall_end = .;
-
-  __con_initcall_start = .;
-  .con_initcall.init : { *(.con_initcall.init) }
-  __con_initcall_end = .;
-
-  SECURITY_INIT
-
-
-#ifdef CONFIG_BLK_DEV_INITRD
-  . = ALIGN(4096);
-  __initramfs_start =.;
-  .init.ramfs : { *(.init.ramfs) }
-  __initramfs_end = .;
-#endif
-
-  PERCPU(4096)
 
+  PERCPU(PAGE_SIZE)
 
   /* We need this dummy segment here */
 
@@ -252,16 +210,11 @@ SECTIONS
                  .DoubleExceptionVector.literal)
 
   . = (LOADADDR( .DoubleExceptionVector.text ) + SIZEOF( .DoubleExceptionVector.text ) + 3) & ~ 3;
-  . = ALIGN(1 << 12);
+  . = ALIGN(PAGE_SIZE);
 
   __init_end = .;
 
-  . = ALIGN(8192);
-
-  /* BSS section */
-  _bss_start = .;
-  .bss : { *(.bss.page_aligned) *(.bss) }
-  _bss_end = .;
+  BSS_SECTION(0, 8192, 0)
 
   _end = .;
 
index d8fb391..e5aeb2b 100644 (file)
@@ -14,3 +14,12 @@ config ASYNC_MEMSET
        tristate
        select ASYNC_CORE
 
+config ASYNC_PQ
+       tristate
+       select ASYNC_CORE
+
+config ASYNC_RAID6_RECOV
+       tristate
+       select ASYNC_CORE
+       select ASYNC_PQ
+
index 27baa7d..d1e0e6f 100644 (file)
@@ -2,3 +2,6 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx.o
 obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o
 obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o
 obj-$(CONFIG_ASYNC_XOR) += async_xor.o
+obj-$(CONFIG_ASYNC_PQ) += async_pq.o
+obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o
+obj-$(CONFIG_ASYNC_RAID6_TEST) += raid6test.o
index ddccfb0..0ec1fb6 100644 (file)
  * async_memcpy - attempt to copy memory with a dma engine.
  * @dest: destination page
  * @src: src page
- * @offset: offset in pages to start transaction
+ * @dest_offset: offset into 'dest' to start transaction
+ * @src_offset: offset into 'src' to start transaction
  * @len: length in bytes
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK,
- * @depend_tx: memcpy depends on the result of this transaction
- * @cb_fn: function to call when the memcpy completes
- * @cb_param: parameter to pass to the callback routine
+ * @submit: submission / completion modifiers
+ *
+ * honored flags: ASYNC_TX_ACK
  */
 struct dma_async_tx_descriptor *
 async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
-       unsigned int src_offset, size_t len, enum async_tx_flags flags,
-       struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_param)
+            unsigned int src_offset, size_t len,
+            struct async_submit_ctl *submit)
 {
-       struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY,
+       struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMCPY,
                                                      &dest, 1, &src, 1, len);
        struct dma_device *device = chan ? chan->device : NULL;
        struct dma_async_tx_descriptor *tx = NULL;
 
-       if (device) {
+       if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
                dma_addr_t dma_dest, dma_src;
-               unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
+               unsigned long dma_prep_flags = 0;
 
+               if (submit->cb_fn)
+                       dma_prep_flags |= DMA_PREP_INTERRUPT;
+               if (submit->flags & ASYNC_TX_FENCE)
+                       dma_prep_flags |= DMA_PREP_FENCE;
                dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
                                        DMA_FROM_DEVICE);
 
@@ -67,13 +70,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
 
        if (tx) {
                pr_debug("%s: (async) len: %zu\n", __func__, len);
-               async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+               async_tx_submit(chan, tx, submit);
        } else {
                void *dest_buf, *src_buf;
                pr_debug("%s: (sync) len: %zu\n", __func__, len);
 
                /* wait for any prerequisite operations */
-               async_tx_quiesce(&depend_tx);
+               async_tx_quiesce(&submit->depend_tx);
 
                dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset;
                src_buf = kmap_atomic(src, KM_USER1) + src_offset;
@@ -83,26 +86,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
                kunmap_atomic(dest_buf, KM_USER0);
                kunmap_atomic(src_buf, KM_USER1);
 
-               async_tx_sync_epilog(cb_fn, cb_param);
+               async_tx_sync_epilog(submit);
        }
 
        return tx;
 }
 EXPORT_SYMBOL_GPL(async_memcpy);
 
-static int __init async_memcpy_init(void)
-{
-       return 0;
-}
-
-static void __exit async_memcpy_exit(void)
-{
-       do { } while (0);
-}
-
-module_init(async_memcpy_init);
-module_exit(async_memcpy_exit);
-
 MODULE_AUTHOR("Intel Corporation");
 MODULE_DESCRIPTION("asynchronous memcpy api");
 MODULE_LICENSE("GPL");
index 5b5eb99..58e4a87 100644 (file)
  * @val: fill value
  * @offset: offset in pages to start transaction
  * @len: length in bytes
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
- * @depend_tx: memset depends on the result of this transaction
- * @cb_fn: function to call when the memcpy completes
- * @cb_param: parameter to pass to the callback routine
+ *
+ * honored flags: ASYNC_TX_ACK
  */
 struct dma_async_tx_descriptor *
-async_memset(struct page *dest, int val, unsigned int offset,
-       size_t len, enum async_tx_flags flags,
-       struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_param)
+async_memset(struct page *dest, int val, unsigned int offset, size_t len,
+            struct async_submit_ctl *submit)
 {
-       struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET,
+       struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMSET,
                                                      &dest, 1, NULL, 0, len);
        struct dma_device *device = chan ? chan->device : NULL;
        struct dma_async_tx_descriptor *tx = NULL;
 
-       if (device) {
+       if (device && is_dma_fill_aligned(device, offset, 0, len)) {
                dma_addr_t dma_dest;
-               unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
+               unsigned long dma_prep_flags = 0;
 
+               if (submit->cb_fn)
+                       dma_prep_flags |= DMA_PREP_INTERRUPT;
+               if (submit->flags & ASYNC_TX_FENCE)
+                       dma_prep_flags |= DMA_PREP_FENCE;
                dma_dest = dma_map_page(device->dev, dest, offset, len,
                                        DMA_FROM_DEVICE);
 
@@ -64,38 +64,25 @@ async_memset(struct page *dest, int val, unsigned int offset,
 
        if (tx) {
                pr_debug("%s: (async) len: %zu\n", __func__, len);
-               async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+               async_tx_submit(chan, tx, submit);
        } else { /* run the memset synchronously */
                void *dest_buf;
                pr_debug("%s: (sync) len: %zu\n", __func__, len);
 
-               dest_buf = (void *) (((char *) page_address(dest)) + offset);
+               dest_buf = page_address(dest) + offset;
 
                /* wait for any prerequisite operations */
-               async_tx_quiesce(&depend_tx);
+               async_tx_quiesce(&submit->depend_tx);
 
                memset(dest_buf, val, len);
 
-               async_tx_sync_epilog(cb_fn, cb_param);
+               async_tx_sync_epilog(submit);
        }
 
        return tx;
 }
 EXPORT_SYMBOL_GPL(async_memset);
 
-static int __init async_memset_init(void)
-{
-       return 0;
-}
-
-static void __exit async_memset_exit(void)
-{
-       do { } while (0);
-}
-
-module_init(async_memset_init);
-module_exit(async_memset_exit);
-
 MODULE_AUTHOR("Intel Corporation");
 MODULE_DESCRIPTION("asynchronous memset api");
 MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
new file mode 100644 (file)
index 0000000..b88db6d
--- /dev/null
@@ -0,0 +1,395 @@
+/*
+ * Copyright(c) 2007 Yuri Tikhonov <yur@emcraft.com>
+ * Copyright(c) 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/raid/pq.h>
+#include <linux/async_tx.h>
+
+/**
+ * scribble - space to hold a throwaway P or Q buffer for synchronous gen_syndrome
+ */
+static struct page *scribble;
+
+static bool is_raid6_zero_block(struct page *p)
+{
+       return p == (void *) raid6_empty_zero_page;
+}
+
+/* the struct page *blocks[] parameter passed to async_gen_syndrome()
+ * and async_syndrome_val() contains the 'P' destination address at
+ * blocks[disks-2] and the 'Q' destination address at blocks[disks-1]
+ *
+ * note: these are macros as they are used as lvalues
+ */
+#define P(b, d) (b[d-2])
+#define Q(b, d) (b[d-1])
+
+/**
+ * do_async_gen_syndrome - asynchronously calculate P and/or Q
+ */
+static __async_inline struct dma_async_tx_descriptor *
+do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
+                     const unsigned char *scfs, unsigned int offset, int disks,
+                     size_t len, dma_addr_t *dma_src,
+                     struct async_submit_ctl *submit)
+{
+       struct dma_async_tx_descriptor *tx = NULL;
+       struct dma_device *dma = chan->device;
+       enum dma_ctrl_flags dma_flags = 0;
+       enum async_tx_flags flags_orig = submit->flags;
+       dma_async_tx_callback cb_fn_orig = submit->cb_fn;
+       void *cb_param_orig = submit->cb_param;
+       int src_cnt = disks - 2;
+       unsigned char coefs[src_cnt];
+       unsigned short pq_src_cnt;
+       dma_addr_t dma_dest[2];
+       int src_off = 0;
+       int idx;
+       int i;
+
+       /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */
+       if (P(blocks, disks))
+               dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset,
+                                          len, DMA_BIDIRECTIONAL);
+       else
+               dma_flags |= DMA_PREP_PQ_DISABLE_P;
+       if (Q(blocks, disks))
+               dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset,
+                                          len, DMA_BIDIRECTIONAL);
+       else
+               dma_flags |= DMA_PREP_PQ_DISABLE_Q;
+
+       /* convert source addresses being careful to collapse 'empty'
+        * sources and update the coefficients accordingly
+        */
+       for (i = 0, idx = 0; i < src_cnt; i++) {
+               if (is_raid6_zero_block(blocks[i]))
+                       continue;
+               dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len,
+                                           DMA_TO_DEVICE);
+               coefs[idx] = scfs[i];
+               idx++;
+       }
+       src_cnt = idx;
+
+       while (src_cnt > 0) {
+               submit->flags = flags_orig;
+               pq_src_cnt = min(src_cnt, dma_maxpq(dma, dma_flags));
+               /* if we are submitting additional pqs, leave the chain open,
+                * clear the callback parameters, and leave the destination
+                * buffers mapped
+                */
+               if (src_cnt > pq_src_cnt) {
+                       submit->flags &= ~ASYNC_TX_ACK;
+                       submit->flags |= ASYNC_TX_FENCE;
+                       dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;
+                       submit->cb_fn = NULL;
+                       submit->cb_param = NULL;
+               } else {
+                       dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP;
+                       submit->cb_fn = cb_fn_orig;
+                       submit->cb_param = cb_param_orig;
+                       if (cb_fn_orig)
+                               dma_flags |= DMA_PREP_INTERRUPT;
+               }
+               if (submit->flags & ASYNC_TX_FENCE)
+                       dma_flags |= DMA_PREP_FENCE;
+
+               /* Since we have clobbered the src_list we are committed
+                * to doing this asynchronously.  Drivers force forward
+                * progress in case they can not provide a descriptor
+                */
+               for (;;) {
+                       tx = dma->device_prep_dma_pq(chan, dma_dest,
+                                                    &dma_src[src_off],
+                                                    pq_src_cnt,
+                                                    &coefs[src_off], len,
+                                                    dma_flags);
+                       if (likely(tx))
+                               break;
+                       async_tx_quiesce(&submit->depend_tx);
+                       dma_async_issue_pending(chan);
+               }
+
+               async_tx_submit(chan, tx, submit);
+               submit->depend_tx = tx;
+
+               /* drop completed sources */
+               src_cnt -= pq_src_cnt;
+               src_off += pq_src_cnt;
+
+               dma_flags |= DMA_PREP_CONTINUE;
+       }
+
+       return tx;
+}
+
+/**
+ * do_sync_gen_syndrome - synchronously calculate a raid6 syndrome
+ */
+static void
+do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
+                    size_t len, struct async_submit_ctl *submit)
+{
+       void **srcs;
+       int i;
+
+       if (submit->scribble)
+               srcs = submit->scribble;
+       else
+               srcs = (void **) blocks;
+
+       for (i = 0; i < disks; i++) {
+               if (is_raid6_zero_block(blocks[i])) {
+                       BUG_ON(i > disks - 3); /* P or Q can't be zero */
+                       srcs[i] = blocks[i];
+               } else
+                       srcs[i] = page_address(blocks[i]) + offset;
+       }
+       raid6_call.gen_syndrome(disks, len, srcs);
+       async_tx_sync_epilog(submit);
+}
+
+/**
+ * async_gen_syndrome - asynchronously calculate a raid6 syndrome
+ * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
+ * @offset: common offset into each block (src and dest) to start transaction
+ * @disks: number of blocks (including missing P or Q, see below)
+ * @len: length of operation in bytes
+ * @submit: submission/completion modifiers
+ *
+ * General note: This routine assumes a field of GF(2^8) with a
+ * primitive polynomial of 0x11d and a generator of {02}.
+ *
+ * 'disks' note: callers can optionally omit either P or Q (but not
+ * both) from the calculation by setting blocks[disks-2] or
+ * blocks[disks-1] to NULL.  When P or Q is omitted 'len' must be <=
+ * PAGE_SIZE as a temporary buffer of this size is used in the
+ * synchronous path.  'disks' always accounts for both destination
+ * buffers.
+ *
+ * 'blocks' note: if submit->scribble is NULL then the contents of
+ * 'blocks' may be overwritten
+ */
+struct dma_async_tx_descriptor *
+async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
+                  size_t len, struct async_submit_ctl *submit)
+{
+       int src_cnt = disks - 2;
+       struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
+                                                     &P(blocks, disks), 2,
+                                                     blocks, src_cnt, len);
+       struct dma_device *device = chan ? chan->device : NULL;
+       dma_addr_t *dma_src = NULL;
+
+       BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));
+
+       if (submit->scribble)
+               dma_src = submit->scribble;
+       else if (sizeof(dma_addr_t) <= sizeof(struct page *))
+               dma_src = (dma_addr_t *) blocks;
+
+       if (dma_src && device &&
+           (src_cnt <= dma_maxpq(device, 0) ||
+            dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
+           is_dma_pq_aligned(device, offset, 0, len)) {
+               /* run the p+q asynchronously */
+               pr_debug("%s: (async) disks: %d len: %zu\n",
+                        __func__, disks, len);
+               return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset,
+                                            disks, len, dma_src, submit);
+       }
+
+       /* run the pq synchronously */
+       pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len);
+
+       /* wait for any prerequisite operations */
+       async_tx_quiesce(&submit->depend_tx);
+
+       if (!P(blocks, disks)) {
+               P(blocks, disks) = scribble;
+               BUG_ON(len + offset > PAGE_SIZE);
+       }
+       if (!Q(blocks, disks)) {
+               Q(blocks, disks) = scribble;
+               BUG_ON(len + offset > PAGE_SIZE);
+       }
+       do_sync_gen_syndrome(blocks, offset, disks, len, submit);
+
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(async_gen_syndrome);
+
+/**
+ * async_syndrome_val - asynchronously validate a raid6 syndrome
+ * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1
+ * @offset: common offset into each block (src and dest) to start transaction
+ * @disks: number of blocks (including missing P or Q, see below)
+ * @len: length of operation in bytes
+ * @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set
+ * @spare: temporary result buffer for the synchronous case
+ * @submit: submission / completion modifiers
+ *
+ * The same notes from async_gen_syndrome apply to the 'blocks'
+ * and 'disks' parameters of this routine.  The synchronous path
+ * requires a temporary result buffer and submit->scribble to be
+ * specified.
+ */
+struct dma_async_tx_descriptor *
+async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
+                  size_t len, enum sum_check_flags *pqres, struct page *spare,
+                  struct async_submit_ctl *submit)
+{
+       struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL,
+                                                     NULL, 0,  blocks, disks,
+                                                     len);
+       struct dma_device *device = chan ? chan->device : NULL;
+       struct dma_async_tx_descriptor *tx;
+       enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
+       dma_addr_t *dma_src = NULL;
+
+       BUG_ON(disks < 4);
+
+       if (submit->scribble)
+               dma_src = submit->scribble;
+       else if (sizeof(dma_addr_t) <= sizeof(struct page *))
+               dma_src = (dma_addr_t *) blocks;
+
+       if (dma_src && device && disks <= dma_maxpq(device, 0) &&
+           is_dma_pq_aligned(device, offset, 0, len)) {
+               struct device *dev = device->dev;
+               dma_addr_t *pq = &dma_src[disks-2];
+               int i;
+
+               pr_debug("%s: (async) disks: %d len: %zu\n",
+                        __func__, disks, len);
+               if (!P(blocks, disks))
+                       dma_flags |= DMA_PREP_PQ_DISABLE_P;
+               if (!Q(blocks, disks))
+                       dma_flags |= DMA_PREP_PQ_DISABLE_Q;
+               if (submit->flags & ASYNC_TX_FENCE)
+                       dma_flags |= DMA_PREP_FENCE;
+               for (i = 0; i < disks; i++)
+                       if (likely(blocks[i])) {
+                               BUG_ON(is_raid6_zero_block(blocks[i]));
+                               dma_src[i] = dma_map_page(dev, blocks[i],
+                                                         offset, len,
+                                                         DMA_TO_DEVICE);
+                       }
+
+               for (;;) {
+                       tx = device->device_prep_dma_pq_val(chan, pq, dma_src,
+                                                           disks - 2,
+                                                           raid6_gfexp,
+                                                           len, pqres,
+                                                           dma_flags);
+                       if (likely(tx))
+                               break;
+                       async_tx_quiesce(&submit->depend_tx);
+                       dma_async_issue_pending(chan);
+               }
+               async_tx_submit(chan, tx, submit);
+
+               return tx;
+       } else {
+               struct page *p_src = P(blocks, disks);
+               struct page *q_src = Q(blocks, disks);
+               enum async_tx_flags flags_orig = submit->flags;
+               dma_async_tx_callback cb_fn_orig = submit->cb_fn;
+               void *scribble = submit->scribble;
+               void *cb_param_orig = submit->cb_param;
+               void *p, *q, *s;
+
+               pr_debug("%s: (sync) disks: %d len: %zu\n",
+                        __func__, disks, len);
+
+               /* caller must provide a temporary result buffer and
+                * allow the input parameters to be preserved
+                */
+               BUG_ON(!spare || !scribble);
+
+               /* wait for any prerequisite operations */
+               async_tx_quiesce(&submit->depend_tx);
+
+               /* recompute p and/or q into the temporary buffer and then
+                * check to see the result matches the current value
+                */
+               tx = NULL;
+               *pqres = 0;
+               if (p_src) {
+                       init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL,
+                                         NULL, NULL, scribble);
+                       tx = async_xor(spare, blocks, offset, disks-2, len, submit);
+                       async_tx_quiesce(&tx);
+                       p = page_address(p_src) + offset;
+                       s = page_address(spare) + offset;
+                       *pqres |= !!memcmp(p, s, len) << SUM_CHECK_P;
+               }
+
+               if (q_src) {
+                       P(blocks, disks) = NULL;
+                       Q(blocks, disks) = spare;
+                       init_async_submit(submit, 0, NULL, NULL, NULL, scribble);
+                       tx = async_gen_syndrome(blocks, offset, disks, len, submit);
+                       async_tx_quiesce(&tx);
+                       q = page_address(q_src) + offset;
+                       s = page_address(spare) + offset;
+                       *pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q;
+               }
+
+               /* restore P, Q and submit */
+               P(blocks, disks) = p_src;
+               Q(blocks, disks) = q_src;
+
+               submit->cb_fn = cb_fn_orig;
+               submit->cb_param = cb_param_orig;
+               submit->flags = flags_orig;
+               async_tx_sync_epilog(submit);
+
+               return NULL;
+       }
+}
+EXPORT_SYMBOL_GPL(async_syndrome_val);
+
+static int __init async_pq_init(void)
+{
+       scribble = alloc_page(GFP_KERNEL);
+
+       if (scribble)
+               return 0;
+
+       pr_err("%s: failed to allocate required spare page\n", __func__);
+
+       return -ENOMEM;
+}
+
+static void __exit async_pq_exit(void)
+{
+       put_page(scribble);
+}
+
+module_init(async_pq_init);
+module_exit(async_pq_exit);
+
+MODULE_DESCRIPTION("asynchronous raid6 syndrome generation/validation");
+MODULE_LICENSE("GPL");
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c
new file mode 100644 (file)
index 0000000..6d73dde
--- /dev/null
@@ -0,0 +1,468 @@
+/*
+ * Asynchronous RAID-6 recovery calculations ASYNC_TX API.
+ * Copyright(c) 2009 Intel Corporation
+ *
+ * based on raid6recov.c:
+ *   Copyright 2002 H. Peter Anvin
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/raid/pq.h>
+#include <linux/async_tx.h>
+
+static struct dma_async_tx_descriptor *
+async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
+                 size_t len, struct async_submit_ctl *submit)
+{
+       struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
+                                                     &dest, 1, srcs, 2, len);
+       struct dma_device *dma = chan ? chan->device : NULL;
+       const u8 *amul, *bmul;
+       u8 ax, bx;
+       u8 *a, *b, *c;
+
+       if (dma) {
+               dma_addr_t dma_dest[2];
+               dma_addr_t dma_src[2];
+               struct device *dev = dma->dev;
+               struct dma_async_tx_descriptor *tx;
+               enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
+
+               if (submit->flags & ASYNC_TX_FENCE)
+                       dma_flags |= DMA_PREP_FENCE;
+               dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
+               dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
+               dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
+               tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef,
+                                            len, dma_flags);
+               if (tx) {
+                       async_tx_submit(chan, tx, submit);
+                       return tx;
+               }
+
+               /* could not get a descriptor, unmap and fall through to
+                * the synchronous path
+                */
+               dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
+               dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
+               dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE);
+       }
+
+       /* run the operation synchronously */
+       async_tx_quiesce(&submit->depend_tx);
+       amul = raid6_gfmul[coef[0]];
+       bmul = raid6_gfmul[coef[1]];
+       a = page_address(srcs[0]);
+       b = page_address(srcs[1]);
+       c = page_address(dest);
+
+       while (len--) {
+               ax    = amul[*a++];
+               bx    = bmul[*b++];
+               *c++ = ax ^ bx;
+       }
+
+       return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
+          struct async_submit_ctl *submit)
+{
+       struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
+                                                     &dest, 1, &src, 1, len);
+       struct dma_device *dma = chan ? chan->device : NULL;
+       const u8 *qmul; /* Q multiplier table */
+       u8 *d, *s;
+
+       if (dma) {
+               dma_addr_t dma_dest[2];
+               dma_addr_t dma_src[1];
+               struct device *dev = dma->dev;
+               struct dma_async_tx_descriptor *tx;
+               enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
+
+               if (submit->flags & ASYNC_TX_FENCE)
+                       dma_flags |= DMA_PREP_FENCE;
+               dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
+               dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
+               tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
+                                            len, dma_flags);
+               if (tx) {
+                       async_tx_submit(chan, tx, submit);
+                       return tx;
+               }
+
+               /* could not get a descriptor, unmap and fall through to
+                * the synchronous path
+                */
+               dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
+               dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
+       }
+
+       /* no channel available, or failed to allocate a descriptor, so
+        * perform the operation synchronously
+        */
+       async_tx_quiesce(&submit->depend_tx);
+       qmul  = raid6_gfmul[coef];
+       d = page_address(dest);
+       s = page_address(src);
+
+       while (len--)
+               *d++ = qmul[*s++];
+
+       return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+__2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks,
+             struct async_submit_ctl *submit)
+{
+       struct dma_async_tx_descriptor *tx = NULL;
+       struct page *p, *q, *a, *b;
+       struct page *srcs[2];
+       unsigned char coef[2];
+       enum async_tx_flags flags = submit->flags;
+       dma_async_tx_callback cb_fn = submit->cb_fn;
+       void *cb_param = submit->cb_param;
+       void *scribble = submit->scribble;
+
+       p = blocks[4-2];
+       q = blocks[4-1];
+
+       a = blocks[faila];
+       b = blocks[failb];
+
+       /* in the 4 disk case P + Pxy == P and Q + Qxy == Q */
+       /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
+       srcs[0] = p;
+       srcs[1] = q;
+       coef[0] = raid6_gfexi[failb-faila];
+       coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
+       init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+       tx = async_sum_product(b, srcs, coef, bytes, submit);
+
+       /* Dy = P+Pxy+Dx */
+       srcs[0] = p;
+       srcs[1] = b;
+       init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn,
+                         cb_param, scribble);
+       tx = async_xor(a, srcs, 0, 2, bytes, submit);
+
+       return tx;
+
+}
+
+static struct dma_async_tx_descriptor *
+__2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
+             struct async_submit_ctl *submit)
+{
+       struct dma_async_tx_descriptor *tx = NULL;
+       struct page *p, *q, *g, *dp, *dq;
+       struct page *srcs[2];
+       unsigned char coef[2];
+       enum async_tx_flags flags = submit->flags;
+       dma_async_tx_callback cb_fn = submit->cb_fn;
+       void *cb_param = submit->cb_param;
+       void *scribble = submit->scribble;
+       int uninitialized_var(good);
+       int i;
+
+       for (i = 0; i < 3; i++) {
+               if (i == faila || i == failb)
+                       continue;
+               else {
+                       good = i;
+                       break;
+               }
+       }
+       BUG_ON(i >= 3);
+
+       p = blocks[5-2];
+       q = blocks[5-1];
+       g = blocks[good];
+
+       /* Compute syndrome with zero for the missing data pages
+        * Use the dead data pages as temporary storage for delta p and
+        * delta q
+        */
+       dp = blocks[faila];
+       dq = blocks[failb];
+
+       init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+       tx = async_memcpy(dp, g, 0, 0, bytes, submit);
+       init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+       tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
+
+       /* compute P + Pxy */
+       srcs[0] = dp;
+       srcs[1] = p;
+       init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+                         NULL, NULL, scribble);
+       tx = async_xor(dp, srcs, 0, 2, bytes, submit);
+
+       /* compute Q + Qxy */
+       srcs[0] = dq;
+       srcs[1] = q;
+       init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+                         NULL, NULL, scribble);
+       tx = async_xor(dq, srcs, 0, 2, bytes, submit);
+
+       /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
+       srcs[0] = dp;
+       srcs[1] = dq;
+       coef[0] = raid6_gfexi[failb-faila];
+       coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
+       init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+       tx = async_sum_product(dq, srcs, coef, bytes, submit);
+
+       /* Dy = P+Pxy+Dx */
+       srcs[0] = dp;
+       srcs[1] = dq;
+       init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
+                         cb_param, scribble);
+       tx = async_xor(dp, srcs, 0, 2, bytes, submit);
+
+       return tx;
+}
+
+static struct dma_async_tx_descriptor *
+__2data_recov_n(int disks, size_t bytes, int faila, int failb,
+             struct page **blocks, struct async_submit_ctl *submit)
+{
+       struct dma_async_tx_descriptor *tx = NULL;
+       struct page *p, *q, *dp, *dq;
+       struct page *srcs[2];
+       unsigned char coef[2];
+       enum async_tx_flags flags = submit->flags;
+       dma_async_tx_callback cb_fn = submit->cb_fn;
+       void *cb_param = submit->cb_param;
+       void *scribble = submit->scribble;
+
+       p = blocks[disks-2];
+       q = blocks[disks-1];
+
+       /* Compute syndrome with zero for the missing data pages
+        * Use the dead data pages as temporary storage for
+        * delta p and delta q
+        */
+       dp = blocks[faila];
+       blocks[faila] = (void *)raid6_empty_zero_page;
+       blocks[disks-2] = dp;
+       dq = blocks[failb];
+       blocks[failb] = (void *)raid6_empty_zero_page;
+       blocks[disks-1] = dq;
+
+       init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+       tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
+
+       /* Restore pointer table */
+       blocks[faila]   = dp;
+       blocks[failb]   = dq;
+       blocks[disks-2] = p;
+       blocks[disks-1] = q;
+
+       /* compute P + Pxy */
+       srcs[0] = dp;
+       srcs[1] = p;
+       init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+                         NULL, NULL, scribble);
+       tx = async_xor(dp, srcs, 0, 2, bytes, submit);
+
+       /* compute Q + Qxy */
+       srcs[0] = dq;
+       srcs[1] = q;
+       init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+                         NULL, NULL, scribble);
+       tx = async_xor(dq, srcs, 0, 2, bytes, submit);
+
+       /* Dx = A*(P+Pxy) + B*(Q+Qxy) */
+       srcs[0] = dp;
+       srcs[1] = dq;
+       coef[0] = raid6_gfexi[failb-faila];
+       coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
+       init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+       tx = async_sum_product(dq, srcs, coef, bytes, submit);
+
+       /* Dy = P+Pxy+Dx */
+       srcs[0] = dp;
+       srcs[1] = dq;
+       init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
+                         cb_param, scribble);
+       tx = async_xor(dp, srcs, 0, 2, bytes, submit);
+
+       return tx;
+}
+
+/**
+ * async_raid6_2data_recov - asynchronously calculate two missing data blocks
+ * @disks: number of disks in the RAID-6 array
+ * @bytes: block size
+ * @faila: first failed drive index
+ * @failb: second failed drive index
+ * @blocks: array of source pointers where the last two entries are p and q
+ * @submit: submission/completion modifiers
+ */
+struct dma_async_tx_descriptor *
+async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
+                       struct page **blocks, struct async_submit_ctl *submit)
+{
+       BUG_ON(faila == failb);
+       if (failb < faila)
+               swap(faila, failb);
+
+       pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);
+
+       /* we need to preserve the contents of 'blocks' for the async
+        * case, so punt to synchronous if a scribble buffer is not available
+        */
+       if (!submit->scribble) {
+               void **ptrs = (void **) blocks;
+               int i;
+
+               async_tx_quiesce(&submit->depend_tx);
+               for (i = 0; i < disks; i++)
+                       ptrs[i] = page_address(blocks[i]);
+
+               raid6_2data_recov(disks, bytes, faila, failb, ptrs);
+
+               async_tx_sync_epilog(submit);
+
+               return NULL;
+       }
+
+       switch (disks) {
+       case 4:
+               /* dma devices do not uniformly understand a zero source pq
+                * operation (in contrast to the synchronous case), so
+                * explicitly handle the 4 disk special case
+                */
+               return __2data_recov_4(bytes, faila, failb, blocks, submit);
+       case 5:
+               /* dma devices do not uniformly understand a single
+                * source pq operation (in contrast to the synchronous
+                * case), so explicitly handle the 5 disk special case
+                */
+               return __2data_recov_5(bytes, faila, failb, blocks, submit);
+       default:
+               return __2data_recov_n(disks, bytes, faila, failb, blocks, submit);
+       }
+}
+EXPORT_SYMBOL_GPL(async_raid6_2data_recov);
+
+/**
+ * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block
+ * @disks: number of disks in the RAID-6 array
+ * @bytes: block size
+ * @faila: failed drive index
+ * @blocks: array of source pointers where the last two entries are p and q
+ * @submit: submission/completion modifiers
+ */
+struct dma_async_tx_descriptor *
+async_raid6_datap_recov(int disks, size_t bytes, int faila,
+                       struct page **blocks, struct async_submit_ctl *submit)
+{
+       struct dma_async_tx_descriptor *tx = NULL;
+       struct page *p, *q, *dq;
+       u8 coef;
+       enum async_tx_flags flags = submit->flags;
+       dma_async_tx_callback cb_fn = submit->cb_fn;
+       void *cb_param = submit->cb_param;
+       void *scribble = submit->scribble;
+       struct page *srcs[2];
+
+       pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);
+
+       /* we need to preserve the contents of 'blocks' for the async
+        * case, so punt to synchronous if a scribble buffer is not available
+        */
+       if (!scribble) {
+               void **ptrs = (void **) blocks;
+               int i;
+
+               async_tx_quiesce(&submit->depend_tx);
+               for (i = 0; i < disks; i++)
+                       ptrs[i] = page_address(blocks[i]);
+
+               raid6_datap_recov(disks, bytes, faila, ptrs);
+
+               async_tx_sync_epilog(submit);
+
+               return NULL;
+       }
+
+       p = blocks[disks-2];
+       q = blocks[disks-1];
+
+       /* Compute syndrome with zero for the missing data page
+        * Use the dead data page as temporary storage for delta q
+        */
+       dq = blocks[faila];
+       blocks[faila] = (void *)raid6_empty_zero_page;
+       blocks[disks-1] = dq;
+
+       /* in the 4 disk case we only need to perform a single source
+        * multiplication
+        */
+       if (disks == 4) {
+               int good = faila == 0 ? 1 : 0;
+               struct page *g = blocks[good];
+
+               init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+                                 scribble);
+               tx = async_memcpy(p, g, 0, 0, bytes, submit);
+
+               init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+                                 scribble);
+               tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
+       } else {
+               init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+                                 scribble);
+               tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
+       }
+
+       /* Restore pointer table */
+       blocks[faila]   = dq;
+       blocks[disks-1] = q;
+
+       /* calculate g^{-faila} */
+       coef = raid6_gfinv[raid6_gfexp[faila]];
+
+       srcs[0] = dq;
+       srcs[1] = q;
+       init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+                         NULL, NULL, scribble);
+       tx = async_xor(dq, srcs, 0, 2, bytes, submit);
+
+       init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
+       tx = async_mult(dq, dq, coef, bytes, submit);
+
+       srcs[0] = p;
+       srcs[1] = dq;
+       init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn,
+                         cb_param, scribble);
+       tx = async_xor(p, srcs, 0, 2, bytes, submit);
+
+       return tx;
+}
+EXPORT_SYMBOL_GPL(async_raid6_datap_recov);
+
+MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
+MODULE_DESCRIPTION("asynchronous RAID-6 recovery api");
+MODULE_LICENSE("GPL");
index 06eb6cc..f9cdf04 100644 (file)
@@ -42,16 +42,21 @@ static void __exit async_tx_exit(void)
        async_dmaengine_put();
 }
 
+module_init(async_tx_init);
+module_exit(async_tx_exit);
+
 /**
  * __async_tx_find_channel - find a channel to carry out the operation or let
  *     the transaction execute synchronously
- * @depend_tx: transaction dependency
+ * @submit: transaction dependency and submission modifiers
  * @tx_type: transaction type
  */
 struct dma_chan *
-__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
-       enum dma_transaction_type tx_type)
+__async_tx_find_channel(struct async_submit_ctl *submit,
+                       enum dma_transaction_type tx_type)
 {
+       struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
+
        /* see if we can keep the chain on one channel */
        if (depend_tx &&
            dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
@@ -59,17 +64,6 @@ __async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
        return async_dma_find_channel(tx_type);
 }
 EXPORT_SYMBOL_GPL(__async_tx_find_channel);
-#else
-static int __init async_tx_init(void)
-{
-       printk(KERN_INFO "async_tx: api initialized (sync-only)\n");
-       return 0;
-}
-
-static void __exit async_tx_exit(void)
-{
-       do { } while (0);
-}
 #endif
 
 
@@ -83,10 +77,14 @@ static void
 async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
                        struct dma_async_tx_descriptor *tx)
 {
-       struct dma_chan *chan;
-       struct dma_device *device;
+       struct dma_chan *chan = depend_tx->chan;
+       struct dma_device *device = chan->device;
        struct dma_async_tx_descriptor *intr_tx = (void *) ~0;
 
+       #ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH
+       BUG();
+       #endif
+
        /* first check to see if we can still append to depend_tx */
        spin_lock_bh(&depend_tx->lock);
        if (depend_tx->parent && depend_tx->chan == tx->chan) {
@@ -96,11 +94,11 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
        }
        spin_unlock_bh(&depend_tx->lock);
 
-       if (!intr_tx)
+       /* attached dependency, flush the parent channel */
+       if (!intr_tx) {
+               device->device_issue_pending(chan);
                return;
-
-       chan = depend_tx->chan;
-       device = chan->device;
+       }
 
        /* see if we can schedule an interrupt
         * otherwise poll for completion
@@ -134,6 +132,7 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
                        intr_tx->tx_submit(intr_tx);
                        async_tx_ack(intr_tx);
                }
+               device->device_issue_pending(chan);
        } else {
                if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
                        panic("%s: DMA_ERROR waiting for depend_tx\n",
@@ -144,13 +143,14 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
 
 
 /**
- * submit_disposition - while holding depend_tx->lock we must avoid submitting
- *     new operations to prevent a circular locking dependency with
- *     drivers that already hold a channel lock when calling
- *     async_tx_run_dependencies.
+ * submit_disposition - flags for routing an incoming operation
  * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock
  * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch
  * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly
+ *
+ * while holding depend_tx->lock we must avoid submitting new operations
+ * to prevent a circular locking dependency with drivers that already
+ * hold a channel lock when calling async_tx_run_dependencies.
  */
 enum submit_disposition {
        ASYNC_TX_SUBMITTED,
@@ -160,11 +160,12 @@ enum submit_disposition {
 
 void
 async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
-       enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_param)
+               struct async_submit_ctl *submit)
 {
-       tx->callback = cb_fn;
-       tx->callback_param = cb_param;
+       struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
+
+       tx->callback = submit->cb_fn;
+       tx->callback_param = submit->cb_param;
 
        if (depend_tx) {
                enum submit_disposition s;
@@ -220,30 +221,29 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
                tx->tx_submit(tx);
        }
 
-       if (flags & ASYNC_TX_ACK)
+       if (submit->flags & ASYNC_TX_ACK)
                async_tx_ack(tx);
 
-       if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
+       if (depend_tx)
                async_tx_ack(depend_tx);
 }
 EXPORT_SYMBOL_GPL(async_tx_submit);
 
 /**
- * async_trigger_callback - schedules the callback function to be run after
- * any dependent operations have been completed.
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
- * @depend_tx: 'callback' requires the completion of this transaction
- * @cb_fn: function to call after depend_tx completes
- * @cb_param: parameter to pass to the callback routine
+ * async_trigger_callback - schedules the callback function to be run
+ * @submit: submission and completion parameters
+ *
+ * honored flags: ASYNC_TX_ACK
+ *
+ * The callback is run after any dependent operations have completed.
  */
 struct dma_async_tx_descriptor *
-async_trigger_callback(enum async_tx_flags flags,
-       struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_param)
+async_trigger_callback(struct async_submit_ctl *submit)
 {
        struct dma_chan *chan;
        struct dma_device *device;
        struct dma_async_tx_descriptor *tx;
+       struct dma_async_tx_descriptor *depend_tx = submit->depend_tx;
 
        if (depend_tx) {
                chan = depend_tx->chan;
@@ -262,14 +262,14 @@ async_trigger_callback(enum async_tx_flags flags,
        if (tx) {
                pr_debug("%s: (async)\n", __func__);
 
-               async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+               async_tx_submit(chan, tx, submit);
        } else {
                pr_debug("%s: (sync)\n", __func__);
 
                /* wait for any prerequisite operations */
-               async_tx_quiesce(&depend_tx);
+               async_tx_quiesce(&submit->depend_tx);
 
-               async_tx_sync_epilog(cb_fn, cb_param);
+               async_tx_sync_epilog(submit);
        }
 
        return tx;
@@ -295,9 +295,6 @@ void async_tx_quiesce(struct dma_async_tx_descriptor **tx)
 }
 EXPORT_SYMBOL_GPL(async_tx_quiesce);
 
-module_init(async_tx_init);
-module_exit(async_tx_exit);
-
 MODULE_AUTHOR("Intel Corporation");
 MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API");
 MODULE_LICENSE("GPL");
index 90dd3f8..b459a90 100644 (file)
 /* do_async_xor - dma map the pages and perform the xor with an engine */
 static __async_inline struct dma_async_tx_descriptor *
 do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
-            unsigned int offset, int src_cnt, size_t len,
-            enum async_tx_flags flags,
-            struct dma_async_tx_descriptor *depend_tx,
-            dma_async_tx_callback cb_fn, void *cb_param)
+            unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src,
+            struct async_submit_ctl *submit)
 {
        struct dma_device *dma = chan->device;
-       dma_addr_t *dma_src = (dma_addr_t *) src_list;
        struct dma_async_tx_descriptor *tx = NULL;
        int src_off = 0;
        int i;
-       dma_async_tx_callback _cb_fn;
-       void *_cb_param;
-       enum async_tx_flags async_flags;
+       dma_async_tx_callback cb_fn_orig = submit->cb_fn;
+       void *cb_param_orig = submit->cb_param;
+       enum async_tx_flags flags_orig = submit->flags;
        enum dma_ctrl_flags dma_flags;
        int xor_src_cnt;
        dma_addr_t dma_dest;
@@ -63,25 +60,27 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
        }
 
        while (src_cnt) {
-               async_flags = flags;
+               submit->flags = flags_orig;
                dma_flags = 0;
-               xor_src_cnt = min(src_cnt, dma->max_xor);
+               xor_src_cnt = min(src_cnt, (int)dma->max_xor);
                /* if we are submitting additional xors, leave the chain open,
                 * clear the callback parameters, and leave the destination
                 * buffer mapped
                 */
                if (src_cnt > xor_src_cnt) {
-                       async_flags &= ~ASYNC_TX_ACK;
+                       submit->flags &= ~ASYNC_TX_ACK;
+                       submit->flags |= ASYNC_TX_FENCE;
                        dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
-                       _cb_fn = NULL;
-                       _cb_param = NULL;
+                       submit->cb_fn = NULL;
+                       submit->cb_param = NULL;
                } else {
-                       _cb_fn = cb_fn;
-                       _cb_param = cb_param;
+                       submit->cb_fn = cb_fn_orig;
+                       submit->cb_param = cb_param_orig;
                }
-               if (_cb_fn)
+               if (submit->cb_fn)
                        dma_flags |= DMA_PREP_INTERRUPT;
-
+               if (submit->flags & ASYNC_TX_FENCE)
+                       dma_flags |= DMA_PREP_FENCE;
                /* Since we have clobbered the src_list we are committed
                 * to doing this asynchronously.  Drivers force forward progress
                 * in case they can not provide a descriptor
@@ -90,7 +89,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
                                              xor_src_cnt, len, dma_flags);
 
                if (unlikely(!tx))
-                       async_tx_quiesce(&depend_tx);
+                       async_tx_quiesce(&submit->depend_tx);
 
               /* spin wait for the preceding transactions to complete */
                while (unlikely(!tx)) {
@@ -101,11 +100,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
                                                      dma_flags);
                }
 
-               async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn,
-                               _cb_param);
-
-               depend_tx = tx;
-               flags |= ASYNC_TX_DEP_ACK;
+               async_tx_submit(chan, tx, submit);
+               submit->depend_tx = tx;
 
                if (src_cnt > xor_src_cnt) {
                        /* drop completed sources */
@@ -124,23 +120,27 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
 
 static void
 do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
-           int src_cnt, size_t len, enum async_tx_flags flags,
-           dma_async_tx_callback cb_fn, void *cb_param)
+           int src_cnt, size_t len, struct async_submit_ctl *submit)
 {
        int i;
        int xor_src_cnt;
        int src_off = 0;
        void *dest_buf;
-       void **srcs = (void **) src_list;
+       void **srcs;
+
+       if (submit->scribble)
+               srcs = submit->scribble;
+       else
+               srcs = (void **) src_list;
 
-       /* reuse the 'src_list' array to convert to buffer pointers */
+       /* convert to buffer pointers */
        for (i = 0; i < src_cnt; i++)
                srcs[i] = page_address(src_list[i]) + offset;
 
        /* set destination address */
        dest_buf = page_address(dest) + offset;
 
-       if (flags & ASYNC_TX_XOR_ZERO_DST)
+       if (submit->flags & ASYNC_TX_XOR_ZERO_DST)
                memset(dest_buf, 0, len);
 
        while (src_cnt > 0) {
@@ -153,61 +153,70 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
                src_off += xor_src_cnt;
        }
 
-       async_tx_sync_epilog(cb_fn, cb_param);
+       async_tx_sync_epilog(submit);
 }
 
 /**
  * async_xor - attempt to xor a set of blocks with a dma engine.
- *     xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST
- *     flag must be set to not include dest data in the calculation.  The
- *     assumption with dma eninges is that they only use the destination
- *     buffer as a source when it is explicity specified in the source list.
  * @dest: destination page
- * @src_list: array of source pages (if the dest is also a source it must be
- *     at index zero).  The contents of this array may be overwritten.
- * @offset: offset in pages to start transaction
+ * @src_list: array of source pages
+ * @offset: common src/dst offset to start transaction
  * @src_cnt: number of source pages
  * @len: length in bytes
- * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST,
- *     ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
- * @depend_tx: xor depends on the result of this transaction.
- * @cb_fn: function to call when the xor completes
- * @cb_param: parameter to pass to the callback routine
+ * @submit: submission / completion modifiers
+ *
+ * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST
+ *
+ * xor_blocks always uses the dest as a source so the
+ * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in
+ * the calculation.  The assumption with dma engines is that they only
+ * use the destination buffer as a source when it is explicitly specified
+ * in the source list.
+ *
+ * src_list note: if the dest is also a source it must be at index zero.
+ * The contents of this array will be overwritten if a scribble region
+ * is not specified.
  */
 struct dma_async_tx_descriptor *
 async_xor(struct page *dest, struct page **src_list, unsigned int offset,
-       int src_cnt, size_t len, enum async_tx_flags flags,
-       struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_param)
+         int src_cnt, size_t len, struct async_submit_ctl *submit)
 {
-       struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR,
+       struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
                                                      &dest, 1, src_list,
                                                      src_cnt, len);
+       dma_addr_t *dma_src = NULL;
+
        BUG_ON(src_cnt <= 1);
 
-       if (chan) {
+       if (submit->scribble)
+               dma_src = submit->scribble;
+       else if (sizeof(dma_addr_t) <= sizeof(struct page *))
+               dma_src = (dma_addr_t *) src_list;
+
+       if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) {
                /* run the xor asynchronously */
                pr_debug("%s (async): len: %zu\n", __func__, len);
 
                return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
-                                   flags, depend_tx, cb_fn, cb_param);
+                                   dma_src, submit);
        } else {
                /* run the xor synchronously */
                pr_debug("%s (sync): len: %zu\n", __func__, len);
+               WARN_ONCE(chan, "%s: no space for dma address conversion\n",
+                         __func__);
 
                /* in the sync case the dest is an implied source
                 * (assumes the dest is the first source)
                 */
-               if (flags & ASYNC_TX_XOR_DROP_DST) {
+               if (submit->flags & ASYNC_TX_XOR_DROP_DST) {
                        src_cnt--;
                        src_list++;
                }
 
                /* wait for any prerequisite operations */
-               async_tx_quiesce(&depend_tx);
+               async_tx_quiesce(&submit->depend_tx);
 
-               do_sync_xor(dest, src_list, offset, src_cnt, len,
-                           flags, cb_fn, cb_param);
+               do_sync_xor(dest, src_list, offset, src_cnt, len, submit);
 
                return NULL;
        }
@@ -222,104 +231,94 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len)
 }
 
 /**
- * async_xor_zero_sum - attempt a xor parity check with a dma engine.
+ * async_xor_val - attempt a xor parity check with a dma engine.
  * @dest: destination page used if the xor is performed synchronously
- * @src_list: array of source pages.  The dest page must be listed as a source
- *     at index zero.  The contents of this array may be overwritten.
+ * @src_list: array of source pages
  * @offset: offset in pages to start transaction
  * @src_cnt: number of source pages
  * @len: length in bytes
  * @result: 0 if sum == 0 else non-zero
- * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
- * @depend_tx: xor depends on the result of this transaction.
- * @cb_fn: function to call when the xor completes
- * @cb_param: parameter to pass to the callback routine
+ * @submit: submission / completion modifiers
+ *
+ * honored flags: ASYNC_TX_ACK
+ *
+ * src_list note: if the dest is also a source it must be at index zero.
+ * The contents of this array will be overwritten if a scribble region
+ * is not specified.
  */
 struct dma_async_tx_descriptor *
-async_xor_zero_sum(struct page *dest, struct page **src_list,
-       unsigned int offset, int src_cnt, size_t len,
-       u32 *result, enum async_tx_flags flags,
-       struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_param)
+async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
+             int src_cnt, size_t len, enum sum_check_flags *result,
+             struct async_submit_ctl *submit)
 {
-       struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM,
+       struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL,
                                                      &dest, 1, src_list,
                                                      src_cnt, len);
        struct dma_device *device = chan ? chan->device : NULL;
        struct dma_async_tx_descriptor *tx = NULL;
+       dma_addr_t *dma_src = NULL;
 
        BUG_ON(src_cnt <= 1);
 
-       if (device && src_cnt <= device->max_xor) {
-               dma_addr_t *dma_src = (dma_addr_t *) src_list;
-               unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
+       if (submit->scribble)
+               dma_src = submit->scribble;
+       else if (sizeof(dma_addr_t) <= sizeof(struct page *))
+               dma_src = (dma_addr_t *) src_list;
+
+       if (dma_src && device && src_cnt <= device->max_xor &&
+           is_dma_xor_aligned(device, offset, 0, len)) {
+               unsigned long dma_prep_flags = 0;
                int i;
 
                pr_debug("%s: (async) len: %zu\n", __func__, len);
 
+               if (submit->cb_fn)
+                       dma_prep_flags |= DMA_PREP_INTERRUPT;
+               if (submit->flags & ASYNC_TX_FENCE)
+                       dma_prep_flags |= DMA_PREP_FENCE;
                for (i = 0; i < src_cnt; i++)
                        dma_src[i] = dma_map_page(device->dev, src_list[i],
                                                  offset, len, DMA_TO_DEVICE);
 
-               tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt,
-                                                     len, result,
-                                                     dma_prep_flags);
+               tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt,
+                                                    len, result,
+                                                    dma_prep_flags);
                if (unlikely(!tx)) {
-                       async_tx_quiesce(&depend_tx);
+                       async_tx_quiesce(&submit->depend_tx);
 
                        while (!tx) {
                                dma_async_issue_pending(chan);
-                               tx = device->device_prep_dma_zero_sum(chan,
+                               tx = device->device_prep_dma_xor_val(chan,
                                        dma_src, src_cnt, len, result,
                                        dma_prep_flags);
                        }
                }
 
-               async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
+               async_tx_submit(chan, tx, submit);
        } else {
-               unsigned long xor_flags = flags;
+               enum async_tx_flags flags_orig = submit->flags;
 
                pr_debug("%s: (sync) len: %zu\n", __func__, len);
+               WARN_ONCE(device && src_cnt <= device->max_xor,
+                         "%s: no space for dma address conversion\n",
+                         __func__);
 
-               xor_flags |= ASYNC_TX_XOR_DROP_DST;
-               xor_flags &= ~ASYNC_TX_ACK;
+               submit->flags |= ASYNC_TX_XOR_DROP_DST;
+               submit->flags &= ~ASYNC_TX_ACK;
 
-               tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags,
-                       depend_tx, NULL, NULL);
+               tx = async_xor(dest, src_list, offset, src_cnt, len, submit);
 
                async_tx_quiesce(&tx);
 
-               *result = page_is_zero(dest, offset, len) ? 0 : 1;
+               *result = !page_is_zero(dest, offset, len) << SUM_CHECK_P;
 
-               async_tx_sync_epilog(cb_fn, cb_param);
+               async_tx_sync_epilog(submit);
+               submit->flags = flags_orig;
        }
 
        return tx;
 }
-EXPORT_SYMBOL_GPL(async_xor_zero_sum);
-
-static int __init async_xor_init(void)
-{
-       #ifdef CONFIG_ASYNC_TX_DMA
-       /* To conserve stack space the input src_list (array of page pointers)
-        * is reused to hold the array of dma addresses passed to the driver.
-        * This conversion is only possible when dma_addr_t is less than the
-        * the size of a pointer.  HIGHMEM64G is known to violate this
-        * assumption.
-        */
-       BUILD_BUG_ON(sizeof(dma_addr_t) > sizeof(struct page *));
-       #endif
-
-       return 0;
-}
-
-static void __exit async_xor_exit(void)
-{
-       do { } while (0);
-}
-
-module_init(async_xor_init);
-module_exit(async_xor_exit);
+EXPORT_SYMBOL_GPL(async_xor_val);
 
 MODULE_AUTHOR("Intel Corporation");
 MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api");
diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c
new file mode 100644 (file)
index 0000000..3ec27c7
--- /dev/null
@@ -0,0 +1,240 @@
+/*
+ * asynchronous raid6 recovery self test
+ * Copyright (c) 2009, Intel Corporation.
+ *
+ * based on drivers/md/raid6test/test.c:
+ *     Copyright 2002-2007 H. Peter Anvin
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+#include <linux/async_tx.h>
+#include <linux/random.h>
+
+#undef pr
+#define pr(fmt, args...) pr_info("raid6test: " fmt, ##args)
+
+#define NDISKS 16 /* Including P and Q */
+
+static struct page *dataptrs[NDISKS];
+static addr_conv_t addr_conv[NDISKS];
+static struct page *data[NDISKS+3];
+static struct page *spare;
+static struct page *recovi;
+static struct page *recovj;
+
+static void callback(void *param)
+{
+       struct completion *cmp = param;
+
+       complete(cmp);
+}
+
+static void makedata(int disks)
+{
+       int i, j;
+
+       for (i = 0; i < disks; i++) {
+               for (j = 0; j < PAGE_SIZE; j += sizeof(u32)) {
+                       u32 *p = page_address(data[i]) + j;
+
+                       *p = random32();
+               }
+
+               dataptrs[i] = data[i];
+       }
+}
+
+static char disk_type(int d, int disks)
+{
+       if (d == disks - 2)
+               return 'P';
+       else if (d == disks - 1)
+               return 'Q';
+       else
+               return 'D';
+}
+
+/* Recover two failed blocks. */
+static void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, struct page **ptrs)
+{
+       struct async_submit_ctl submit;
+       struct completion cmp;
+       struct dma_async_tx_descriptor *tx = NULL;
+       enum sum_check_flags result = ~0;
+
+       if (faila > failb)
+               swap(faila, failb);
+
+       if (failb == disks-1) {
+               if (faila == disks-2) {
+                       /* P+Q failure.  Just rebuild the syndrome. */
+                       init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
+                       tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit);
+               } else {
+                       struct page *blocks[disks];
+                       struct page *dest;
+                       int count = 0;
+                       int i;
+
+                       /* data+Q failure.  Reconstruct data from P,
+                        * then rebuild syndrome
+                        */
+                       for (i = disks; i-- ; ) {
+                               if (i == faila || i == failb)
+                                       continue;
+                               blocks[count++] = ptrs[i];
+                       }
+                       dest = ptrs[faila];
+                       init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL,
+                                         NULL, NULL, addr_conv);
+                       tx = async_xor(dest, blocks, 0, count, bytes, &submit);
+
+                       init_async_submit(&submit, 0, tx, NULL, NULL, addr_conv);
+                       tx = async_gen_syndrome(ptrs, 0, disks, bytes, &submit);
+               }
+       } else {
+               if (failb == disks-2) {
+                       /* data+P failure. */
+                       init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
+                       tx = async_raid6_datap_recov(disks, bytes, faila, ptrs, &submit);
+               } else {
+                       /* data+data failure. */
+                       init_async_submit(&submit, 0, NULL, NULL, NULL, addr_conv);
+                       tx = async_raid6_2data_recov(disks, bytes, faila, failb, ptrs, &submit);
+               }
+       }
+       init_completion(&cmp);
+       init_async_submit(&submit, ASYNC_TX_ACK, tx, callback, &cmp, addr_conv);
+       tx = async_syndrome_val(ptrs, 0, disks, bytes, &result, spare, &submit);
+       async_tx_issue_pending(tx);
+
+       if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0)
+               pr("%s: timeout! (faila: %d failb: %d disks: %d)\n",
+                  __func__, faila, failb, disks);
+
+       if (result != 0)
+               pr("%s: validation failure! faila: %d failb: %d sum_check_flags: %x\n",
+                  __func__, faila, failb, result);
+}
+
+static int test_disks(int i, int j, int disks)
+{
+       int erra, errb;
+
+       memset(page_address(recovi), 0xf0, PAGE_SIZE);
+       memset(page_address(recovj), 0xba, PAGE_SIZE);
+
+       dataptrs[i] = recovi;
+       dataptrs[j] = recovj;
+
+       raid6_dual_recov(disks, PAGE_SIZE, i, j, dataptrs);
+
+       erra = memcmp(page_address(data[i]), page_address(recovi), PAGE_SIZE);
+       errb = memcmp(page_address(data[j]), page_address(recovj), PAGE_SIZE);
+
+       pr("%s(%d, %d): faila=%3d(%c)  failb=%3d(%c)  %s\n",
+          __func__, i, j, i, disk_type(i, disks), j, disk_type(j, disks),
+          (!erra && !errb) ? "OK" : !erra ? "ERRB" : !errb ? "ERRA" : "ERRAB");
+
+       dataptrs[i] = data[i];
+       dataptrs[j] = data[j];
+
+       return erra || errb;
+}
+
+static int test(int disks, int *tests)
+{
+       struct dma_async_tx_descriptor *tx;
+       struct async_submit_ctl submit;
+       struct completion cmp;
+       int err = 0;
+       int i, j;
+
+       recovi = data[disks];
+       recovj = data[disks+1];
+       spare  = data[disks+2];
+
+       makedata(disks);
+
+       /* Nuke syndromes */
+       memset(page_address(data[disks-2]), 0xee, PAGE_SIZE);
+       memset(page_address(data[disks-1]), 0xee, PAGE_SIZE);
+
+       /* Generate assumed good syndrome */
+       init_completion(&cmp);
+       init_async_submit(&submit, ASYNC_TX_ACK, NULL, callback, &cmp, addr_conv);
+       tx = async_gen_syndrome(dataptrs, 0, disks, PAGE_SIZE, &submit);
+       async_tx_issue_pending(tx);
+
+       if (wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)) == 0) {
+               pr("error: initial gen_syndrome(%d) timed out\n", disks);
+               return 1;
+       }
+
+       pr("testing the %d-disk case...\n", disks);
+       for (i = 0; i < disks-1; i++)
+               for (j = i+1; j < disks; j++) {
+                       (*tests)++;
+                       err += test_disks(i, j, disks);
+               }
+
+       return err;
+}
+
+
+static int raid6_test(void)
+{
+       int err = 0;
+       int tests = 0;
+       int i;
+
+       for (i = 0; i < NDISKS+3; i++) {
+               data[i] = alloc_page(GFP_KERNEL);
+               if (!data[i]) {
+                       while (i--)
+                               put_page(data[i]);
+                       return -ENOMEM;
+               }
+       }
+
+       /* the 4-disk and 5-disk cases are special for the recovery code */
+       if (NDISKS > 4)
+               err += test(4, &tests);
+       if (NDISKS > 5)
+               err += test(5, &tests);
+       err += test(NDISKS, &tests);
+
+       pr("\n");
+       pr("complete (%d tests, %d failure%s)\n",
+          tests, err, err == 1 ? "" : "s");
+
+       for (i = 0; i < NDISKS+3; i++)
+               put_page(data[i]);
+
+       return 0;
+}
+
+static void raid6_test_exit(void)
+{
+}
+
+/* when compiled-in, wait for drivers to load first (assumes dma drivers
+ * are also compiled-in)
+ */
+late_initcall(raid6_test);
+module_exit(raid6_test_exit);
+MODULE_AUTHOR("Dan Williams <dan.j.williams@intel.com>");
+MODULE_DESCRIPTION("asynchronous RAID-6 recovery self tests");
+MODULE_LICENSE("GPL");
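
A quick check on the coverage arithmetic: test() above walks every distinct (i, j) failure pair, i.e. disks * (disks - 1) / 2 recoveries per pass, so with NDISKS = 16 a full raid6_test() run performs 6 + 10 + 120 = 136 tests:

	/* pairs exercised by one test(disks, ...) pass: C(disks, 2) */
	static inline int raid6_test_pairs(int disks)
	{
		return disks * (disks - 1) / 2;
	}

	/* raid6_test(): raid6_test_pairs(4) + raid6_test_pairs(5) +
	 * raid6_test_pairs(16) == 6 + 10 + 120 == 136 */
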
index d295bdc..9335b87 100644 (file)
@@ -115,6 +115,9 @@ static const struct file_operations acpi_button_state_fops = {
        .release = single_release,
 };
 
+static BLOCKING_NOTIFIER_HEAD(acpi_lid_notifier);
+static struct acpi_device *lid_device;
+
 /* --------------------------------------------------------------------------
                               FS Interface (/proc)
    -------------------------------------------------------------------------- */
@@ -231,11 +234,38 @@ static int acpi_button_remove_fs(struct acpi_device *device)
 /* --------------------------------------------------------------------------
                                 Driver Interface
    -------------------------------------------------------------------------- */
+int acpi_lid_notifier_register(struct notifier_block *nb)
+{
+       return blocking_notifier_chain_register(&acpi_lid_notifier, nb);
+}
+EXPORT_SYMBOL(acpi_lid_notifier_register);
+
+int acpi_lid_notifier_unregister(struct notifier_block *nb)
+{
+       return blocking_notifier_chain_unregister(&acpi_lid_notifier, nb);
+}
+EXPORT_SYMBOL(acpi_lid_notifier_unregister);
+
+int acpi_lid_open(void)
+{
+       acpi_status status;
+       unsigned long long state;
+
+       status = acpi_evaluate_integer(lid_device->handle, "_LID", NULL,
+                                      &state);
+       if (ACPI_FAILURE(status))
+               return -ENODEV;
+
+       return !!state;
+}
+EXPORT_SYMBOL(acpi_lid_open);
+
 static int acpi_lid_send_state(struct acpi_device *device)
 {
        struct acpi_button *button = acpi_driver_data(device);
        unsigned long long state;
        acpi_status status;
+       int ret;
 
        status = acpi_evaluate_integer(device->handle, "_LID", NULL, &state);
        if (ACPI_FAILURE(status))
@@ -244,7 +274,12 @@ static int acpi_lid_send_state(struct acpi_device *device)
        /* input layer checks if event is redundant */
        input_report_switch(button->input, SW_LID, !state);
        input_sync(button->input);
-       return 0;
+
+       ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, device);
+       if (ret == NOTIFY_DONE)
+               ret = blocking_notifier_call_chain(&acpi_lid_notifier, state,
+                                                  device);
+       return ret;
 }
 
 static void acpi_button_notify(struct acpi_device *device, u32 event)
@@ -366,8 +401,14 @@ static int acpi_button_add(struct acpi_device *device)
        error = input_register_device(input);
        if (error)
                goto err_remove_fs;
-       if (button->type == ACPI_BUTTON_TYPE_LID)
+       if (button->type == ACPI_BUTTON_TYPE_LID) {
                acpi_lid_send_state(device);
+               /*
+                * This assumes there's only one lid device, or if there are
+                * more we only care about the last one...
+                */
+               lid_device = device;
+       }
 
        if (device->wakeup.flags.valid) {
                /* Button's GPE is run-wake GPE */
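
The new exports let other drivers track lid state without evaluating _LID themselves. A minimal consumer sketch; the my_* names are hypothetical, and the declarations are assumed to live in include/acpi/button.h:

	#include <linux/notifier.h>
	#include <acpi/button.h>	/* assumed location of the declarations */

	static int my_lid_notify(struct notifier_block *nb, unsigned long state,
				 void *data)
	{
		/* state is the raw _LID value: nonzero means the lid is open */
		pr_info("lid %s\n", state ? "opened" : "closed");
		return NOTIFY_OK;
	}

	static struct notifier_block my_lid_nb = {
		.notifier_call = my_lid_notify,
	};

	/* registration, e.g. from a driver's probe path */
	acpi_lid_notifier_register(&my_lid_nb);
	/* one-off query: 1 = open, 0 = closed, -ENODEV if no lid device */
	pr_info("lid currently open: %d\n", acpi_lid_open());
	/* teardown */
	acpi_lid_notifier_unregister(&my_lid_nb);
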
index 71d1b9b..614da5b 100644 (file)
@@ -3412,7 +3412,7 @@ static int cdrom_print_info(const char *header, int val, char *info,
        return 0;
 }
 
-static int cdrom_sysctl_info(ctl_table *ctl, int write, struct file * filp,
+static int cdrom_sysctl_info(ctl_table *ctl, int write,
                            void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int pos;
@@ -3489,7 +3489,7 @@ static int cdrom_sysctl_info(ctl_table *ctl, int write, struct file * filp,
                goto done;
 doit:
        mutex_unlock(&cdrom_mutex);
-       return proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+       return proc_dostring(ctl, write, buffer, lenp, ppos);
 done:
        printk(KERN_INFO "cdrom: info buffer too small\n");
        goto doit;
@@ -3525,12 +3525,12 @@ static void cdrom_update_settings(void)
        mutex_unlock(&cdrom_mutex);
 }
 
-static int cdrom_sysctl_handler(ctl_table *ctl, int write, struct file * filp,
+static int cdrom_sysctl_handler(ctl_table *ctl, int write,
                                void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int ret;
        
-       ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+       ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
        if (write) {
        
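Both handlers above lose their struct file * argument, following the tree-wide change to the sysctl proc handler prototype, so any out-of-tree handler needs the same treatment. A sketch of the new shape (my_sysctl_handler is a placeholder, not part of this patch):

	static int my_sysctl_handler(ctl_table *ctl, int write,
				     void __user *buffer, size_t *lenp,
				     loff_t *ppos)
	{
		int ret;

		/* note: no struct file * is passed through any more */
		ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
		if (ret == 0 && write)
			pr_debug("value updated\n");
		return ret;
	}
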
index 6a06913..08a6f50 100644 (file)
@@ -1087,6 +1087,14 @@ config MMTIMER
          The mmtimer device allows direct userspace access to the
          Altix system timer.
 
+config UV_MMTIMER
+       tristate "UV_MMTIMER Memory mapped RTC for SGI UV"
+       depends on X86_UV
+       default m
+       help
+         The uv_mmtimer device allows direct userspace access to the
+         UV system timer.
+
 source "drivers/char/tpm/Kconfig"
 
 config TELCLOCK
index 66f779a..19a79dd 100644 (file)
@@ -58,6 +58,7 @@ obj-$(CONFIG_RAW_DRIVER)      += raw.o
 obj-$(CONFIG_SGI_SNSC)         += snsc.o snsc_event.o
 obj-$(CONFIG_MSPEC)            += mspec.o
 obj-$(CONFIG_MMTIMER)          += mmtimer.o
+obj-$(CONFIG_UV_MMTIMER)       += uv_mmtimer.o
 obj-$(CONFIG_VIOTAPE)          += viotape.o
 obj-$(CONFIG_HVCS)             += hvcs.o
 obj-$(CONFIG_IBM_BSR)          += bsr.o
index 1540e69..4068467 100644 (file)
@@ -46,6 +46,8 @@
 #define PCI_DEVICE_ID_INTEL_Q35_IG          0x29B2
 #define PCI_DEVICE_ID_INTEL_Q33_HB          0x29D0
 #define PCI_DEVICE_ID_INTEL_Q33_IG          0x29D2
+#define PCI_DEVICE_ID_INTEL_B43_HB          0x2E40
+#define PCI_DEVICE_ID_INTEL_B43_IG          0x2E42
 #define PCI_DEVICE_ID_INTEL_GM45_HB         0x2A40
 #define PCI_DEVICE_ID_INTEL_GM45_IG         0x2A42
 #define PCI_DEVICE_ID_INTEL_IGD_E_HB        0x2E00
@@ -91,6 +93,7 @@
                agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_G45_HB || \
                agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_GM45_HB || \
                agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_G41_HB || \
+               agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_B43_HB || \
                agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_D_HB || \
                agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_M_HB || \
                agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_IGDNG_MA_HB)
@@ -804,23 +807,39 @@ static void intel_i830_setup_flush(void)
        if (!intel_private.i8xx_page)
                return;
 
-       /* make page uncached */
-       map_page_into_agp(intel_private.i8xx_page);
-
        intel_private.i8xx_flush_page = kmap(intel_private.i8xx_page);
        if (!intel_private.i8xx_flush_page)
                intel_i830_fini_flush();
 }
 
+static void
+do_wbinvd(void *null)
+{
+       wbinvd();
+}
+
+/* The chipset_flush interface needs to get data that has already been
+ * flushed out of the CPU all the way out to main memory, because the GPU
+ * doesn't snoop those buffers.
+ *
+ * The 8xx series doesn't have the same lovely interface for flushing the
+ * chipset write buffers that the later chips do. According to the 865
+ * specs, it's 64 octwords, or 1KB.  So, to get those previous things in
+ * that buffer out, we just fill 1KB and clflush it out, on the assumption
+ * that it'll push whatever was in there out.  It appears to work.
+ */
 static void intel_i830_chipset_flush(struct agp_bridge_data *bridge)
 {
        unsigned int *pg = intel_private.i8xx_flush_page;
-       int i;
 
-       for (i = 0; i < 256; i += 2)
-               *(pg + i) = i;
+       memset(pg, 0, 1024);
 
-       wmb();
+       if (cpu_has_clflush) {
+               clflush_cache_range(pg, 1024);
+       } else {
+               if (on_each_cpu(do_wbinvd, NULL, 1) != 0)
+                       printk(KERN_ERR "Timed out waiting for cache flush.\n");
+       }
 }
 
 /* The intel i830 automatically initializes the agp aperture during POST.
@@ -1341,6 +1360,7 @@ static void intel_i965_get_gtt_range(int *gtt_offset, int *gtt_size)
        case PCI_DEVICE_ID_INTEL_Q45_HB:
        case PCI_DEVICE_ID_INTEL_G45_HB:
        case PCI_DEVICE_ID_INTEL_G41_HB:
+       case PCI_DEVICE_ID_INTEL_B43_HB:
        case PCI_DEVICE_ID_INTEL_IGDNG_D_HB:
        case PCI_DEVICE_ID_INTEL_IGDNG_M_HB:
        case PCI_DEVICE_ID_INTEL_IGDNG_MA_HB:
@@ -2335,6 +2355,8 @@ static const struct intel_driver_description {
            "Q45/Q43", NULL, &intel_i965_driver },
        { PCI_DEVICE_ID_INTEL_G45_HB, PCI_DEVICE_ID_INTEL_G45_IG, 0,
            "G45/G43", NULL, &intel_i965_driver },
+       { PCI_DEVICE_ID_INTEL_B43_HB, PCI_DEVICE_ID_INTEL_B43_IG, 0,
+           "B43", NULL, &intel_i965_driver },
        { PCI_DEVICE_ID_INTEL_G41_HB, PCI_DEVICE_ID_INTEL_G41_IG, 0,
            "G41", NULL, &intel_i965_driver },
        { PCI_DEVICE_ID_INTEL_IGDNG_D_HB, PCI_DEVICE_ID_INTEL_IGDNG_D_IG, 0,
@@ -2535,6 +2557,7 @@ static struct pci_device_id agp_intel_pci_table[] = {
        ID(PCI_DEVICE_ID_INTEL_Q45_HB),
        ID(PCI_DEVICE_ID_INTEL_G45_HB),
        ID(PCI_DEVICE_ID_INTEL_G41_HB),
+       ID(PCI_DEVICE_ID_INTEL_B43_HB),
        ID(PCI_DEVICE_ID_INTEL_IGDNG_D_HB),
        ID(PCI_DEVICE_ID_INTEL_IGDNG_M_HB),
        ID(PCI_DEVICE_ID_INTEL_IGDNG_MA_HB),
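
A quick check of the sizing in the i830 flush comment above: an octword is 128 bits, so the 865's write buffer is 64 * 16 = 1024 bytes, exactly the span covered by the new memset() plus clflush_cache_range() pair (the old loop only dirtied every other int within the same 1 KB). Illustrative constants, not from this patch:

	#define I8XX_WB_OCTWORDS	64
	#define OCTWORD_BYTES		16	/* 128 bits */
	/* I8XX_WB_OCTWORDS * OCTWORD_BYTES == 1024, the length passed to
	 * memset() and clflush_cache_range() above */
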
index 0a01329..e3dd24b 100644 (file)
@@ -1,8 +1,7 @@
 /*
  * Blackfin On-Chip OTP Memory Interface
- *  Supports BF52x/BF54x
  *
- * Copyright 2007-2008 Analog Devices Inc.
+ * Copyright 2007-2009 Analog Devices Inc.
  *
  * Enter bugs at http://blackfin.uclinux.org/
  *
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/types.h>
+#include <mtd/mtd-abi.h>
 
 #include <asm/blackfin.h>
+#include <asm/bfrom.h>
 #include <asm/uaccess.h>
 
 #define stamp(fmt, args...) pr_debug("%s:%i: " fmt "\n", __func__, __LINE__, ## args)
 
 static DEFINE_MUTEX(bfin_otp_lock);
 
-/* OTP Boot ROM functions */
-#define _BOOTROM_OTP_COMMAND           0xEF000018
-#define _BOOTROM_OTP_READ              0xEF00001A
-#define _BOOTROM_OTP_WRITE             0xEF00001C
-
-static u32 (* const otp_command)(u32 command, u32 value) = (void *)_BOOTROM_OTP_COMMAND;
-static u32 (* const otp_read)(u32 page, u32 flags, u64 *page_content) = (void *)_BOOTROM_OTP_READ;
-static u32 (* const otp_write)(u32 page, u32 flags, u64 *page_content) = (void *)_BOOTROM_OTP_WRITE;
-
-/* otp_command(): defines for "command" */
-#define OTP_INIT             0x00000001
-#define OTP_CLOSE            0x00000002
-
-/* otp_{read,write}(): defines for "flags" */
-#define OTP_LOWER_HALF       0x00000000 /* select upper/lower 64-bit half (bit 0) */
-#define OTP_UPPER_HALF       0x00000001
-#define OTP_NO_ECC           0x00000010 /* do not use ECC */
-#define OTP_LOCK             0x00000020 /* sets page protection bit for page */
-#define OTP_ACCESS_READ      0x00001000
-#define OTP_ACCESS_READWRITE 0x00002000
-
-/* Return values for all functions */
-#define OTP_SUCCESS          0x00000000
-#define OTP_MASTER_ERROR     0x001
-#define OTP_WRITE_ERROR      0x003
-#define OTP_READ_ERROR       0x005
-#define OTP_ACC_VIO_ERROR    0x009
-#define OTP_DATA_MULT_ERROR  0x011
-#define OTP_ECC_MULT_ERROR   0x021
-#define OTP_PREV_WR_ERROR    0x041
-#define OTP_DATA_SB_WARN     0x100
-#define OTP_ECC_SB_WARN      0x200
-
 /**
  *     bfin_otp_read - Read OTP pages
  *
@@ -86,9 +54,11 @@ static ssize_t bfin_otp_read(struct file *file, char __user *buff, size_t count,
        page = *pos / (sizeof(u64) * 2);
        while (bytes_done < count) {
                flags = (*pos % (sizeof(u64) * 2) ? OTP_UPPER_HALF : OTP_LOWER_HALF);
-               stamp("processing page %i (%s)", page, (flags == OTP_UPPER_HALF ? "upper" : "lower"));
-               ret = otp_read(page, flags, &content);
+               stamp("processing page %i (0x%x:%s)", page, flags,
+                       (flags & OTP_UPPER_HALF ? "upper" : "lower"));
+               ret = bfrom_OtpRead(page, flags, &content);
                if (ret & OTP_MASTER_ERROR) {
+                       stamp("error from otp: 0x%x", ret);
                        bytes_done = -EIO;
                        break;
                }
@@ -96,7 +66,7 @@ static ssize_t bfin_otp_read(struct file *file, char __user *buff, size_t count,
                        bytes_done = -EFAULT;
                        break;
                }
-               if (flags == OTP_UPPER_HALF)
+               if (flags & OTP_UPPER_HALF)
                        ++page;
                bytes_done += sizeof(content);
                *pos += sizeof(content);
@@ -108,14 +78,53 @@ static ssize_t bfin_otp_read(struct file *file, char __user *buff, size_t count,
 }
 
 #ifdef CONFIG_BFIN_OTP_WRITE_ENABLE
+static bool allow_writes;
+
+/**
+ *     bfin_otp_init_timing - setup OTP timing parameters
+ *
+ *     Required before doing any write operation.  Algorithms from HRM.
+ */
+static u32 bfin_otp_init_timing(void)
+{
+       u32 tp1, tp2, tp3, timing;
+
+       tp1 = get_sclk() / 1000000;
+       tp2 = (2 * get_sclk() / 10000000) << 8;
+       tp3 = (0x1401) << 15;
+       timing = tp1 | tp2 | tp3;
+       if (bfrom_OtpCommand(OTP_INIT, timing))
+               return 0;
+
+       return timing;
+}
+
+/**
+ *     bfin_otp_deinit_timing - set timings to only allow reads
+ *
+ *     Should be called after all writes are done.
+ */
+static void bfin_otp_deinit_timing(u32 timing)
+{
+       /* mask bits [31:15] so that any attempts to write fail */
+       bfrom_OtpCommand(OTP_CLOSE, 0);
+       bfrom_OtpCommand(OTP_INIT, timing & ~(-1 << 15));
+       bfrom_OtpCommand(OTP_CLOSE, 0);
+}
+
 /**
- *     bfin_otp_write - Write OTP pages
+ *     bfin_otp_write - write OTP pages
  *
  *     All writes must be in half page chunks (half page == 64 bits).
  */
 static ssize_t bfin_otp_write(struct file *filp, const char __user *buff, size_t count, loff_t *pos)
 {
-       stampit();
+       ssize_t bytes_done;
+       u32 timing, page, base_flags, flags, ret;
+       u64 content;
+
+       if (!allow_writes)
+               return -EACCES;
 
        if (count % sizeof(u64))
                return -EMSGSIZE;
@@ -123,20 +132,96 @@ static ssize_t bfin_otp_write(struct file *filp, const char __user *buff, size_t
        if (mutex_lock_interruptible(&bfin_otp_lock))
                return -ERESTARTSYS;
 
-       /* need otp_init() documentation before this can be implemented */
+       stampit();
+
+       timing = bfin_otp_init_timing();
+       if (timing == 0) {
+               mutex_unlock(&bfin_otp_lock);
+               return -EIO;
+       }
+
+       base_flags = OTP_CHECK_FOR_PREV_WRITE;
+
+       bytes_done = 0;
+       page = *pos / (sizeof(u64) * 2);
+       while (bytes_done < count) {
+               flags = base_flags | (*pos % (sizeof(u64) * 2) ? OTP_UPPER_HALF : OTP_LOWER_HALF);
+               stamp("processing page %i (0x%x:%s) from %p", page, flags,
+                       (flags & OTP_UPPER_HALF ? "upper" : "lower"), buff + bytes_done);
+               if (copy_from_user(&content, buff + bytes_done, sizeof(content))) {
+                       bytes_done = -EFAULT;
+                       break;
+               }
+               ret = bfrom_OtpWrite(page, flags, &content);
+               if (ret & OTP_MASTER_ERROR) {
+                       stamp("error from otp: 0x%x", ret);
+                       bytes_done = -EIO;
+                       break;
+               }
+               if (flags & OTP_UPPER_HALF)
+                       ++page;
+               bytes_done += sizeof(content);
+               *pos += sizeof(content);
+       }
+
+       bfin_otp_deinit_timing(timing);
 
        mutex_unlock(&bfin_otp_lock);
 
+       return bytes_done;
+}
+
+static long bfin_otp_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
+{
+       stampit();
+
+       switch (cmd) {
+       case OTPLOCK: {
+               u32 timing;
+               int ret = -EIO;
+
+               if (!allow_writes)
+                       return -EACCES;
+
+               if (mutex_lock_interruptible(&bfin_otp_lock))
+                       return -ERESTARTSYS;
+
+               timing = bfin_otp_init_timing();
+               if (timing) {
+                       u32 otp_result = bfrom_OtpWrite(arg, OTP_LOCK, NULL);
+                       stamp("locking page %lu resulted in 0x%x", arg, otp_result);
+                       if (!(otp_result & OTP_MASTER_ERROR))
+                               ret = 0;
+
+                       bfin_otp_deinit_timing(timing);
+               }
+
+               mutex_unlock(&bfin_otp_lock);
+
+               return ret;
+       }
+
+       case MEMLOCK:
+               allow_writes = false;
+               return 0;
+
+       case MEMUNLOCK:
+               allow_writes = true;
+               return 0;
+       }
+
        return -EINVAL;
 }
 #else
 # define bfin_otp_write NULL
+# define bfin_otp_ioctl NULL
 #endif
 
 static struct file_operations bfin_otp_fops = {
-       .owner    = THIS_MODULE,
-       .read     = bfin_otp_read,
-       .write    = bfin_otp_write,
+       .owner          = THIS_MODULE,
+       .unlocked_ioctl = bfin_otp_ioctl,
+       .read           = bfin_otp_read,
+       .write          = bfin_otp_write,
 };
 
 static struct miscdevice bfin_otp_misc_device = {
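
The timing word built by bfin_otp_init_timing() packs three fields derived from the system clock. A worked example, assuming a 100 MHz SCLK (illustrative; the real value comes from get_sclk()):

	/* sclk = 100000000 (100 MHz) */
	u32 tp1 = 100000000 / 1000000;			/* = 100    = 0x0064 */
	u32 tp2 = (2 * 100000000 / 10000000) << 8;	/* = 20<<8  = 0x1400 */
	u32 tp3 = 0x1401 << 15;				/* = 0x0a008000 */
	u32 timing = tp1 | tp2 | tp3;			/* = 0x0a009464 */
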
index 4a9f349..70a770a 100644 (file)
@@ -166,9 +166,8 @@ static irqreturn_t hpet_interrupt(int irq, void *data)
                unsigned long m, t;
 
                t = devp->hd_ireqfreq;
-               m = read_counter(&devp->hd_hpet->hpet_mc);
-               write_counter(t + m + devp->hd_hpets->hp_delta,
-                             &devp->hd_timer->hpet_compare);
+               m = read_counter(&devp->hd_timer->hpet_compare);
+               write_counter(t + m, &devp->hd_timer->hpet_compare);
        }
 
        if (devp->hd_flags & HPET_SHARED_IRQ)
@@ -504,21 +503,25 @@ static int hpet_ioctl_ieon(struct hpet_dev *devp)
        g = v | Tn_32MODE_CNF_MASK | Tn_INT_ENB_CNF_MASK;
 
        if (devp->hd_flags & HPET_PERIODIC) {
-               write_counter(t, &timer->hpet_compare);
                g |= Tn_TYPE_CNF_MASK;
-               v |= Tn_TYPE_CNF_MASK;
-               writeq(v, &timer->hpet_config);
-               v |= Tn_VAL_SET_CNF_MASK;
+               v |= Tn_TYPE_CNF_MASK | Tn_VAL_SET_CNF_MASK;
                writeq(v, &timer->hpet_config);
                local_irq_save(flags);
 
-               /* NOTE:  what we modify here is a hidden accumulator
+               /*
+                * NOTE: First we modify the hidden accumulator
                 * register supported by periodic-capable comparators.
                 * We never want to modify the (single) counter; that
-                * would affect all the comparators.
+                * would affect all the comparators. The value written
+                * is the counter value when the first interrupt is due.
                 */
                m = read_counter(&hpet->hpet_mc);
                write_counter(t + m + hpetp->hp_delta, &timer->hpet_compare);
+               /*
+                * Then we modify the comparator, indicating the period
+                * for subsequent interrupt.
+                */
+               write_counter(t, &timer->hpet_compare);
        } else {
                local_irq_save(flags);
                m = read_counter(&hpet->hpet_mc);
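
The reordered writes matter because, once Tn_VAL_SET_CNF_MASK is armed, the first write lands in the hidden accumulator (the time of the first interrupt) and the second sets the period. A worked example with illustrative numbers:

	/* say main counter m = 1000, period t = 500, hp_delta = 10 */
	write_counter(500 + 1000 + 10, &timer->hpet_compare);	/* 1510: first irq */
	write_counter(500, &timer->hpet_compare);	/* period: then 2010, 2510, ... */
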
index 25ce15b..a632f25 100644 (file)
@@ -678,7 +678,7 @@ int hvc_poll(struct hvc_struct *hp)
 EXPORT_SYMBOL_GPL(hvc_poll);
 
 /**
- * hvc_resize() - Update terminal window size information.
+ * __hvc_resize() - Update terminal window size information.
  * @hp:                HVC console pointer
  * @ws:                Terminal window size structure
  *
@@ -687,12 +687,12 @@ EXPORT_SYMBOL_GPL(hvc_poll);
  *
  * Locking:    Locking free; the function MUST be called holding hp->lock
  */
-void hvc_resize(struct hvc_struct *hp, struct winsize ws)
+void __hvc_resize(struct hvc_struct *hp, struct winsize ws)
 {
        hp->ws = ws;
        schedule_work(&hp->tty_resize);
 }
-EXPORT_SYMBOL_GPL(hvc_resize);
+EXPORT_SYMBOL_GPL(__hvc_resize);
 
 /*
  * This kthread is either polling or interrupt driven.  This is determined by
index 3c85d78..10950ca 100644 (file)
@@ -28,6 +28,7 @@
 #define HVC_CONSOLE_H
 #include <linux/kref.h>
 #include <linux/tty.h>
+#include <linux/spinlock.h>
 
 /*
  * This is the max number of console adapters that can/will be found as
@@ -88,7 +89,16 @@ int hvc_poll(struct hvc_struct *hp);
 void hvc_kick(void);
 
 /* Resize hvc tty terminal window */
-extern void hvc_resize(struct hvc_struct *hp, struct winsize ws);
+extern void __hvc_resize(struct hvc_struct *hp, struct winsize ws);
+
+static inline void hvc_resize(struct hvc_struct *hp, struct winsize ws)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&hp->lock, flags);
+       __hvc_resize(hp, ws);
+       spin_unlock_irqrestore(&hp->lock, flags);
+}
 
 /* default notifier for irq based notification */
 extern int notifier_add_irq(struct hvc_struct *hp, int data);
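
The split makes the locking contract explicit: hvc_resize() is now a wrapper that takes hp->lock itself, while code already running under the lock must call __hvc_resize() directly (as the hvc_iucv hunk below does from its get_chars path). A sketch of the two call sites, in illustrative contexts:

	/* process context, hp->lock not held: the wrapper locks internally */
	hvc_resize(hp, ws);

	/* callback already running under hp->lock: use the raw variant */
	__hvc_resize(hp, ws);
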
index 0ecac7e..b8a5d65 100644 (file)
@@ -273,7 +273,9 @@ static int hvc_iucv_write(struct hvc_iucv_private *priv,
        case MSG_TYPE_WINSIZE:
                if (rb->mbuf->datalen != sizeof(struct winsize))
                        break;
-               hvc_resize(priv->hvc, *((struct winsize *) rb->mbuf->data));
+               /* The caller must ensure that the hvc is locked, which
+                * is the case when called from hvc_iucv_get_chars() */
+               __hvc_resize(priv->hvc, *((struct winsize *) rb->mbuf->data));
                break;
 
        case MSG_TYPE_ERROR:    /* ignored ... */
index 0aede1d..6c8b65d 100644 (file)
@@ -690,7 +690,7 @@ static ssize_t read_zero(struct file * file, char __user * buf,
 
                if (chunk > PAGE_SIZE)
                        chunk = PAGE_SIZE;      /* Just for latency reasons */
-               unwritten = clear_user(buf, chunk);
+               unwritten = __clear_user(buf, chunk);
                written += chunk - unwritten;
                if (unwritten)
                        break;
index 94ad2c3..a4ec50c 100644 (file)
@@ -281,12 +281,6 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd,
                case IOCTL_MW_REGISTER_IPC: {
                        unsigned int ipcnum = (unsigned int) ioarg;
        
-                       PRINTK_3(TRACE_MWAVE,
-                               "mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC"
-                               " ipcnum %x entry usIntCount %x\n",
-                               ipcnum,
-                               pDrvData->IPCs[ipcnum].usIntCount);
-       
                        if (ipcnum >= ARRAY_SIZE(pDrvData->IPCs)) {
                                PRINTK_ERROR(KERN_ERR_MWAVE
                                                "mwavedd::mwave_ioctl:"
@@ -295,6 +289,12 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd,
                                                ipcnum);
                                return -EINVAL;
                        }
+                       PRINTK_3(TRACE_MWAVE,
+                               "mwavedd::mwave_ioctl IOCTL_MW_REGISTER_IPC"
+                               " ipcnum %x entry usIntCount %x\n",
+                               ipcnum,
+                               pDrvData->IPCs[ipcnum].usIntCount);
+
                        lock_kernel();
                        pDrvData->IPCs[ipcnum].bIsHere = FALSE;
                        pDrvData->IPCs[ipcnum].bIsEnabled = TRUE;
@@ -310,11 +310,6 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd,
                case IOCTL_MW_GET_IPC: {
                        unsigned int ipcnum = (unsigned int) ioarg;
        
-                       PRINTK_3(TRACE_MWAVE,
-                               "mwavedd::mwave_ioctl IOCTL_MW_GET_IPC"
-                               " ipcnum %x, usIntCount %x\n",
-                               ipcnum,
-                               pDrvData->IPCs[ipcnum].usIntCount);
                        if (ipcnum >= ARRAY_SIZE(pDrvData->IPCs)) {
                                PRINTK_ERROR(KERN_ERR_MWAVE
                                                "mwavedd::mwave_ioctl:"
@@ -322,6 +317,11 @@ static long mwave_ioctl(struct file *file, unsigned int iocmd,
                                                " Invalid ipcnum %x\n", ipcnum);
                                return -EINVAL;
                        }
+                       PRINTK_3(TRACE_MWAVE,
+                               "mwavedd::mwave_ioctl IOCTL_MW_GET_IPC"
+                               " ipcnum %x, usIntCount %x\n",
+                               ipcnum,
+                               pDrvData->IPCs[ipcnum].usIntCount);
        
                        lock_kernel();
                        if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) {
index d8a9255..04b505e 100644 (file)
@@ -1231,7 +1231,7 @@ static char sysctl_bootid[16];
  * as an ASCII string in the standard UUID format.  If accesses via the
  * sysctl system call, it is returned as 16 bytes of binary data.
  */
-static int proc_do_uuid(ctl_table *table, int write, struct file *filp,
+static int proc_do_uuid(ctl_table *table, int write,
                        void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        ctl_table fake_table;
@@ -1254,7 +1254,7 @@ static int proc_do_uuid(ctl_table *table, int write, struct file *filp,
        fake_table.data = buf;
        fake_table.maxlen = sizeof(buf);
 
-       return proc_dostring(&fake_table, write, filp, buffer, lenp, ppos);
+       return proc_dostring(&fake_table, write, buffer, lenp, ppos);
 }
 
 static int uuid_strategy(ctl_table *table,
index eecee0f..7433955 100644 (file)
@@ -873,7 +873,7 @@ int riocontrol(struct rio_info *p, dev_t dev, int cmd, unsigned long arg, int su
                /*
                 ** It is important that the product code is an unsigned object!
                 */
-               if (DownLoad.ProductCode > MAX_PRODUCT) {
+               if (DownLoad.ProductCode >= MAX_PRODUCT) {
                        rio_dprintk(RIO_DEBUG_CTRL, "RIO_DOWNLOAD: Bad product code %d passed\n", DownLoad.ProductCode);
                        p->RIOError.Error = NO_SUCH_PRODUCT;
                        return -ENXIO;
diff --git a/drivers/char/uv_mmtimer.c b/drivers/char/uv_mmtimer.c
new file mode 100644 (file)
index 0000000..867b67b
--- /dev/null
@@ -0,0 +1,216 @@
+/*
+ * Timer device implementation for SGI UV platform.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2009 Silicon Graphics, Inc.  All rights reserved.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/ioctl.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/mmtimer.h>
+#include <linux/miscdevice.h>
+#include <linux/posix-timers.h>
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/math64.h>
+#include <linux/smp_lock.h>
+
+#include <asm/genapic.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/bios.h>
+#include <asm/uv/uv.h>
+
+MODULE_AUTHOR("Dimitri Sivanich <sivanich@sgi.com>");
+MODULE_DESCRIPTION("SGI UV Memory Mapped RTC Timer");
+MODULE_LICENSE("GPL");
+
+/* name of the device, usually in /dev */
+#define UV_MMTIMER_NAME "mmtimer"
+#define UV_MMTIMER_DESC "SGI UV Memory Mapped RTC Timer"
+#define UV_MMTIMER_VERSION "1.0"
+
+static long uv_mmtimer_ioctl(struct file *file, unsigned int cmd,
+                                               unsigned long arg);
+static int uv_mmtimer_mmap(struct file *file, struct vm_area_struct *vma);
+
+/*
+ * Period in femtoseconds (10^-15 s)
+ */
+static unsigned long uv_mmtimer_femtoperiod;
+
+static const struct file_operations uv_mmtimer_fops = {
+       .owner = THIS_MODULE,
+       .mmap = uv_mmtimer_mmap,
+       .unlocked_ioctl = uv_mmtimer_ioctl,
+};
+
+/**
+ * uv_mmtimer_ioctl - ioctl interface for /dev/uv_mmtimer
+ * @file: file structure for the device
+ * @cmd: command to execute
+ * @arg: optional argument to command
+ *
+ * Executes the command specified by @cmd.  Returns 0 for success, < 0 for
+ * failure.
+ *
+ * Valid commands:
+ *
+ * %MMTIMER_GETOFFSET - Should return the offset (relative to the start
+ * of the page where the registers are mapped) for the counter in question.
+ *
+ * %MMTIMER_GETRES - Returns the resolution of the clock in femto (10^-15)
+ * seconds
+ *
+ * %MMTIMER_GETFREQ - Copies the frequency of the clock in Hz to the address
+ * specified by @arg
+ *
+ * %MMTIMER_GETBITS - Returns the number of bits in the clock's counter
+ *
+ * %MMTIMER_MMAPAVAIL - Returns 1 if registers can be mmap'd into userspace
+ *
+ * %MMTIMER_GETCOUNTER - Gets the current value in the counter and places it
+ * in the address specified by @arg.
+ */
+static long uv_mmtimer_ioctl(struct file *file, unsigned int cmd,
+                                               unsigned long arg)
+{
+       int ret = 0;
+
+       switch (cmd) {
+       case MMTIMER_GETOFFSET: /* offset of the counter */
+               /*
+                * UV RTC register is on its own page
+                */
+               if (PAGE_SIZE <= (1 << 16))
+                       ret = ((UV_LOCAL_MMR_BASE | UVH_RTC) & (PAGE_SIZE-1))
+                               / 8;
+               else
+                       ret = -ENOSYS;
+               break;
+
+       case MMTIMER_GETRES: /* resolution of the clock in 10^-15 s */
+               if (copy_to_user((unsigned long __user *)arg,
+                               &uv_mmtimer_femtoperiod, sizeof(unsigned long)))
+                       ret = -EFAULT;
+               break;
+
+       case MMTIMER_GETFREQ: /* frequency in Hz */
+               if (copy_to_user((unsigned long __user *)arg,
+                               &sn_rtc_cycles_per_second,
+                               sizeof(unsigned long)))
+                       ret = -EFAULT;
+               break;
+
+       case MMTIMER_GETBITS: /* number of bits in the clock */
+               ret = hweight64(UVH_RTC_REAL_TIME_CLOCK_MASK);
+               break;
+
+       case MMTIMER_MMAPAVAIL: /* can we mmap the clock into userspace? */
+               ret = (PAGE_SIZE <= (1 << 16)) ? 1 : 0;
+               break;
+
+       case MMTIMER_GETCOUNTER:
+               if (copy_to_user((unsigned long __user *)arg,
+                               (unsigned long *)uv_local_mmr_address(UVH_RTC),
+                               sizeof(unsigned long)))
+                       ret = -EFAULT;
+               break;
+       default:
+               ret = -ENOTTY;
+               break;
+       }
+       return ret;
+}
+
+/**
+ * uv_mmtimer_mmap - maps the clock's registers into userspace
+ * @file: file structure for the device
+ * @vma: VMA to map the registers into
+ *
+ * Calls remap_pfn_range() to map the clock's registers into
+ * the calling process' address space.
+ */
+static int uv_mmtimer_mmap(struct file *file, struct vm_area_struct *vma)
+{
+       unsigned long uv_mmtimer_addr;
+
+       if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+               return -EINVAL;
+
+       if (vma->vm_flags & VM_WRITE)
+               return -EPERM;
+
+       if (PAGE_SIZE > (1 << 16))
+               return -ENOSYS;
+
+       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+       uv_mmtimer_addr = UV_LOCAL_MMR_BASE | UVH_RTC;
+       uv_mmtimer_addr &= ~(PAGE_SIZE - 1);
+       uv_mmtimer_addr &= 0xfffffffffffffffUL;
+
+       if (remap_pfn_range(vma, vma->vm_start, uv_mmtimer_addr >> PAGE_SHIFT,
+                                       PAGE_SIZE, vma->vm_page_prot)) {
+               printk(KERN_ERR "remap_pfn_range failed in uv_mmtimer_mmap\n");
+               return -EAGAIN;
+       }
+
+       return 0;
+}
+
+static struct miscdevice uv_mmtimer_miscdev = {
+       MISC_DYNAMIC_MINOR,
+       UV_MMTIMER_NAME,
+       &uv_mmtimer_fops
+};
+
+
+/**
+ * uv_mmtimer_init - device initialization routine
+ *
+ * Does initial setup for the uv_mmtimer device.
+ */
+static int __init uv_mmtimer_init(void)
+{
+       if (!is_uv_system()) {
+               printk(KERN_ERR "%s: Hardware unsupported\n", UV_MMTIMER_NAME);
+               return -1;
+       }
+
+       /*
+        * Sanity check the cycles/sec variable
+        */
+       if (sn_rtc_cycles_per_second < 100000) {
+               printk(KERN_ERR "%s: unable to determine clock frequency\n",
+                      UV_MMTIMER_NAME);
+               return -1;
+       }
+
+       uv_mmtimer_femtoperiod = ((unsigned long)1E15 +
+                               sn_rtc_cycles_per_second / 2) /
+                               sn_rtc_cycles_per_second;
+
+       if (misc_register(&uv_mmtimer_miscdev)) {
+               printk(KERN_ERR "%s: failed to register device\n",
+                      UV_MMTIMER_NAME);
+               return -1;
+       }
+
+       printk(KERN_INFO "%s: v%s, %ld MHz\n", UV_MMTIMER_DESC,
+               UV_MMTIMER_VERSION,
+               sn_rtc_cycles_per_second/(unsigned long)1E6);
+
+       return 0;
+}
+
+module_init(uv_mmtimer_init);
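
The femtoperiod computation above is rounded division: period_fs = (10^15 + f/2) / f for an RTC of f Hz. For example, assuming a 100 MHz RTC clock (illustrative; the driver reads sn_rtc_cycles_per_second):

	/* f = 100000000 (100 MHz) */
	unsigned long period_fs = (1000000000000000UL + 100000000 / 2)
					/ 100000000;
	/* period_fs == 10000000 fs, i.e. 10 ns per RTC tick */
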
index 25b743a..52e6bb7 100644 (file)
@@ -28,7 +28,7 @@
 #include <linux/device.h>
 #include <linux/dca.h>
 
-#define DCA_VERSION "1.8"
+#define DCA_VERSION "1.12.1"
 
 MODULE_VERSION(DCA_VERSION);
 MODULE_LICENSE("GPL");
@@ -36,20 +36,92 @@ MODULE_AUTHOR("Intel Corporation");
 
 static DEFINE_SPINLOCK(dca_lock);
 
-static LIST_HEAD(dca_providers);
+static LIST_HEAD(dca_domains);
 
-static struct dca_provider *dca_find_provider_by_dev(struct device *dev)
+static struct pci_bus *dca_pci_rc_from_dev(struct device *dev)
 {
-       struct dca_provider *dca, *ret = NULL;
+       struct pci_dev *pdev = to_pci_dev(dev);
+       struct pci_bus *bus = pdev->bus;
 
-       list_for_each_entry(dca, &dca_providers, node) {
-               if ((!dev) || (dca->ops->dev_managed(dca, dev))) {
-                       ret = dca;
-                       break;
-               }
+       while (bus->parent)
+               bus = bus->parent;
+
+       return bus;
+}
+
+static struct dca_domain *dca_allocate_domain(struct pci_bus *rc)
+{
+       struct dca_domain *domain;
+
+       domain = kzalloc(sizeof(*domain), GFP_NOWAIT);
+       if (!domain)
+               return NULL;
+
+       INIT_LIST_HEAD(&domain->dca_providers);
+       domain->pci_rc = rc;
+
+       return domain;
+}
+
+static void dca_free_domain(struct dca_domain *domain)
+{
+       list_del(&domain->node);
+       kfree(domain);
+}
+
+static struct dca_domain *dca_find_domain(struct pci_bus *rc)
+{
+       struct dca_domain *domain;
+
+       list_for_each_entry(domain, &dca_domains, node)
+               if (domain->pci_rc == rc)
+                       return domain;
+
+       return NULL;
+}
+
+static struct dca_domain *dca_get_domain(struct device *dev)
+{
+       struct pci_bus *rc;
+       struct dca_domain *domain;
+
+       rc = dca_pci_rc_from_dev(dev);
+       domain = dca_find_domain(rc);
+
+       if (!domain) {
+               domain = dca_allocate_domain(rc);
+               if (domain)
+                       list_add(&domain->node, &dca_domains);
+       }
+
+       return domain;
+}
+
+static struct dca_provider *dca_find_provider_by_dev(struct device *dev)
+{
+       struct dca_provider *dca;
+       struct pci_bus *rc;
+       struct dca_domain *domain;
+
+       if (dev) {
+               rc = dca_pci_rc_from_dev(dev);
+               domain = dca_find_domain(rc);
+               if (!domain)
+                       return NULL;
+       } else {
+               if (!list_empty(&dca_domains))
+                       domain = list_first_entry(&dca_domains,
+                                                 struct dca_domain,
+                                                 node);
+               else
+                       return NULL;
        }
 
-       return ret;
+       list_for_each_entry(dca, &domain->dca_providers, node)
+               if ((!dev) || (dca->ops->dev_managed(dca, dev)))
+                       return dca;
+
+       return NULL;
 }
 
 /**
@@ -61,6 +133,8 @@ int dca_add_requester(struct device *dev)
        struct dca_provider *dca;
        int err, slot = -ENODEV;
        unsigned long flags;
+       struct pci_bus *pci_rc;
+       struct dca_domain *domain;
 
        if (!dev)
                return -EFAULT;
@@ -74,7 +148,14 @@ int dca_add_requester(struct device *dev)
                return -EEXIST;
        }
 
-       list_for_each_entry(dca, &dca_providers, node) {
+       pci_rc = dca_pci_rc_from_dev(dev);
+       domain = dca_find_domain(pci_rc);
+       if (!domain) {
+               spin_unlock_irqrestore(&dca_lock, flags);
+               return -ENODEV;
+       }
+
+       list_for_each_entry(dca, &domain->dca_providers, node) {
                slot = dca->ops->add_requester(dca, dev);
                if (slot >= 0)
                        break;
@@ -222,13 +303,19 @@ int register_dca_provider(struct dca_provider *dca, struct device *dev)
 {
        int err;
        unsigned long flags;
+       struct dca_domain *domain;
 
        err = dca_sysfs_add_provider(dca, dev);
        if (err)
                return err;
 
        spin_lock_irqsave(&dca_lock, flags);
-       list_add(&dca->node, &dca_providers);
+       domain = dca_get_domain(dev);
+       if (!domain) {
+               spin_unlock_irqrestore(&dca_lock, flags);
+               return -ENODEV;
+       }
+       list_add(&dca->node, &domain->dca_providers);
        spin_unlock_irqrestore(&dca_lock, flags);
 
        blocking_notifier_call_chain(&dca_provider_chain,
@@ -241,15 +328,24 @@ EXPORT_SYMBOL_GPL(register_dca_provider);
  * unregister_dca_provider - remove a dca provider
  * @dca - struct created by alloc_dca_provider()
  */
-void unregister_dca_provider(struct dca_provider *dca)
+void unregister_dca_provider(struct dca_provider *dca, struct device *dev)
 {
        unsigned long flags;
+       struct pci_bus *pci_rc;
+       struct dca_domain *domain;
 
        blocking_notifier_call_chain(&dca_provider_chain,
                                     DCA_PROVIDER_REMOVE, NULL);
 
        spin_lock_irqsave(&dca_lock, flags);
+
        list_del(&dca->node);
+
+       pci_rc = dca_pci_rc_from_dev(dev);
+       domain = dca_find_domain(pci_rc);
+       if (list_empty(&domain->dca_providers))
+               dca_free_domain(domain);
+
        spin_unlock_irqrestore(&dca_lock, flags);
 
        dca_sysfs_remove_provider(dca);
@@ -276,7 +372,7 @@ EXPORT_SYMBOL_GPL(dca_unregister_notify);
 
 static int __init dca_init(void)
 {
-       printk(KERN_ERR "dca service started, version %s\n", DCA_VERSION);
+       pr_info("dca service started, version %s\n", DCA_VERSION);
        return dca_sysfs_init();
 }
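
Because providers now live on per-root-complex domain lists, unregister_dca_provider() grows a struct device * argument so the matching domain can be found and freed once it empties. A caller sketch; pdev stands for the provider's PCI device and the my_* names are illustrative:

	/* at probe time: the provider joins the domain of pdev's PCI root */
	dca = alloc_dca_provider(&my_dca_ops, sizeof(struct my_priv));
	if (!dca)
		return -ENOMEM;
	err = register_dca_provider(dca, &pdev->dev);
	if (err) {
		free_dca_provider(dca);
		return err;
	}

	/* at remove time: the domain itself is freed once its provider
	 * list goes empty */
	unregister_dca_provider(dca, &pdev->dev);
	free_dca_provider(dca);
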
 
index 81e1020..5903a88 100644 (file)
@@ -17,11 +17,15 @@ if DMADEVICES
 
 comment "DMA Devices"
 
+config ASYNC_TX_DISABLE_CHANNEL_SWITCH
+       bool
+
 config INTEL_IOATDMA
        tristate "Intel I/OAT DMA support"
        depends on PCI && X86
        select DMA_ENGINE
        select DCA
+       select ASYNC_TX_DISABLE_CHANNEL_SWITCH
        help
          Enable support for the Intel(R) I/OAT DMA engine present
          in recent Intel Xeon chipsets.
@@ -97,6 +101,14 @@ config TXX9_DMAC
          Support the TXx9 SoC internal DMA controller.  This can be
          integrated in chips such as the Toshiba TX4927/38/39.
 
+config SH_DMAE
+       tristate "Renesas SuperH DMAC support"
+       depends on SUPERH && SH_DMA
+       depends on !SH_DMA_API
+       select DMA_ENGINE
+       help
+         Enable support for the Renesas SuperH DMA controllers.
+
 config DMA_ENGINE
        bool
 
@@ -116,7 +128,7 @@ config NET_DMA
 
 config ASYNC_TX_DMA
        bool "Async_tx: Offload support for the async_tx api"
-       depends on DMA_ENGINE && !HIGHMEM64G
+       depends on DMA_ENGINE
        help
          This allows the async_tx api to take advantage of offload engines for
          memcpy, memset, xor, and raid6 p+q operations.  If your platform has
index 40e1e00..eca71ba 100644 (file)
@@ -1,8 +1,7 @@
 obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
 obj-$(CONFIG_NET_DMA) += iovlock.o
 obj-$(CONFIG_DMATEST) += dmatest.o
-obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
-ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o
+obj-$(CONFIG_INTEL_IOATDMA) += ioat/
 obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
 obj-$(CONFIG_FSL_DMA) += fsldma.o
 obj-$(CONFIG_MV_XOR) += mv_xor.o
@@ -10,3 +9,4 @@ obj-$(CONFIG_DW_DMAC) += dw_dmac.o
 obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
 obj-$(CONFIG_MX3_IPU) += ipu/
 obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o
+obj-$(CONFIG_SH_DMAE) += shdma.o
index c8522e6..7585c41 100644 (file)
@@ -87,6 +87,7 @@ static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan,
        desc = dma_pool_alloc(atdma->dma_desc_pool, gfp_flags, &phys);
        if (desc) {
                memset(desc, 0, sizeof(struct at_desc));
+               INIT_LIST_HEAD(&desc->tx_list);
                dma_async_tx_descriptor_init(&desc->txd, chan);
                /* txd.flags will be overwritten in prep functions */
                desc->txd.flags = DMA_CTRL_ACK;
@@ -150,11 +151,11 @@ static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc)
                struct at_desc *child;
 
                spin_lock_bh(&atchan->lock);
-               list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+               list_for_each_entry(child, &desc->tx_list, desc_node)
                        dev_vdbg(chan2dev(&atchan->chan_common),
                                        "moving child desc %p to freelist\n",
                                        child);
-               list_splice_init(&desc->txd.tx_list, &atchan->free_list);
+               list_splice_init(&desc->tx_list, &atchan->free_list);
                dev_vdbg(chan2dev(&atchan->chan_common),
                         "moving desc %p to freelist\n", desc);
                list_add(&desc->desc_node, &atchan->free_list);
@@ -247,30 +248,33 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
        param = txd->callback_param;
 
        /* move children to free_list */
-       list_splice_init(&txd->tx_list, &atchan->free_list);
+       list_splice_init(&desc->tx_list, &atchan->free_list);
        /* move myself to free_list */
        list_move(&desc->desc_node, &atchan->free_list);
 
        /* unmap dma addresses */
-       if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-               if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-                       dma_unmap_single(chan2parent(&atchan->chan_common),
-                                       desc->lli.daddr,
-                                       desc->len, DMA_FROM_DEVICE);
-               else
-                       dma_unmap_page(chan2parent(&atchan->chan_common),
-                                       desc->lli.daddr,
-                                       desc->len, DMA_FROM_DEVICE);
-       }
-       if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-               if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-                       dma_unmap_single(chan2parent(&atchan->chan_common),
-                                       desc->lli.saddr,
-                                       desc->len, DMA_TO_DEVICE);
-               else
-                       dma_unmap_page(chan2parent(&atchan->chan_common),
-                                       desc->lli.saddr,
-                                       desc->len, DMA_TO_DEVICE);
+       if (!atchan->chan_common.private) {
+               struct device *parent = chan2parent(&atchan->chan_common);
+               if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+                       if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+                               dma_unmap_single(parent,
+                                               desc->lli.daddr,
+                                               desc->len, DMA_FROM_DEVICE);
+                       else
+                               dma_unmap_page(parent,
+                                               desc->lli.daddr,
+                                               desc->len, DMA_FROM_DEVICE);
+               }
+               if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+                       if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+                               dma_unmap_single(parent,
+                                               desc->lli.saddr,
+                                               desc->len, DMA_TO_DEVICE);
+                       else
+                               dma_unmap_page(parent,
+                                               desc->lli.saddr,
+                                               desc->len, DMA_TO_DEVICE);
+               }
        }
 
        /*
@@ -334,7 +338,7 @@ static void atc_cleanup_descriptors(struct at_dma_chan *atchan)
                        /* This one is currently in progress */
                        return;
 
-               list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+               list_for_each_entry(child, &desc->tx_list, desc_node)
                        if (!(child->lli.ctrla & ATC_DONE))
                                /* Currently in progress */
                                return;
@@ -407,7 +411,7 @@ static void atc_handle_error(struct at_dma_chan *atchan)
        dev_crit(chan2dev(&atchan->chan_common),
                        "  cookie: %d\n", bad_desc->txd.cookie);
        atc_dump_lli(atchan, &bad_desc->lli);
-       list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node)
+       list_for_each_entry(child, &bad_desc->tx_list, desc_node)
                atc_dump_lli(atchan, &child->lli);
 
        /* Pretend the descriptor completed successfully */
@@ -587,7 +591,7 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
                        prev->lli.dscr = desc->txd.phys;
                        /* insert the link descriptor to the LD ring */
                        list_add_tail(&desc->desc_node,
-                                       &first->txd.tx_list);
+                                       &first->tx_list);
                }
                prev = desc;
        }
@@ -646,8 +650,6 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 
        reg_width = atslave->reg_width;
 
-       sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction);
-
        ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla;
        ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN;
 
@@ -687,7 +689,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
                                prev->lli.dscr = desc->txd.phys;
                                /* insert the link descriptor to the LD ring */
                                list_add_tail(&desc->desc_node,
-                                               &first->txd.tx_list);
+                                               &first->tx_list);
                        }
                        prev = desc;
                        total_len += len;
@@ -729,7 +731,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
                                prev->lli.dscr = desc->txd.phys;
                                /* insert the link descriptor to the LD ring */
                                list_add_tail(&desc->desc_node,
-                                               &first->txd.tx_list);
+                                               &first->tx_list);
                        }
                        prev = desc;
                        total_len += len;
index 4c972af..495457e 100644 (file)
@@ -165,6 +165,7 @@ struct at_desc {
        struct at_lli                   lli;
 
        /* THEN values for driver housekeeping */
+       struct list_head                tx_list;
        struct dma_async_tx_descriptor  txd;
        struct list_head                desc_node;
        size_t                          len;
index 5a87384..bd0b248 100644 (file)
@@ -608,6 +608,40 @@ void dmaengine_put(void)
 }
 EXPORT_SYMBOL(dmaengine_put);
 
+static bool device_has_all_tx_types(struct dma_device *device)
+{
+       /* A device that satisfies this test has channels that will never cause
+        * an async_tx channel switch event as all possible operation types can
+        * be handled.
+        */
+       #ifdef CONFIG_ASYNC_TX_DMA
+       if (!dma_has_cap(DMA_INTERRUPT, device->cap_mask))
+               return false;
+       #endif
+
+       #if defined(CONFIG_ASYNC_MEMCPY) || defined(CONFIG_ASYNC_MEMCPY_MODULE)
+       if (!dma_has_cap(DMA_MEMCPY, device->cap_mask))
+               return false;
+       #endif
+
+       #if defined(CONFIG_ASYNC_MEMSET) || defined(CONFIG_ASYNC_MEMSET_MODULE)
+       if (!dma_has_cap(DMA_MEMSET, device->cap_mask))
+               return false;
+       #endif
+
+       #if defined(CONFIG_ASYNC_XOR) || defined(CONFIG_ASYNC_XOR_MODULE)
+       if (!dma_has_cap(DMA_XOR, device->cap_mask))
+               return false;
+       #endif
+
+       #if defined(CONFIG_ASYNC_PQ) || defined(CONFIG_ASYNC_PQ_MODULE)
+       if (!dma_has_cap(DMA_PQ, device->cap_mask))
+               return false;
+       #endif
+
+       return true;
+}
+
 static int get_dma_id(struct dma_device *device)
 {
        int rc;
@@ -644,8 +678,12 @@ int dma_async_device_register(struct dma_device *device)
                !device->device_prep_dma_memcpy);
        BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) &&
                !device->device_prep_dma_xor);
-       BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) &&
-               !device->device_prep_dma_zero_sum);
+       BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) &&
+               !device->device_prep_dma_xor_val);
+       BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) &&
+               !device->device_prep_dma_pq);
+       BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) &&
+               !device->device_prep_dma_pq_val);
        BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
                !device->device_prep_dma_memset);
        BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
@@ -661,6 +699,12 @@ int dma_async_device_register(struct dma_device *device)
        BUG_ON(!device->device_issue_pending);
        BUG_ON(!device->dev);
 
+       /* note: this only matters in the
+        * CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH=y case
+        */
+       if (device_has_all_tx_types(device))
+               dma_cap_set(DMA_ASYNC_TX, device->cap_mask);
+
        idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL);
        if (!idr_ref)
                return -ENOMEM;
@@ -933,55 +977,29 @@ void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
 {
        tx->chan = chan;
        spin_lock_init(&tx->lock);
-       INIT_LIST_HEAD(&tx->tx_list);
 }
 EXPORT_SYMBOL(dma_async_tx_descriptor_init);
 
 /* dma_wait_for_async_tx - spin wait for a transaction to complete
  * @tx: in-flight transaction to wait on
- *
- * This routine assumes that tx was obtained from a call to async_memcpy,
- * async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped
- * and submitted).  Walking the parent chain is only meant to cover for DMA
- * drivers that do not implement the DMA_INTERRUPT capability and may race with
- * the driver's descriptor cleanup routine.
  */
 enum dma_status
 dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
 {
-       enum dma_status status;
-       struct dma_async_tx_descriptor *iter;
-       struct dma_async_tx_descriptor *parent;
+       unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
 
        if (!tx)
                return DMA_SUCCESS;
 
-       WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for"
-                 " %s\n", __func__, dma_chan_name(tx->chan));
-
-       /* poll through the dependency chain, return when tx is complete */
-       do {
-               iter = tx;
-
-               /* find the root of the unsubmitted dependency chain */
-               do {
-                       parent = iter->parent;
-                       if (!parent)
-                               break;
-                       else
-                               iter = parent;
-               } while (parent);
-
-               /* there is a small window for ->parent == NULL and
-                * ->cookie == -EBUSY
-                */
-               while (iter->cookie == -EBUSY)
-                       cpu_relax();
-
-               status = dma_sync_wait(iter->chan, iter->cookie);
-       } while (status == DMA_IN_PROGRESS || (iter != tx));
-
-       return status;
+       while (tx->cookie == -EBUSY) {
+               if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
+                       pr_err("%s timeout waiting for descriptor submission\n",
+                               __func__);
+                       return DMA_ERROR;
+               }
+               cpu_relax();
+       }
+       return dma_sync_wait(tx->chan, tx->cookie);
 }
 EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
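
The rewritten dma_wait_for_async_tx() above no longer walks the parent chain; it simply spins until the descriptor has been assigned a cookie (i.e. tx->cookie != -EBUSY), then polls the channel via dma_sync_wait(), bailing out with DMA_ERROR after five seconds. A minimal sketch of a caller, assuming 'tx' came from an earlier prep/submit pair (the helper name is illustrative, not part of this patch):

#include <linux/dmaengine.h>

/* Sketch only: block until 'tx' completes, or until the 5s
 * submission timeout inside dma_wait_for_async_tx() fires. */
static int wait_for_tx(struct dma_async_tx_descriptor *tx)
{
	enum dma_status status = dma_wait_for_async_tx(tx);

	return status == DMA_SUCCESS ? 0 : -EIO;
}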
 
index d93017f..a32a4cf 100644 (file)
@@ -48,6 +48,11 @@ module_param(xor_sources, uint, S_IRUGO);
 MODULE_PARM_DESC(xor_sources,
                "Number of xor source buffers (default: 3)");
 
+static unsigned int pq_sources = 3;
+module_param(pq_sources, uint, S_IRUGO);
+MODULE_PARM_DESC(pq_sources,
+               "Number of p+q source buffers (default: 3)");
+
 /*
  * Initialization patterns. All bytes in the source buffer have bit 7
  * set, all bytes in the destination buffer have bit 7 cleared.
@@ -232,6 +237,7 @@ static int dmatest_func(void *data)
        dma_cookie_t            cookie;
        enum dma_status         status;
        enum dma_ctrl_flags     flags;
+       u8                      pq_coefs[pq_sources];
        int                     ret;
        int                     src_cnt;
        int                     dst_cnt;
@@ -248,6 +254,11 @@ static int dmatest_func(void *data)
        else if (thread->type == DMA_XOR) {
                src_cnt = xor_sources | 1; /* force odd to ensure dst = src */
                dst_cnt = 1;
+       } else if (thread->type == DMA_PQ) {
+               src_cnt = pq_sources | 1; /* force odd to ensure dst = src */
+               dst_cnt = 2;
+               for (i = 0; i < pq_sources; i++)
+                       pq_coefs[i] = 1;
        } else
                goto err_srcs;
 
@@ -283,6 +294,7 @@ static int dmatest_func(void *data)
                dma_addr_t dma_dsts[dst_cnt];
                struct completion cmp;
                unsigned long tmo = msecs_to_jiffies(3000);
+               u8 align = 0;
 
                total_tests++;
 
@@ -290,6 +302,18 @@ static int dmatest_func(void *data)
                src_off = dmatest_random() % (test_buf_size - len + 1);
                dst_off = dmatest_random() % (test_buf_size - len + 1);
 
+               /* honor alignment restrictions */
+               if (thread->type == DMA_MEMCPY)
+                       align = dev->copy_align;
+               else if (thread->type == DMA_XOR)
+                       align = dev->xor_align;
+               else if (thread->type == DMA_PQ)
+                       align = dev->pq_align;
+
+               len = (len >> align) << align;
+               src_off = (src_off >> align) << align;
+               dst_off = (dst_off >> align) << align;
+
                dmatest_init_srcs(thread->srcs, src_off, len);
                dmatest_init_dsts(thread->dsts, dst_off, len);
 
@@ -306,6 +330,7 @@ static int dmatest_func(void *data)
                                                     DMA_BIDIRECTIONAL);
                }
 
+
                if (thread->type == DMA_MEMCPY)
                        tx = dev->device_prep_dma_memcpy(chan,
                                                         dma_dsts[0] + dst_off,
@@ -316,6 +341,15 @@ static int dmatest_func(void *data)
                                                      dma_dsts[0] + dst_off,
                                                      dma_srcs, xor_sources,
                                                      len, flags);
+               else if (thread->type == DMA_PQ) {
+                       dma_addr_t dma_pq[dst_cnt];
+
+                       for (i = 0; i < dst_cnt; i++)
+                               dma_pq[i] = dma_dsts[i] + dst_off;
+                       tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs,
+                                                    pq_sources, pq_coefs,
+                                                    len, flags);
+               }
 
                if (!tx) {
                        for (i = 0; i < src_cnt; i++)
@@ -459,6 +493,8 @@ static int dmatest_add_threads(struct dmatest_chan *dtc, enum dma_transaction_ty
                op = "copy";
        else if (type == DMA_XOR)
                op = "xor";
+       else if (type == DMA_PQ)
+               op = "pq";
        else
                return -EINVAL;
 
@@ -514,6 +550,10 @@ static int dmatest_add_channel(struct dma_chan *chan)
                cnt = dmatest_add_threads(dtc, DMA_XOR);
                thread_count += cnt > 0 ? cnt : 0;
        }
+       if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+               cnt = dmatest_add_threads(dtc, DMA_PQ);
+               thread_count += cnt > 0 ? cnt : 0;
+       }
 
        pr_info("dmatest: Started %u threads using %s\n",
                thread_count, dma_chan_name(chan));
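
The alignment fix-up added to dmatest_func() treats copy_align/xor_align/pq_align as power-of-two shifts: (v >> align) << align rounds v down to a multiple of 1 << align by clearing the low bits. A small illustrative helper (the name is ours, not dmatest's):

#include <linux/types.h>

/* Round v down to a multiple of (1 << align); e.g. v = 1003,
 * align = 3 gives 1000, a multiple of 8. */
static inline size_t align_down(size_t v, u8 align)
{
	return (v >> align) << align;
}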
index 933c143..2eea823 100644 (file)
@@ -116,7 +116,7 @@ static void dwc_sync_desc_for_cpu(struct dw_dma_chan *dwc, struct dw_desc *desc)
 {
        struct dw_desc  *child;
 
-       list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+       list_for_each_entry(child, &desc->tx_list, desc_node)
                dma_sync_single_for_cpu(chan2parent(&dwc->chan),
                                child->txd.phys, sizeof(child->lli),
                                DMA_TO_DEVICE);
@@ -137,11 +137,11 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
                dwc_sync_desc_for_cpu(dwc, desc);
 
                spin_lock_bh(&dwc->lock);
-               list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+               list_for_each_entry(child, &desc->tx_list, desc_node)
                        dev_vdbg(chan2dev(&dwc->chan),
                                        "moving child desc %p to freelist\n",
                                        child);
-               list_splice_init(&desc->txd.tx_list, &dwc->free_list);
+               list_splice_init(&desc->tx_list, &dwc->free_list);
                dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc);
                list_add(&desc->desc_node, &dwc->free_list);
                spin_unlock_bh(&dwc->lock);
@@ -209,19 +209,28 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc)
        param = txd->callback_param;
 
        dwc_sync_desc_for_cpu(dwc, desc);
-       list_splice_init(&txd->tx_list, &dwc->free_list);
+       list_splice_init(&desc->tx_list, &dwc->free_list);
        list_move(&desc->desc_node, &dwc->free_list);
 
-       /*
-        * We use dma_unmap_page() regardless of how the buffers were
-        * mapped before they were submitted...
-        */
-       if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP))
-               dma_unmap_page(chan2parent(&dwc->chan), desc->lli.dar,
-                              desc->len, DMA_FROM_DEVICE);
-       if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP))
-               dma_unmap_page(chan2parent(&dwc->chan), desc->lli.sar,
-                              desc->len, DMA_TO_DEVICE);
+       if (!dwc->chan.private) {
+               struct device *parent = chan2parent(&dwc->chan);
+               if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+                       if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
+                               dma_unmap_single(parent, desc->lli.dar,
+                                               desc->len, DMA_FROM_DEVICE);
+                       else
+                               dma_unmap_page(parent, desc->lli.dar,
+                                               desc->len, DMA_FROM_DEVICE);
+               }
+               if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+                       if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
+                               dma_unmap_single(parent, desc->lli.sar,
+                                               desc->len, DMA_TO_DEVICE);
+                       else
+                               dma_unmap_page(parent, desc->lli.sar,
+                                               desc->len, DMA_TO_DEVICE);
+               }
+       }
 
        /*
         * The API requires that no submissions are done from a
@@ -289,7 +298,7 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
                        /* This one is currently in progress */
                        return;
 
-               list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+               list_for_each_entry(child, &desc->tx_list, desc_node)
                        if (child->lli.llp == llp)
                                /* Currently in progress */
                                return;
@@ -356,7 +365,7 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
        dev_printk(KERN_CRIT, chan2dev(&dwc->chan),
                        "  cookie: %d\n", bad_desc->txd.cookie);
        dwc_dump_lli(dwc, &bad_desc->lli);
-       list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node)
+       list_for_each_entry(child, &bad_desc->tx_list, desc_node)
                dwc_dump_lli(dwc, &child->lli);
 
        /* Pretend the descriptor completed successfully */
@@ -608,7 +617,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
                                        prev->txd.phys, sizeof(prev->lli),
                                        DMA_TO_DEVICE);
                        list_add_tail(&desc->desc_node,
-                                       &first->txd.tx_list);
+                                       &first->tx_list);
                }
                prev = desc;
        }
@@ -658,8 +667,6 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
        reg_width = dws->reg_width;
        prev = first = NULL;
 
-       sg_len = dma_map_sg(chan2parent(chan), sgl, sg_len, direction);
-
        switch (direction) {
        case DMA_TO_DEVICE:
                ctllo = (DWC_DEFAULT_CTLLO
@@ -700,7 +707,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
                                                sizeof(prev->lli),
                                                DMA_TO_DEVICE);
                                list_add_tail(&desc->desc_node,
-                                               &first->txd.tx_list);
+                                               &first->tx_list);
                        }
                        prev = desc;
                        total_len += len;
@@ -746,7 +753,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
                                                sizeof(prev->lli),
                                                DMA_TO_DEVICE);
                                list_add_tail(&desc->desc_node,
-                                               &first->txd.tx_list);
+                                               &first->tx_list);
                        }
                        prev = desc;
                        total_len += len;
@@ -902,6 +909,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan)
                        break;
                }
 
+               INIT_LIST_HEAD(&desc->tx_list);
                dma_async_tx_descriptor_init(&desc->txd, chan);
                desc->txd.tx_submit = dwc_tx_submit;
                desc->txd.flags = DMA_CTRL_ACK;
index 13a5807..d9a939f 100644 (file)
@@ -217,6 +217,7 @@ struct dw_desc {
 
        /* THEN values for driver housekeeping */
        struct list_head                desc_node;
+       struct list_head                tx_list;
        struct dma_async_tx_descriptor  txd;
        size_t                          len;
 };
index ef87a89..296f9e7 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/dmapool.h>
 #include <linux/of_platform.h>
 
+#include <asm/fsldma.h>
 #include "fsldma.h"
 
 static void dma_init(struct fsl_dma_chan *fsl_chan)
@@ -280,28 +281,40 @@ static void fsl_chan_set_dest_loop_size(struct fsl_dma_chan *fsl_chan, int size)
 }
 
 /**
- * fsl_chan_toggle_ext_pause - Toggle channel external pause status
+ * fsl_chan_set_request_count - Set DMA Request Count for external control
  * @fsl_chan : Freescale DMA channel
- * @size     : Pause control size, 0 for disable external pause control.
- *             The maximum is 1024.
+ * @size     : Number of bytes to transfer in a single request
+ *
+ * The Freescale DMA channel can be controlled by the external signal DREQ#.
+ * The DMA request count is the number of bytes that may be transferred
+ * before the channel pauses; a new assertion of DREQ# then resumes
+ * channel operation.
  *
- * The Freescale DMA channel can be controlled by the external
- * signal DREQ#. The pause control size is how many bytes are allowed
- * to transfer before pausing the channel, after which a new assertion
- * of DREQ# resumes channel operation.
+ * A size of 0 disables external pause control. The maximum size is 1024.
  */
-static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int size)
+static void fsl_chan_set_request_count(struct fsl_dma_chan *fsl_chan, int size)
 {
-       if (size > 1024)
-               return;
+       BUG_ON(size > 1024);
+       DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+               DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32)
+                       | ((__ilog2(size) << 24) & 0x0f000000),
+               32);
+}
 
-       if (size) {
-               DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
-                       DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32)
-                               | ((__ilog2(size) << 24) & 0x0f000000),
-                       32);
+/**
+ * fsl_chan_toggle_ext_pause - Toggle channel external pause status
+ * @fsl_chan : Freescale DMA channel
+ * @enable   : 0 to disable, 1 to enable.
+ *
+ * The Freescale DMA channel can be controlled by the external signal DREQ#.
+ * The DMA Request Count feature should be used in addition to this feature
+ * to set the number of bytes to transfer before pausing the channel.
+ */
+static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int enable)
+{
+       if (enable)
                fsl_chan->feature |= FSL_DMA_CHAN_PAUSE_EXT;
-       else
+       else
                fsl_chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT;
 }
 
@@ -326,7 +339,8 @@ static void fsl_chan_toggle_ext_start(struct fsl_dma_chan *fsl_chan, int enable)
 static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 {
        struct fsl_dma_chan *fsl_chan = to_fsl_chan(tx->chan);
-       struct fsl_desc_sw *desc;
+       struct fsl_desc_sw *desc = tx_to_fsl_desc(tx);
+       struct fsl_desc_sw *child;
        unsigned long flags;
        dma_cookie_t cookie;
 
@@ -334,7 +348,7 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
        spin_lock_irqsave(&fsl_chan->desc_lock, flags);
 
        cookie = fsl_chan->common.cookie;
-       list_for_each_entry(desc, &tx->tx_list, node) {
+       list_for_each_entry(child, &desc->tx_list, node) {
                cookie++;
                if (cookie < 0)
                        cookie = 1;
@@ -343,8 +357,8 @@ static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
        }
 
        fsl_chan->common.cookie = cookie;
-       append_ld_queue(fsl_chan, tx_to_fsl_desc(tx));
-       list_splice_init(&tx->tx_list, fsl_chan->ld_queue.prev);
+       append_ld_queue(fsl_chan, desc);
+       list_splice_init(&desc->tx_list, fsl_chan->ld_queue.prev);
 
        spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
 
@@ -366,6 +380,7 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor(
        desc_sw = dma_pool_alloc(fsl_chan->desc_pool, GFP_ATOMIC, &pdesc);
        if (desc_sw) {
                memset(desc_sw, 0, sizeof(struct fsl_desc_sw));
+               INIT_LIST_HEAD(&desc_sw->tx_list);
                dma_async_tx_descriptor_init(&desc_sw->async_tx,
                                                &fsl_chan->common);
                desc_sw->async_tx.tx_submit = fsl_dma_tx_submit;
@@ -455,7 +470,7 @@ fsl_dma_prep_interrupt(struct dma_chan *chan, unsigned long flags)
        new->async_tx.flags = flags;
 
        /* Insert the link descriptor to the LD ring */
-       list_add_tail(&new->node, &new->async_tx.tx_list);
+       list_add_tail(&new->node, &new->tx_list);
 
        /* Set End-of-link to the last link descriptor of new list*/
        set_ld_eol(fsl_chan, new);
@@ -513,7 +528,7 @@ static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy(
                dma_dest += copy;
 
                /* Insert the link descriptor to the LD ring */
-               list_add_tail(&new->node, &first->async_tx.tx_list);
+               list_add_tail(&new->node, &first->tx_list);
        } while (len);
 
        new->async_tx.flags = flags; /* client is in control of this ack */
@@ -528,7 +543,7 @@ fail:
        if (!first)
                return NULL;
 
-       list = &first->async_tx.tx_list;
+       list = &first->tx_list;
        list_for_each_entry_safe_reverse(new, prev, list, node) {
                list_del(&new->node);
                dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys);
@@ -537,6 +552,229 @@ fail:
        return NULL;
 }
 
+/**
+ * fsl_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
+ * @chan: DMA channel
+ * @sgl: scatterlist to transfer to/from
+ * @sg_len: number of entries in @sgl
+ * @direction: DMA direction
+ * @flags: DMAEngine flags
+ *
+ * Prepare a set of descriptors for a DMA_SLAVE transaction. Following the
+ * DMA_SLAVE API, this gets the device-specific information from the
+ * chan->private variable.
+ */
+static struct dma_async_tx_descriptor *fsl_dma_prep_slave_sg(
+       struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
+       enum dma_data_direction direction, unsigned long flags)
+{
+       struct fsl_dma_chan *fsl_chan;
+       struct fsl_desc_sw *first = NULL, *prev = NULL, *new = NULL;
+       struct fsl_dma_slave *slave;
+       struct list_head *tx_list;
+       size_t copy;
+
+       int i;
+       struct scatterlist *sg;
+       size_t sg_used;
+       size_t hw_used;
+       struct fsl_dma_hw_addr *hw;
+       dma_addr_t dma_dst, dma_src;
+
+       if (!chan)
+               return NULL;
+
+       if (!chan->private)
+               return NULL;
+
+       fsl_chan = to_fsl_chan(chan);
+       slave = chan->private;
+
+       if (list_empty(&slave->addresses))
+               return NULL;
+
+       hw = list_first_entry(&slave->addresses, struct fsl_dma_hw_addr, entry);
+       hw_used = 0;
+
+       /*
+        * Build the hardware transaction to copy from the scatterlist to
+        * the hardware, or from the hardware to the scatterlist
+        *
+        * If you are copying from the hardware to the scatterlist and it
+        * takes two hardware entries to fill an entire page, then both
+        * hardware entries will be coalesced into the same page
+        *
+        * If you are copying from the scatterlist to the hardware and a
+        * single page can fill two hardware entries, then the data will
+        * be read out of the page into the first hardware entry, and so on
+        */
+       for_each_sg(sgl, sg, sg_len, i) {
+               sg_used = 0;
+
+               /* Loop until the entire scatterlist entry is used */
+               while (sg_used < sg_dma_len(sg)) {
+
+                       /*
+                        * If we've used up the current hardware address/length
+                        * pair, we need to load a new one
+                        *
+                        * This is done in a while loop so that descriptors with
+                        * length == 0 will be skipped
+                        */
+                       while (hw_used >= hw->length) {
+
+                               /*
+                                * If the current hardware entry is the last
+                                * entry in the list, we're finished
+                                */
+                               if (list_is_last(&hw->entry, &slave->addresses))
+                                       goto finished;
+
+                               /* Get the next hardware address/length pair */
+                               hw = list_entry(hw->entry.next,
+                                               struct fsl_dma_hw_addr, entry);
+                               hw_used = 0;
+                       }
+
+                       /* Allocate the link descriptor from DMA pool */
+                       new = fsl_dma_alloc_descriptor(fsl_chan);
+                       if (!new) {
+                               dev_err(fsl_chan->dev, "No free memory for "
+                                                      "link descriptor\n");
+                               goto fail;
+                       }
+#ifdef FSL_DMA_LD_DEBUG
+                       dev_dbg(fsl_chan->dev, "new link desc alloc %p\n", new);
+#endif
+
+                       /*
+                        * Calculate the maximum number of bytes to transfer,
+                        * making sure it is less than the DMA controller limit
+                        */
+                       copy = min_t(size_t, sg_dma_len(sg) - sg_used,
+                                            hw->length - hw_used);
+                       copy = min_t(size_t, copy, FSL_DMA_BCR_MAX_CNT);
+
+                       /*
+                        * DMA_FROM_DEVICE
+                        * from the hardware to the scatterlist
+                        *
+                        * DMA_TO_DEVICE
+                        * from the scatterlist to the hardware
+                        */
+                       if (direction == DMA_FROM_DEVICE) {
+                               dma_src = hw->address + hw_used;
+                               dma_dst = sg_dma_address(sg) + sg_used;
+                       } else {
+                               dma_src = sg_dma_address(sg) + sg_used;
+                               dma_dst = hw->address + hw_used;
+                       }
+
+                       /* Fill in the descriptor */
+                       set_desc_cnt(fsl_chan, &new->hw, copy);
+                       set_desc_src(fsl_chan, &new->hw, dma_src);
+                       set_desc_dest(fsl_chan, &new->hw, dma_dst);
+
+                       /*
+                        * If this is not the first descriptor, chain the
+                        * current descriptor after the previous descriptor
+                        */
+                       if (!first) {
+                               first = new;
+                       } else {
+                               set_desc_next(fsl_chan, &prev->hw,
+                                             new->async_tx.phys);
+                       }
+
+                       new->async_tx.cookie = 0;
+                       async_tx_ack(&new->async_tx);
+
+                       prev = new;
+                       sg_used += copy;
+                       hw_used += copy;
+
+                       /* Insert the link descriptor into the LD ring */
+                       list_add_tail(&new->node, &first->tx_list);
+               }
+       }
+
+finished:
+
+       /* All of the hardware address/length pairs had length == 0 */
+       if (!first || !new)
+               return NULL;
+
+       new->async_tx.flags = flags;
+       new->async_tx.cookie = -EBUSY;
+
+       /* Set End-of-link to the last link descriptor of new list */
+       set_ld_eol(fsl_chan, new);
+
+       /* Enable extra controller features */
+       if (fsl_chan->set_src_loop_size)
+               fsl_chan->set_src_loop_size(fsl_chan, slave->src_loop_size);
+
+       if (fsl_chan->set_dest_loop_size)
+               fsl_chan->set_dest_loop_size(fsl_chan, slave->dst_loop_size);
+
+       if (fsl_chan->toggle_ext_start)
+               fsl_chan->toggle_ext_start(fsl_chan, slave->external_start);
+
+       if (fsl_chan->toggle_ext_pause)
+               fsl_chan->toggle_ext_pause(fsl_chan, slave->external_pause);
+
+       if (fsl_chan->set_request_count)
+               fsl_chan->set_request_count(fsl_chan, slave->request_count);
+
+       return &first->async_tx;
+
+fail:
+       /* If first was not set, then we failed to allocate the very first
+        * descriptor, and we're done */
+       if (!first)
+               return NULL;
+
+       /*
+        * First is set, so all of the descriptors we allocated have been added
+        * to first->tx_list, INCLUDING "first" itself. Therefore we
+        * must traverse the list backwards freeing each descriptor in turn
+        *
+        * We reuse the loop variables here.
+        */
+       tx_list = &first->tx_list;
+       list_for_each_entry_safe_reverse(new, prev, tx_list, node) {
+               list_del_init(&new->node);
+               dma_pool_free(fsl_chan->desc_pool, new, new->async_tx.phys);
+       }
+
+       return NULL;
+}
+
+static void fsl_dma_device_terminate_all(struct dma_chan *chan)
+{
+       struct fsl_dma_chan *fsl_chan;
+       struct fsl_desc_sw *desc, *tmp;
+       unsigned long flags;
+
+       if (!chan)
+               return;
+
+       fsl_chan = to_fsl_chan(chan);
+
+       /* Halt the DMA engine */
+       dma_halt(fsl_chan);
+
+       spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+
+       /* Remove and free all of the descriptors in the LD queue */
+       list_for_each_entry_safe(desc, tmp, &fsl_chan->ld_queue, node) {
+               list_del(&desc->node);
+               dma_pool_free(fsl_chan->desc_pool, desc, desc->async_tx.phys);
+       }
+
+       spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+}
+
 /**
  * fsl_dma_update_completed_cookie - Update the completed cookie.
  * @fsl_chan : Freescale DMA channel
@@ -883,6 +1121,7 @@ static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev,
                new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start;
                new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size;
                new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size;
+               new_fsl_chan->set_request_count = fsl_chan_set_request_count;
        }
 
        spin_lock_init(&new_fsl_chan->desc_lock);
@@ -962,12 +1201,15 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev,
 
        dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
        dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
+       dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
        fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
        fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;
        fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt;
        fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy;
        fdev->common.device_is_tx_complete = fsl_dma_is_complete;
        fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending;
+       fdev->common.device_prep_slave_sg = fsl_dma_prep_slave_sg;
+       fdev->common.device_terminate_all = fsl_dma_device_terminate_all;
        fdev->common.dev = &dev->dev;
 
        fdev->irq = irq_of_parse_and_map(dev->node, 0);
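
A sketch of a DMA_SLAVE client for the new fsl_dma_prep_slave_sg(): struct fsl_dma_slave and struct fsl_dma_hw_addr are declared in the new <asm/fsldma.h>, which is not part of this hunk, so the fields below are inferred from the uses above and the values are purely illustrative:

#include <linux/dmaengine.h>
#include <asm/fsldma.h>

static struct dma_async_tx_descriptor *
start_slave_tx(struct dma_chan *chan, struct scatterlist *sgl,
	       unsigned int sg_len, struct fsl_dma_hw_addr *hw)
{
	static struct fsl_dma_slave slave;     /* must outlive the transfer */

	INIT_LIST_HEAD(&slave.addresses);
	list_add_tail(&hw->entry, &slave.addresses);
	slave.request_count  = 64;             /* bytes per DREQ# assertion */
	slave.external_pause = 1;              /* use DREQ# pause control */

	chan->private = &slave;                /* DMA_SLAVE convention */
	return chan->device->device_prep_slave_sg(chan, sgl, sg_len,
						  DMA_TO_DEVICE, 0);
}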
index dc7f268..0df14cb 100644 (file)
@@ -90,6 +90,7 @@ struct fsl_dma_ld_hw {
 struct fsl_desc_sw {
        struct fsl_dma_ld_hw hw;
        struct list_head node;
+       struct list_head tx_list;
        struct dma_async_tx_descriptor async_tx;
        struct list_head *ld;
        void *priv;
@@ -143,10 +144,11 @@ struct fsl_dma_chan {
        struct tasklet_struct tasklet;
        u32 feature;
 
-       void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int size);
+       void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int enable);
        void (*toggle_ext_start)(struct fsl_dma_chan *fsl_chan, int enable);
        void (*set_src_loop_size)(struct fsl_dma_chan *fsl_chan, int size);
        void (*set_dest_loop_size)(struct fsl_dma_chan *fsl_chan, int size);
+       void (*set_request_count)(struct fsl_dma_chan *fsl_chan, int size);
 };
 
 #define to_fsl_chan(chan) container_of(chan, struct fsl_dma_chan, common)
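
The request count installed through the new set_request_count hook is stored as a log2 value in MR bits 27:24, which is why fsl_chan_set_request_count() asserts size <= 1024. A worked example of the encoding:

/*
 *   size = 1024  ->  __ilog2(1024) = 10 = 0xa
 *   (0xa << 24) & 0x0f000000 = 0x0a000000
 *
 * which is OR'd into the current mode register value. The hardware caps
 * the count at 1024 (= 2^10), comfortably within the 4-bit field.
 */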
diff --git a/drivers/dma/ioat.c b/drivers/dma/ioat.c
deleted file mode 100644 (file)
index 2225bb6..0000000
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Intel I/OAT DMA Linux driver
- * Copyright(c) 2007 - 2009 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- */
-
-/*
- * This driver supports an Intel I/OAT DMA engine, which does asynchronous
- * copy operations.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/dca.h>
-#include "ioatdma.h"
-#include "ioatdma_registers.h"
-#include "ioatdma_hw.h"
-
-MODULE_VERSION(IOAT_DMA_VERSION);
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Intel Corporation");
-
-static struct pci_device_id ioat_pci_tbl[] = {
-       /* I/OAT v1 platforms */
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB)  },
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
-       { PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
-
-       /* I/OAT v2 platforms */
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
-
-       /* I/OAT v3 platforms */
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
-       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
-       { 0, }
-};
-
-struct ioat_device {
-       struct pci_dev          *pdev;
-       void __iomem            *iobase;
-       struct ioatdma_device   *dma;
-       struct dca_provider     *dca;
-};
-
-static int __devinit ioat_probe(struct pci_dev *pdev,
-                               const struct pci_device_id *id);
-static void __devexit ioat_remove(struct pci_dev *pdev);
-
-static int ioat_dca_enabled = 1;
-module_param(ioat_dca_enabled, int, 0644);
-MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
-
-static struct pci_driver ioat_pci_driver = {
-       .name           = "ioatdma",
-       .id_table       = ioat_pci_tbl,
-       .probe          = ioat_probe,
-       .remove         = __devexit_p(ioat_remove),
-};
-
-static int __devinit ioat_probe(struct pci_dev *pdev,
-                               const struct pci_device_id *id)
-{
-       void __iomem *iobase;
-       struct ioat_device *device;
-       unsigned long mmio_start, mmio_len;
-       int err;
-
-       err = pci_enable_device(pdev);
-       if (err)
-               goto err_enable_device;
-
-       err = pci_request_regions(pdev, ioat_pci_driver.name);
-       if (err)
-               goto err_request_regions;
-
-       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
-       if (err)
-               err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-       if (err)
-               goto err_set_dma_mask;
-
-       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-       if (err)
-               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-       if (err)
-               goto err_set_dma_mask;
-
-       mmio_start = pci_resource_start(pdev, 0);
-       mmio_len = pci_resource_len(pdev, 0);
-       iobase = ioremap(mmio_start, mmio_len);
-       if (!iobase) {
-               err = -ENOMEM;
-               goto err_ioremap;
-       }
-
-       device = kzalloc(sizeof(*device), GFP_KERNEL);
-       if (!device) {
-               err = -ENOMEM;
-               goto err_kzalloc;
-       }
-       device->pdev = pdev;
-       pci_set_drvdata(pdev, device);
-       device->iobase = iobase;
-
-       pci_set_master(pdev);
-
-       switch (readb(iobase + IOAT_VER_OFFSET)) {
-       case IOAT_VER_1_2:
-               device->dma = ioat_dma_probe(pdev, iobase);
-               if (device->dma && ioat_dca_enabled)
-                       device->dca = ioat_dca_init(pdev, iobase);
-               break;
-       case IOAT_VER_2_0:
-               device->dma = ioat_dma_probe(pdev, iobase);
-               if (device->dma && ioat_dca_enabled)
-                       device->dca = ioat2_dca_init(pdev, iobase);
-               break;
-       case IOAT_VER_3_0:
-               device->dma = ioat_dma_probe(pdev, iobase);
-               if (device->dma && ioat_dca_enabled)
-                       device->dca = ioat3_dca_init(pdev, iobase);
-               break;
-       default:
-               err = -ENODEV;
-               break;
-       }
-       if (!device->dma)
-               err = -ENODEV;
-
-       if (err)
-               goto err_version;
-
-       return 0;
-
-err_version:
-       kfree(device);
-err_kzalloc:
-       iounmap(iobase);
-err_ioremap:
-err_set_dma_mask:
-       pci_release_regions(pdev);
-       pci_disable_device(pdev);
-err_request_regions:
-err_enable_device:
-       return err;
-}
-
-static void __devexit ioat_remove(struct pci_dev *pdev)
-{
-       struct ioat_device *device = pci_get_drvdata(pdev);
-
-       dev_err(&pdev->dev, "Removing dma and dca services\n");
-       if (device->dca) {
-               unregister_dca_provider(device->dca);
-               free_dca_provider(device->dca);
-               device->dca = NULL;
-       }
-
-       if (device->dma) {
-               ioat_dma_remove(device->dma);
-               device->dma = NULL;
-       }
-
-       kfree(device);
-}
-
-static int __init ioat_init_module(void)
-{
-       return pci_register_driver(&ioat_pci_driver);
-}
-module_init(ioat_init_module);
-
-static void __exit ioat_exit_module(void)
-{
-       pci_unregister_driver(&ioat_pci_driver);
-}
-module_exit(ioat_exit_module);
diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile
new file mode 100644 (file)
index 0000000..8997d3f
--- /dev/null
@@ -0,0 +1,2 @@
+obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
+ioatdma-objs := pci.o dma.o dma_v2.o dma_v3.o dca.o
diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c
new file mode 100644 (file)
index 0000000..69d0261
--- /dev/null
@@ -0,0 +1,684 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2007 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/dca.h>
+
+/* either a kernel change is needed, or we need something like this in the kernel */
+#ifndef CONFIG_SMP
+#include <asm/smp.h>
+#undef cpu_physical_id
+#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24)
+#endif
+
+#include "dma.h"
+#include "registers.h"
+
+/*
+ * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6
+ * contain the bit number of the APIC ID to map into the DCA tag.  If the valid
+ * bit is not set, then the value must be 0 or 1 and defines the bit in the tag.
+ */
+#define DCA_TAG_MAP_VALID 0x80
+
+#define DCA3_TAG_MAP_BIT_TO_INV 0x80
+#define DCA3_TAG_MAP_BIT_TO_SEL 0x40
+#define DCA3_TAG_MAP_LITERAL_VAL 0x1
+
+#define DCA_TAG_MAP_MASK 0xDF
+
+/* expected tag map bytes for I/OAT ver.2 */
+#define DCA2_TAG_MAP_BYTE0 0x80
+#define DCA2_TAG_MAP_BYTE1 0x0
+#define DCA2_TAG_MAP_BYTE2 0x81
+#define DCA2_TAG_MAP_BYTE3 0x82
+#define DCA2_TAG_MAP_BYTE4 0x82
+
+/* verify if tag map matches expected values */
+static inline int dca2_tag_map_valid(u8 *tag_map)
+{
+       return ((tag_map[0] == DCA2_TAG_MAP_BYTE0) &&
+               (tag_map[1] == DCA2_TAG_MAP_BYTE1) &&
+               (tag_map[2] == DCA2_TAG_MAP_BYTE2) &&
+               (tag_map[3] == DCA2_TAG_MAP_BYTE3) &&
+               (tag_map[4] == DCA2_TAG_MAP_BYTE4));
+}
+
+/*
+ * "Legacy" DCA systems do not implement the DCA register set in the
+ * I/OAT device.  Software needs direct support for their tag mappings.
+ */
+
+#define APICID_BIT(x)          (DCA_TAG_MAP_VALID | (x))
+#define IOAT_TAG_MAP_LEN       8
+
+static u8 ioat_tag_map_BNB[IOAT_TAG_MAP_LEN] = {
+       1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
+static u8 ioat_tag_map_SCNB[IOAT_TAG_MAP_LEN] = {
+       1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
+static u8 ioat_tag_map_CNB[IOAT_TAG_MAP_LEN] = {
+       1, APICID_BIT(1), APICID_BIT(3), APICID_BIT(4), APICID_BIT(2), };
+static u8 ioat_tag_map_UNISYS[IOAT_TAG_MAP_LEN] = { 0 };
+
+/* pack PCI B/D/F into a u16 */
+static inline u16 dcaid_from_pcidev(struct pci_dev *pci)
+{
+       return (pci->bus->number << 8) | pci->devfn;
+}
+
+static int dca_enabled_in_bios(struct pci_dev *pdev)
+{
+       /* CPUID level 9 returns DCA configuration */
+       /* Bit 0 indicates DCA enabled by the BIOS */
+       unsigned long cpuid_level_9;
+       int res;
+
+       cpuid_level_9 = cpuid_eax(9);
+       res = test_bit(0, &cpuid_level_9);
+       if (!res)
+               dev_err(&pdev->dev, "DCA is disabled in BIOS\n");
+
+       return res;
+}
+
+static int system_has_dca_enabled(struct pci_dev *pdev)
+{
+       if (boot_cpu_has(X86_FEATURE_DCA))
+               return dca_enabled_in_bios(pdev);
+
+       dev_err(&pdev->dev, "boot cpu doesn't have X86_FEATURE_DCA\n");
+       return 0;
+}
+
+struct ioat_dca_slot {
+       struct pci_dev *pdev;   /* requester device */
+       u16 rid;                /* requester id, as used by IOAT */
+};
+
+#define IOAT_DCA_MAX_REQ 6
+#define IOAT3_DCA_MAX_REQ 2
+
+struct ioat_dca_priv {
+       void __iomem            *iobase;
+       void __iomem            *dca_base;
+       int                      max_requesters;
+       int                      requester_count;
+       u8                       tag_map[IOAT_TAG_MAP_LEN];
+       struct ioat_dca_slot     req_slots[0];
+};
+
+/* 5000 series chipset DCA Port Requester ID Table Entry Format
+ * [15:8]      PCI-Express Bus Number
+ * [7:3]       PCI-Express Device Number
+ * [2:0]       PCI-Express Function Number
+ *
+ * 5000 series chipset DCA control register format
+ * [7:1]       Reserved (0)
+ * [0]         Ignore Function Number
+ */
+
+static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+       struct ioat_dca_priv *ioatdca = dca_priv(dca);
+       struct pci_dev *pdev;
+       int i;
+       u16 id;
+
+       /* This implementation only supports PCI-Express */
+       if (dev->bus != &pci_bus_type)
+               return -ENODEV;
+       pdev = to_pci_dev(dev);
+       id = dcaid_from_pcidev(pdev);
+
+       if (ioatdca->requester_count == ioatdca->max_requesters)
+               return -ENODEV;
+
+       for (i = 0; i < ioatdca->max_requesters; i++) {
+               if (ioatdca->req_slots[i].pdev == NULL) {
+                       /* found an empty slot */
+                       ioatdca->requester_count++;
+                       ioatdca->req_slots[i].pdev = pdev;
+                       ioatdca->req_slots[i].rid = id;
+                       writew(id, ioatdca->dca_base + (i * 4));
+                       /* make sure the ignore function bit is off */
+                       writeb(0, ioatdca->dca_base + (i * 4) + 2);
+                       return i;
+               }
+       }
+       /* Error, ioatdca->requester_count is out of whack */
+       return -EFAULT;
+}
+
+static int ioat_dca_remove_requester(struct dca_provider *dca,
+                                    struct device *dev)
+{
+       struct ioat_dca_priv *ioatdca = dca_priv(dca);
+       struct pci_dev *pdev;
+       int i;
+
+       /* This implementation only supports PCI-Express */
+       if (dev->bus != &pci_bus_type)
+               return -ENODEV;
+       pdev = to_pci_dev(dev);
+
+       for (i = 0; i < ioatdca->max_requesters; i++) {
+               if (ioatdca->req_slots[i].pdev == pdev) {
+                       writew(0, ioatdca->dca_base + (i * 4));
+                       ioatdca->req_slots[i].pdev = NULL;
+                       ioatdca->req_slots[i].rid = 0;
+                       ioatdca->requester_count--;
+                       return i;
+               }
+       }
+       return -ENODEV;
+}
+
+static u8 ioat_dca_get_tag(struct dca_provider *dca,
+                          struct device *dev,
+                          int cpu)
+{
+       struct ioat_dca_priv *ioatdca = dca_priv(dca);
+       int i, apic_id, bit, value;
+       u8 entry, tag;
+
+       tag = 0;
+       apic_id = cpu_physical_id(cpu);
+
+       for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
+               entry = ioatdca->tag_map[i];
+               if (entry & DCA_TAG_MAP_VALID) {
+                       bit = entry & ~DCA_TAG_MAP_VALID;
+                       value = (apic_id & (1 << bit)) ? 1 : 0;
+               } else {
+                       value = entry ? 1 : 0;
+               }
+               tag |= (value << i);
+       }
+       return tag;
+}
+
+static int ioat_dca_dev_managed(struct dca_provider *dca,
+                               struct device *dev)
+{
+       struct ioat_dca_priv *ioatdca = dca_priv(dca);
+       struct pci_dev *pdev;
+       int i;
+
+       pdev = to_pci_dev(dev);
+       for (i = 0; i < ioatdca->max_requesters; i++) {
+               if (ioatdca->req_slots[i].pdev == pdev)
+                       return 1;
+       }
+       return 0;
+}
+
+static struct dca_ops ioat_dca_ops = {
+       .add_requester          = ioat_dca_add_requester,
+       .remove_requester       = ioat_dca_remove_requester,
+       .get_tag                = ioat_dca_get_tag,
+       .dev_managed            = ioat_dca_dev_managed,
+};
+
+
+struct dca_provider * __devinit
+ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+       struct dca_provider *dca;
+       struct ioat_dca_priv *ioatdca;
+       u8 *tag_map = NULL;
+       int i;
+       int err;
+       u8 version;
+       u8 max_requesters;
+
+       if (!system_has_dca_enabled(pdev))
+               return NULL;
+
+       /* I/OAT v1 systems must have a known tag_map to support DCA */
+       switch (pdev->vendor) {
+       case PCI_VENDOR_ID_INTEL:
+               switch (pdev->device) {
+               case PCI_DEVICE_ID_INTEL_IOAT:
+                       tag_map = ioat_tag_map_BNB;
+                       break;
+               case PCI_DEVICE_ID_INTEL_IOAT_CNB:
+                       tag_map = ioat_tag_map_CNB;
+                       break;
+               case PCI_DEVICE_ID_INTEL_IOAT_SCNB:
+                       tag_map = ioat_tag_map_SCNB;
+                       break;
+               }
+               break;
+       case PCI_VENDOR_ID_UNISYS:
+               switch (pdev->device) {
+               case PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR:
+                       tag_map = ioat_tag_map_UNISYS;
+                       break;
+               }
+               break;
+       }
+       if (tag_map == NULL)
+               return NULL;
+
+       version = readb(iobase + IOAT_VER_OFFSET);
+       if (version == IOAT_VER_3_0)
+               max_requesters = IOAT3_DCA_MAX_REQ;
+       else
+               max_requesters = IOAT_DCA_MAX_REQ;
+
+       dca = alloc_dca_provider(&ioat_dca_ops,
+                       sizeof(*ioatdca) +
+                       (sizeof(struct ioat_dca_slot) * max_requesters));
+       if (!dca)
+               return NULL;
+
+       ioatdca = dca_priv(dca);
+       ioatdca->max_requesters = max_requesters;
+       ioatdca->dca_base = iobase + 0x54;
+
+       /* copy over the APIC ID to DCA tag mapping */
+       for (i = 0; i < IOAT_TAG_MAP_LEN; i++)
+               ioatdca->tag_map[i] = tag_map[i];
+
+       err = register_dca_provider(dca, &pdev->dev);
+       if (err) {
+               free_dca_provider(dca);
+               return NULL;
+       }
+
+       return dca;
+}
+
+
+static int ioat2_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+       struct ioat_dca_priv *ioatdca = dca_priv(dca);
+       struct pci_dev *pdev;
+       int i;
+       u16 id;
+       u16 global_req_table;
+
+       /* This implementation only supports PCI-Express */
+       if (dev->bus != &pci_bus_type)
+               return -ENODEV;
+       pdev = to_pci_dev(dev);
+       id = dcaid_from_pcidev(pdev);
+
+       if (ioatdca->requester_count == ioatdca->max_requesters)
+               return -ENODEV;
+
+       for (i = 0; i < ioatdca->max_requesters; i++) {
+               if (ioatdca->req_slots[i].pdev == NULL) {
+                       /* found an empty slot */
+                       ioatdca->requester_count++;
+                       ioatdca->req_slots[i].pdev = pdev;
+                       ioatdca->req_slots[i].rid = id;
+                       global_req_table =
+                             readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
+                       writel(id | IOAT_DCA_GREQID_VALID,
+                              ioatdca->iobase + global_req_table + (i * 4));
+                       return i;
+               }
+       }
+       /* Error, ioatdca->requester_count is out of whack */
+       return -EFAULT;
+}
+
+static int ioat2_dca_remove_requester(struct dca_provider *dca,
+                                     struct device *dev)
+{
+       struct ioat_dca_priv *ioatdca = dca_priv(dca);
+       struct pci_dev *pdev;
+       int i;
+       u16 global_req_table;
+
+       /* This implementation only supports PCI-Express */
+       if (dev->bus != &pci_bus_type)
+               return -ENODEV;
+       pdev = to_pci_dev(dev);
+
+       for (i = 0; i < ioatdca->max_requesters; i++) {
+               if (ioatdca->req_slots[i].pdev == pdev) {
+                       global_req_table =
+                             readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
+                       writel(0, ioatdca->iobase + global_req_table + (i * 4));
+                       ioatdca->req_slots[i].pdev = NULL;
+                       ioatdca->req_slots[i].rid = 0;
+                       ioatdca->requester_count--;
+                       return i;
+               }
+       }
+       return -ENODEV;
+}
+
+static u8 ioat2_dca_get_tag(struct dca_provider *dca,
+                           struct device *dev,
+                           int cpu)
+{
+       u8 tag;
+
+       tag = ioat_dca_get_tag(dca, dev, cpu);
+       tag = (~tag) & 0x1F;
+       return tag;
+}
+
+static struct dca_ops ioat2_dca_ops = {
+       .add_requester          = ioat2_dca_add_requester,
+       .remove_requester       = ioat2_dca_remove_requester,
+       .get_tag                = ioat2_dca_get_tag,
+       .dev_managed            = ioat_dca_dev_managed,
+};
+
+static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
+{
+       int slots = 0;
+       u32 req;
+       u16 global_req_table;
+
+       global_req_table = readw(iobase + dca_offset + IOAT_DCA_GREQID_OFFSET);
+       if (global_req_table == 0)
+               return 0;
+       do {
+               req = readl(iobase + global_req_table + (slots * sizeof(u32)));
+               slots++;
+       } while ((req & IOAT_DCA_GREQID_LASTID) == 0);
+
+       return slots;
+}
+
+struct dca_provider * __devinit
+ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+       struct dca_provider *dca;
+       struct ioat_dca_priv *ioatdca;
+       int slots;
+       int i;
+       int err;
+       u32 tag_map;
+       u16 dca_offset;
+       u16 csi_fsb_control;
+       u16 pcie_control;
+       u8 bit;
+
+       if (!system_has_dca_enabled(pdev))
+               return NULL;
+
+       dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
+       if (dca_offset == 0)
+               return NULL;
+
+       slots = ioat2_dca_count_dca_slots(iobase, dca_offset);
+       if (slots == 0)
+               return NULL;
+
+       dca = alloc_dca_provider(&ioat2_dca_ops,
+                                sizeof(*ioatdca)
+                                     + (sizeof(struct ioat_dca_slot) * slots));
+       if (!dca)
+               return NULL;
+
+       ioatdca = dca_priv(dca);
+       ioatdca->iobase = iobase;
+       ioatdca->dca_base = iobase + dca_offset;
+       ioatdca->max_requesters = slots;
+
+       /* some BIOSes might not know to turn these on */
+       csi_fsb_control = readw(ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
+       if ((csi_fsb_control & IOAT_FSB_CAP_ENABLE_PREFETCH) == 0) {
+               csi_fsb_control |= IOAT_FSB_CAP_ENABLE_PREFETCH;
+               writew(csi_fsb_control,
+                      ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
+       }
+       pcie_control = readw(ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
+       if ((pcie_control & IOAT_PCI_CAP_ENABLE_MEMWR) == 0) {
+               pcie_control |= IOAT_PCI_CAP_ENABLE_MEMWR;
+               writew(pcie_control,
+                      ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
+       }
+
+
+       /* TODO version, compatibility and configuration checks */
+
+       /* copy out the APIC to DCA tag map */
+       tag_map = readl(ioatdca->dca_base + IOAT_APICID_TAG_MAP_OFFSET);
+       for (i = 0; i < 5; i++) {
+               bit = (tag_map >> (4 * i)) & 0x0f;
+               if (bit < 8)
+                       ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID;
+               else
+                       ioatdca->tag_map[i] = 0;
+       }
+
+       if (!dca2_tag_map_valid(ioatdca->tag_map)) {
+               dev_err(&pdev->dev, "APICID_TAG_MAP set incorrectly by BIOS, "
+                       "disabling DCA\n");
+               free_dca_provider(dca);
+               return NULL;
+       }
+
+       err = register_dca_provider(dca, &pdev->dev);
+       if (err) {
+               free_dca_provider(dca);
+               return NULL;
+       }
+
+       return dca;
+}
+
+static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+       struct ioat_dca_priv *ioatdca = dca_priv(dca);
+       struct pci_dev *pdev;
+       int i;
+       u16 id;
+       u16 global_req_table;
+
+       /* This implementation only supports PCI-Express */
+       if (dev->bus != &pci_bus_type)
+               return -ENODEV;
+       pdev = to_pci_dev(dev);
+       id = dcaid_from_pcidev(pdev);
+
+       if (ioatdca->requester_count == ioatdca->max_requesters)
+               return -ENODEV;
+
+       for (i = 0; i < ioatdca->max_requesters; i++) {
+               if (ioatdca->req_slots[i].pdev == NULL) {
+                       /* found an empty slot */
+                       ioatdca->requester_count++;
+                       ioatdca->req_slots[i].pdev = pdev;
+                       ioatdca->req_slots[i].rid = id;
+                       global_req_table =
+                             readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
+                       writel(id | IOAT_DCA_GREQID_VALID,
+                              ioatdca->iobase + global_req_table + (i * 4));
+                       return i;
+               }
+       }
+       /* Error, ioatdca->requester_count is out of whack */
+       return -EFAULT;
+}
+
+static int ioat3_dca_remove_requester(struct dca_provider *dca,
+                                     struct device *dev)
+{
+       struct ioat_dca_priv *ioatdca = dca_priv(dca);
+       struct pci_dev *pdev;
+       int i;
+       u16 global_req_table;
+
+       /* This implementation only supports PCI-Express */
+       if (dev->bus != &pci_bus_type)
+               return -ENODEV;
+       pdev = to_pci_dev(dev);
+
+       for (i = 0; i < ioatdca->max_requesters; i++) {
+               if (ioatdca->req_slots[i].pdev == pdev) {
+                       global_req_table =
+                             readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
+                       writel(0, ioatdca->iobase + global_req_table + (i * 4));
+                       ioatdca->req_slots[i].pdev = NULL;
+                       ioatdca->req_slots[i].rid = 0;
+                       ioatdca->requester_count--;
+                       return i;
+               }
+       }
+       return -ENODEV;
+}
+
+static u8 ioat3_dca_get_tag(struct dca_provider *dca,
+                           struct device *dev,
+                           int cpu)
+{
+       u8 tag;
+
+       struct ioat_dca_priv *ioatdca = dca_priv(dca);
+       int i, apic_id, bit, value;
+       u8 entry;
+
+       tag = 0;
+       apic_id = cpu_physical_id(cpu);
+
+       for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
+               entry = ioatdca->tag_map[i];
+               if (entry & DCA3_TAG_MAP_BIT_TO_SEL) {
+                       bit = entry &
+                               ~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV);
+                       value = (apic_id & (1 << bit)) ? 1 : 0;
+               } else if (entry & DCA3_TAG_MAP_BIT_TO_INV) {
+                       bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV;
+                       value = (apic_id & (1 << bit)) ? 0 : 1;
+               } else {
+                       value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0;
+               }
+               tag |= (value << i);
+       }
+
+       return tag;
+}
+
+static struct dca_ops ioat3_dca_ops = {
+       .add_requester          = ioat3_dca_add_requester,
+       .remove_requester       = ioat3_dca_remove_requester,
+       .get_tag                = ioat3_dca_get_tag,
+       .dev_managed            = ioat_dca_dev_managed,
+};
+
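+/*
+ * The global requester table is a hardware array of 32-bit GREQID
+ * entries; its size is discovered by scanning until an entry with
+ * IOAT_DCA_GREQID_LASTID set is found (that terminating entry is
+ * counted as a slot too).
+ */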
+static int ioat3_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
+{
+       int slots = 0;
+       u32 req;
+       u16 global_req_table;
+
+       global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET);
+       if (global_req_table == 0)
+               return 0;
+
+       do {
+               req = readl(iobase + global_req_table + (slots * sizeof(u32)));
+               slots++;
+       } while ((req & IOAT_DCA_GREQID_LASTID) == 0);
+
+       return slots;
+}
+
+struct dca_provider * __devinit
+ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+       struct dca_provider *dca;
+       struct ioat_dca_priv *ioatdca;
+       int slots;
+       int i;
+       int err;
+       u16 dca_offset;
+       u16 csi_fsb_control;
+       u16 pcie_control;
+       u8 bit;
+
+       union {
+               u64 full;
+               struct {
+                       u32 low;
+                       u32 high;
+               };
+       } tag_map;
+
+       if (!system_has_dca_enabled(pdev))
+               return NULL;
+
+       dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
+       if (dca_offset == 0)
+               return NULL;
+
+       slots = ioat3_dca_count_dca_slots(iobase, dca_offset);
+       if (slots == 0)
+               return NULL;
+
+       dca = alloc_dca_provider(&ioat3_dca_ops,
+                                sizeof(*ioatdca)
+                                     + (sizeof(struct ioat_dca_slot) * slots));
+       if (!dca)
+               return NULL;
+
+       ioatdca = dca_priv(dca);
+       ioatdca->iobase = iobase;
+       ioatdca->dca_base = iobase + dca_offset;
+       ioatdca->max_requesters = slots;
+
+       /* some BIOS versions may not enable these, so turn them on ourselves */
+       csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
+       if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) {
+               csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH;
+               writew(csi_fsb_control,
+                      ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
+       }
+       pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
+       if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) {
+               pcie_control |= IOAT3_PCI_CONTROL_MEMWR;
+               writew(pcie_control,
+                      ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
+       }
+
+       /* TODO version, compatibility and configuration checks */
+
+       /* copy out the APIC to DCA tag map */
+       tag_map.low =
+               readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW);
+       tag_map.high =
+               readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH);
+       for (i = 0; i < 8; i++) {
+               bit = tag_map.full >> (8 * i);
+               ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK;
+       }
+
+       err = register_dca_provider(dca, &pdev->dev);
+       if (err) {
+               free_dca_provider(dca);
+               return NULL;
+       }
+
+       return dca;
+}
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
new file mode 100644 (file)
index 0000000..c524d36
--- /dev/null
@@ -0,0 +1,1238 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine, which does asynchronous
+ * copy operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/i7300_idle.h>
+#include "dma.h"
+#include "registers.h"
+#include "hw.h"
+
+int ioat_pending_level = 4;
+module_param(ioat_pending_level, int, 0644);
+MODULE_PARM_DESC(ioat_pending_level,
+                "high-water mark for pushing ioat descriptors (default: 4)");
+
+/* internal functions */
+static void ioat1_cleanup(struct ioat_dma_chan *ioat);
+static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat);
+
+/**
+ * ioat_dma_do_interrupt - handler used for single vector interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
+{
+       struct ioatdma_device *instance = data;
+       struct ioat_chan_common *chan;
+       unsigned long attnstatus;
+       int bit;
+       u8 intrctrl;
+
+       intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
+
+       if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
+               return IRQ_NONE;
+
+       if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
+               writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+               return IRQ_NONE;
+       }
+
+       attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
+       for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
+               chan = ioat_chan_by_index(instance, bit);
+               tasklet_schedule(&chan->cleanup_task);
+       }
+
+       writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+       return IRQ_HANDLED;
+}
+
+/**
+ * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
+{
+       struct ioat_chan_common *chan = data;
+
+       tasklet_schedule(&chan->cleanup_task);
+
+       return IRQ_HANDLED;
+}
+
+static void ioat1_cleanup_tasklet(unsigned long data);
+
+/* common channel initialization */
+void ioat_init_channel(struct ioatdma_device *device,
+                      struct ioat_chan_common *chan, int idx,
+                      void (*timer_fn)(unsigned long),
+                      void (*tasklet)(unsigned long),
+                      unsigned long ioat)
+{
+       struct dma_device *dma = &device->common;
+
+       chan->device = device;
+       chan->reg_base = device->reg_base + (0x80 * (idx + 1));
+       spin_lock_init(&chan->cleanup_lock);
+       chan->common.device = dma;
+       list_add_tail(&chan->common.device_node, &dma->channels);
+       device->idx[idx] = chan;
+       init_timer(&chan->timer);
+       chan->timer.function = timer_fn;
+       chan->timer.data = ioat;
+       tasklet_init(&chan->cleanup_task, tasklet, ioat);
+       tasklet_disable(&chan->cleanup_task);
+}
+
+static void ioat1_timer_event(unsigned long data);
+
+/**
+ * ioat1_enumerate_channels - find and initialize the device's channels
+ * @device: the device to be enumerated
+ */
+static int ioat1_enumerate_channels(struct ioatdma_device *device)
+{
+       u8 xfercap_scale;
+       u32 xfercap;
+       int i;
+       struct ioat_dma_chan *ioat;
+       struct device *dev = &device->pdev->dev;
+       struct dma_device *dma = &device->common;
+
+       INIT_LIST_HEAD(&dma->channels);
+       dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
+       dma->chancnt &= 0x1f; /* bits [4:0] valid */
+       if (dma->chancnt > ARRAY_SIZE(device->idx)) {
+               dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
+                        dma->chancnt, ARRAY_SIZE(device->idx));
+               dma->chancnt = ARRAY_SIZE(device->idx);
+       }
+       xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
+       xfercap_scale &= 0x1f; /* bits [4:0] valid */
+       xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
+       dev_dbg(dev, "%s: xfercap = %d\n", __func__, xfercap);
+
+#ifdef  CONFIG_I7300_IDLE_IOAT_CHANNEL
+       if (i7300_idle_platform_probe(NULL, NULL, 1) == 0)
+               dma->chancnt--;
+#endif
+       for (i = 0; i < dma->chancnt; i++) {
+               ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL);
+               if (!ioat)
+                       break;
+
+               ioat_init_channel(device, &ioat->base, i,
+                                 ioat1_timer_event,
+                                 ioat1_cleanup_tasklet,
+                                 (unsigned long) ioat);
+               ioat->xfercap = xfercap;
+               spin_lock_init(&ioat->desc_lock);
+               INIT_LIST_HEAD(&ioat->free_desc);
+               INIT_LIST_HEAD(&ioat->used_desc);
+       }
+       dma->chancnt = i;
+       return i;
+}
+
+/**
+ * __ioat1_dma_memcpy_issue_pending - push potentially unrecognized appended
+ *                                    descriptors to hw
+ * @ioat: IOAT DMA channel handle
+ */
+static inline void
+__ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat)
+{
+       void __iomem *reg_base = ioat->base.reg_base;
+
+       dev_dbg(to_dev(&ioat->base), "%s: pending: %d\n",
+               __func__, ioat->pending);
+       ioat->pending = 0;
+       writeb(IOAT_CHANCMD_APPEND, reg_base + IOAT1_CHANCMD_OFFSET);
+}
+
+static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
+{
+       struct ioat_dma_chan *ioat = to_ioat_chan(chan);
+
+       if (ioat->pending > 0) {
+               spin_lock_bh(&ioat->desc_lock);
+               __ioat1_dma_memcpy_issue_pending(ioat);
+               spin_unlock_bh(&ioat->desc_lock);
+       }
+}
+
+/**
+ * ioat1_reset_channel - restart a channel
+ * @ioat: IOAT DMA channel handle
+ */
+static void ioat1_reset_channel(struct ioat_dma_chan *ioat)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+       void __iomem *reg_base = chan->reg_base;
+       u32 chansts, chanerr;
+
+       dev_warn(to_dev(chan), "reset\n");
+       chanerr = readl(reg_base + IOAT_CHANERR_OFFSET);
+       chansts = *chan->completion & IOAT_CHANSTS_STATUS;
+       if (chanerr) {
+               dev_err(to_dev(chan),
+                       "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
+                       chan_num(chan), chansts, chanerr);
+               writel(chanerr, reg_base + IOAT_CHANERR_OFFSET);
+       }
+
+       /*
+        * Whack it upside the head with a reset and wait for things
+        * to settle out.  Force the pending count to a large negative
+        * value to ensure nothing triggers an issue_pending while
+        * we're waiting for the reset to complete.
+        */
+
+       ioat->pending = INT_MIN;
+       writeb(IOAT_CHANCMD_RESET,
+              reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
+       set_bit(IOAT_RESET_PENDING, &chan->state);
+       mod_timer(&chan->timer, jiffies + RESET_DELAY);
+}
+
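+/*
+ * Cookies are assigned monotonically under desc_lock, wrapping back to
+ * 1 on overflow (0 and negative values are reserved for errors).  The
+ * new chain is appended by pointing the NextDescriptor field of the
+ * current tail at the first new descriptor; the wmb() makes the
+ * descriptor contents globally visible before the hardware can follow
+ * that link.
+ */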
+static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+       struct dma_chan *c = tx->chan;
+       struct ioat_dma_chan *ioat = to_ioat_chan(c);
+       struct ioat_desc_sw *desc = tx_to_ioat_desc(tx);
+       struct ioat_chan_common *chan = &ioat->base;
+       struct ioat_desc_sw *first;
+       struct ioat_desc_sw *chain_tail;
+       dma_cookie_t cookie;
+
+       spin_lock_bh(&ioat->desc_lock);
+       /* cookie incr and addition to used_list must be atomic */
+       cookie = c->cookie;
+       cookie++;
+       if (cookie < 0)
+               cookie = 1;
+       c->cookie = cookie;
+       tx->cookie = cookie;
+       dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
+
+       /* write address into NextDescriptor field of last desc in chain */
+       first = to_ioat_desc(desc->tx_list.next);
+       chain_tail = to_ioat_desc(ioat->used_desc.prev);
+       /* make descriptor updates globally visible before chaining */
+       wmb();
+       chain_tail->hw->next = first->txd.phys;
+       list_splice_tail_init(&desc->tx_list, &ioat->used_desc);
+       dump_desc_dbg(ioat, chain_tail);
+       dump_desc_dbg(ioat, first);
+
+       if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
+               mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+       ioat->active += desc->hw->tx_cnt;
+       ioat->pending += desc->hw->tx_cnt;
+       if (ioat->pending >= ioat_pending_level)
+               __ioat1_dma_memcpy_issue_pending(ioat);
+       spin_unlock_bh(&ioat->desc_lock);
+
+       return cookie;
+}
+
+/**
+ * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
+ * @ioat: the channel supplying the memory pool for the descriptors
+ * @flags: allocation flags
+ */
+static struct ioat_desc_sw *
+ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags)
+{
+       struct ioat_dma_descriptor *desc;
+       struct ioat_desc_sw *desc_sw;
+       struct ioatdma_device *ioatdma_device;
+       dma_addr_t phys;
+
+       ioatdma_device = ioat->base.device;
+       desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
+       if (unlikely(!desc))
+               return NULL;
+
+       desc_sw = kzalloc(sizeof(*desc_sw), flags);
+       if (unlikely(!desc_sw)) {
+               pci_pool_free(ioatdma_device->dma_pool, desc, phys);
+               return NULL;
+       }
+
+       memset(desc, 0, sizeof(*desc));
+
+       INIT_LIST_HEAD(&desc_sw->tx_list);
+       dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common);
+       desc_sw->txd.tx_submit = ioat1_tx_submit;
+       desc_sw->hw = desc;
+       desc_sw->txd.phys = phys;
+       set_desc_id(desc_sw, -1);
+
+       return desc_sw;
+}
+
+static int ioat_initial_desc_count = 256;
+module_param(ioat_initial_desc_count, int, 0644);
+MODULE_PARM_DESC(ioat_initial_desc_count,
+                "ioat1: initial descriptors per channel (default: 256)");
+/**
+ * ioat1_dma_alloc_chan_resources - returns the number of allocated descriptors
+ * @c: the channel to be filled out
+ */
+static int ioat1_dma_alloc_chan_resources(struct dma_chan *c)
+{
+       struct ioat_dma_chan *ioat = to_ioat_chan(c);
+       struct ioat_chan_common *chan = &ioat->base;
+       struct ioat_desc_sw *desc;
+       u32 chanerr;
+       int i;
+       LIST_HEAD(tmp_list);
+
+       /* have we already been set up? */
+       if (!list_empty(&ioat->free_desc))
+               return ioat->desccount;
+
+       /* Setup register to interrupt and write completion status on error */
+       writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET);
+
+       chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+       if (chanerr) {
+               dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr);
+               writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
+       }
+
+       /* Allocate descriptors */
+       for (i = 0; i < ioat_initial_desc_count; i++) {
+               desc = ioat_dma_alloc_descriptor(ioat, GFP_KERNEL);
+               if (!desc) {
+                       dev_err(to_dev(chan), "Only %d initial descriptors\n", i);
+                       break;
+               }
+               set_desc_id(desc, i);
+               list_add_tail(&desc->node, &tmp_list);
+       }
+       spin_lock_bh(&ioat->desc_lock);
+       ioat->desccount = i;
+       list_splice(&tmp_list, &ioat->free_desc);
+       spin_unlock_bh(&ioat->desc_lock);
+
+       /* allocate a completion writeback area */
+       /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
+       chan->completion = pci_pool_alloc(chan->device->completion_pool,
+                                         GFP_KERNEL, &chan->completion_dma);
+       if (!chan->completion)
+               return -ENOMEM;
+       memset(chan->completion, 0, sizeof(*chan->completion));
+       writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF,
+              chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
+       writel(((u64) chan->completion_dma) >> 32,
+              chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
+
+       tasklet_enable(&chan->cleanup_task);
+       ioat1_dma_start_null_desc(ioat);  /* give chain to dma device */
+       dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
+               __func__, ioat->desccount);
+       return ioat->desccount;
+}
+
+/**
+ * ioat1_dma_free_chan_resources - release all the descriptors
+ * @chan: the channel to be cleaned
+ */
+static void ioat1_dma_free_chan_resources(struct dma_chan *c)
+{
+       struct ioat_dma_chan *ioat = to_ioat_chan(c);
+       struct ioat_chan_common *chan = &ioat->base;
+       struct ioatdma_device *ioatdma_device = chan->device;
+       struct ioat_desc_sw *desc, *_desc;
+       int in_use_descs = 0;
+
+       /* Before freeing channel resources first check
+        * if they have been previously allocated for this channel.
+        */
+       if (ioat->desccount == 0)
+               return;
+
+       tasklet_disable(&chan->cleanup_task);
+       del_timer_sync(&chan->timer);
+       ioat1_cleanup(ioat);
+
+       /* Delay 100ms after reset to allow internal DMA logic to quiesce
+        * before removing DMA descriptor resources.
+        */
+       writeb(IOAT_CHANCMD_RESET,
+              chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
+       mdelay(100);
+
+       spin_lock_bh(&ioat->desc_lock);
+       list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) {
+               dev_dbg(to_dev(chan), "%s: freeing %d from used list\n",
+                       __func__, desc_id(desc));
+               dump_desc_dbg(ioat, desc);
+               in_use_descs++;
+               list_del(&desc->node);
+               pci_pool_free(ioatdma_device->dma_pool, desc->hw,
+                             desc->txd.phys);
+               kfree(desc);
+       }
+       list_for_each_entry_safe(desc, _desc,
+                                &ioat->free_desc, node) {
+               list_del(&desc->node);
+               pci_pool_free(ioatdma_device->dma_pool, desc->hw,
+                             desc->txd.phys);
+               kfree(desc);
+       }
+       spin_unlock_bh(&ioat->desc_lock);
+
+       pci_pool_free(ioatdma_device->completion_pool,
+                     chan->completion,
+                     chan->completion_dma);
+
+       /* one in-use descriptor is expected: the null descriptor stays on the chain on purpose */
+       if (in_use_descs > 1)
+               dev_err(to_dev(chan), "Freeing %d in use descriptors!\n",
+                       in_use_descs - 1);
+
+       chan->last_completion = 0;
+       chan->completion_dma = 0;
+       ioat->pending = 0;
+       ioat->desccount = 0;
+}
+
+/**
+ * ioat1_dma_get_next_descriptor - return the next available descriptor
+ * @ioat: IOAT DMA channel handle
+ *
+ * Gets the next descriptor from the chain, and must be called with the
+ * channel's desc_lock held.  Allocates more descriptors if the channel
+ * has run out.
+ */
+static struct ioat_desc_sw *
+ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat)
+{
+       struct ioat_desc_sw *new;
+
+       if (!list_empty(&ioat->free_desc)) {
+               new = to_ioat_desc(ioat->free_desc.next);
+               list_del(&new->node);
+       } else {
+               /* try to get another desc */
+               new = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC);
+               if (!new) {
+                       dev_err(to_dev(&ioat->base), "alloc failed\n");
+                       return NULL;
+               }
+       }
+       dev_dbg(to_dev(&ioat->base), "%s: allocated: %d\n",
+               __func__, desc_id(new));
+       prefetch(new->hw);
+       return new;
+}
+
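+/*
+ * A request longer than the channel's xfercap is split into a chain of
+ * descriptors, each copying at most xfercap bytes.  E.g. with
+ * xfercap = 4096, a 10000 byte copy becomes three descriptors
+ * (4096 + 4096 + 1808); only the final descriptor gets the
+ * completion-write control bit (and the interrupt enable, if the
+ * caller requested one).
+ */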
+static struct dma_async_tx_descriptor *
+ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest,
+                     dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+       struct ioat_dma_chan *ioat = to_ioat_chan(c);
+       struct ioat_desc_sw *desc;
+       size_t copy;
+       LIST_HEAD(chain);
+       dma_addr_t src = dma_src;
+       dma_addr_t dest = dma_dest;
+       size_t total_len = len;
+       struct ioat_dma_descriptor *hw = NULL;
+       int tx_cnt = 0;
+
+       spin_lock_bh(&ioat->desc_lock);
+       desc = ioat1_dma_get_next_descriptor(ioat);
+       do {
+               if (!desc)
+                       break;
+
+               tx_cnt++;
+               copy = min_t(size_t, len, ioat->xfercap);
+
+               hw = desc->hw;
+               hw->size = copy;
+               hw->ctl = 0;
+               hw->src_addr = src;
+               hw->dst_addr = dest;
+
+               list_add_tail(&desc->node, &chain);
+
+               len -= copy;
+               dest += copy;
+               src += copy;
+               if (len) {
+                       struct ioat_desc_sw *next;
+
+                       async_tx_ack(&desc->txd);
+                       next = ioat1_dma_get_next_descriptor(ioat);
+                       hw->next = next ? next->txd.phys : 0;
+                       dump_desc_dbg(ioat, desc);
+                       desc = next;
+               } else
+                       hw->next = 0;
+       } while (len);
+
+       if (!desc) {
+               struct ioat_chan_common *chan = &ioat->base;
+
+               dev_err(to_dev(chan),
+                       "chan%d - get_next_desc failed\n", chan_num(chan));
+               list_splice(&chain, &ioat->free_desc);
+               spin_unlock_bh(&ioat->desc_lock);
+               return NULL;
+       }
+       spin_unlock_bh(&ioat->desc_lock);
+
+       desc->txd.flags = flags;
+       desc->len = total_len;
+       list_splice(&chain, &desc->tx_list);
+       hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+       hw->ctl_f.compl_write = 1;
+       hw->tx_cnt = tx_cnt;
+       dump_desc_dbg(ioat, desc);
+
+       return &desc->txd;
+}
+
+static void ioat1_cleanup_tasklet(unsigned long data)
+{
+       struct ioat_dma_chan *ioat = (void *) data;
+
+       ioat1_cleanup(ioat);
+       writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
+                   size_t len, struct ioat_dma_descriptor *hw)
+{
+       struct pci_dev *pdev = chan->device->pdev;
+       size_t offset = len - hw->size;
+
+       if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
+               ioat_unmap(pdev, hw->dst_addr - offset, len,
+                          PCI_DMA_FROMDEVICE, flags, 1);
+
+       if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP))
+               ioat_unmap(pdev, hw->src_addr - offset, len,
+                          PCI_DMA_TODEVICE, flags, 0);
+}
+
+unsigned long ioat_get_current_completion(struct ioat_chan_common *chan)
+{
+       unsigned long phys_complete;
+       u64 completion;
+
+       completion = *chan->completion;
+       phys_complete = ioat_chansts_to_addr(completion);
+
+       dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__,
+               (unsigned long long) phys_complete);
+
+       if (is_ioat_halted(completion)) {
+               u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+               dev_err(to_dev(chan), "Channel halted, chanerr = %x\n",
+                       chanerr);
+
+               /* TODO do something to salvage the situation */
+       }
+
+       return phys_complete;
+}
+
+bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
+                          unsigned long *phys_complete)
+{
+       *phys_complete = ioat_get_current_completion(chan);
+       if (*phys_complete == chan->last_completion)
+               return false;
+       clear_bit(IOAT_COMPLETION_ACK, &chan->state);
+       mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+       return true;
+}
+
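+/*
+ * __cleanup() retires descriptors from used_desc, oldest first, up to
+ * the one whose physical address matches the completion writeback
+ * (phys_complete).  That last descriptor is left on the list so new
+ * work can be appended behind it.
+ */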
+static void __cleanup(struct ioat_dma_chan *ioat, unsigned long phys_complete)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+       struct list_head *_desc, *n;
+       struct dma_async_tx_descriptor *tx;
+
+       dev_dbg(to_dev(chan), "%s: phys_complete: %lx\n",
+                __func__, phys_complete);
+       list_for_each_safe(_desc, n, &ioat->used_desc) {
+               struct ioat_desc_sw *desc;
+
+               prefetch(n);
+               desc = list_entry(_desc, typeof(*desc), node);
+               tx = &desc->txd;
+               /*
+                * Incoming DMA requests may use multiple descriptors
+                * when they exceed xfercap.  If so, only the last
+                * descriptor carries a cookie and requires unmapping.
+                */
+               dump_desc_dbg(ioat, desc);
+               if (tx->cookie) {
+                       chan->completed_cookie = tx->cookie;
+                       tx->cookie = 0;
+                       ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
+                       ioat->active -= desc->hw->tx_cnt;
+                       if (tx->callback) {
+                               tx->callback(tx->callback_param);
+                               tx->callback = NULL;
+                       }
+               }
+
+               if (tx->phys != phys_complete) {
+                       /*
+                        * a completed entry, but not the last, so clean
+                        * up if the client is done with the descriptor
+                        */
+                       if (async_tx_test_ack(tx))
+                               list_move_tail(&desc->node, &ioat->free_desc);
+               } else {
+                       /*
+                        * last used desc. Do not remove, so we can
+                        * append from it.
+                        */
+
+                       /* if nothing else is pending, cancel the
+                        * completion timeout
+                        */
+                       if (n == &ioat->used_desc) {
+                               dev_dbg(to_dev(chan),
+                                       "%s cancel completion timeout\n",
+                                       __func__);
+                               clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
+                       }
+
+                       /* TODO check status bits? */
+                       break;
+               }
+       }
+
+       chan->last_completion = phys_complete;
+}
+
+/**
+ * ioat1_cleanup - clean up finished descriptors
+ * @ioat: ioat channel to be cleaned up
+ *
+ * To prevent lock contention we defer cleanup when the locks are
+ * contended with a terminal timeout that forces cleanup and catches
+ * completion notification errors.
+ */
+static void ioat1_cleanup(struct ioat_dma_chan *ioat)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+       unsigned long phys_complete;
+
+       prefetch(chan->completion);
+
+       if (!spin_trylock_bh(&chan->cleanup_lock))
+               return;
+
+       if (!ioat_cleanup_preamble(chan, &phys_complete)) {
+               spin_unlock_bh(&chan->cleanup_lock);
+               return;
+       }
+
+       if (!spin_trylock_bh(&ioat->desc_lock)) {
+               spin_unlock_bh(&chan->cleanup_lock);
+               return;
+       }
+
+       __cleanup(ioat, phys_complete);
+
+       spin_unlock_bh(&ioat->desc_lock);
+       spin_unlock_bh(&chan->cleanup_lock);
+}
+
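+/*
+ * Watchdog: when a reset is pending, restart the chain from the last
+ * used descriptor; otherwise, with completions outstanding, first try
+ * a normal cleanup, escalate to a channel reset if an already
+ * acknowledged completion shows no progress, and in all other cases
+ * record the current status and re-arm the timer.
+ */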
+static void ioat1_timer_event(unsigned long data)
+{
+       struct ioat_dma_chan *ioat = (void *) data;
+       struct ioat_chan_common *chan = &ioat->base;
+
+       dev_dbg(to_dev(chan), "%s: state: %lx\n", __func__, chan->state);
+
+       spin_lock_bh(&chan->cleanup_lock);
+       if (test_and_clear_bit(IOAT_RESET_PENDING, &chan->state)) {
+               struct ioat_desc_sw *desc;
+
+               spin_lock_bh(&ioat->desc_lock);
+
+               /* restart active descriptors */
+               desc = to_ioat_desc(ioat->used_desc.prev);
+               ioat_set_chainaddr(ioat, desc->txd.phys);
+               ioat_start(chan);
+
+               ioat->pending = 0;
+               set_bit(IOAT_COMPLETION_PENDING, &chan->state);
+               mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+               spin_unlock_bh(&ioat->desc_lock);
+       } else if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
+               unsigned long phys_complete;
+
+               spin_lock_bh(&ioat->desc_lock);
+               /* if we haven't made progress and we have already
+                * acknowledged a pending completion once, then be more
+                * forceful with a restart
+                */
+               if (ioat_cleanup_preamble(chan, &phys_complete))
+                       __cleanup(ioat, phys_complete);
+               else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
+                       ioat1_reset_channel(ioat);
+               else {
+                       u64 status = ioat_chansts(chan);
+
+                       /* manually update the last completion address */
+                       if (ioat_chansts_to_addr(status) != 0)
+                               *chan->completion = status;
+
+                       set_bit(IOAT_COMPLETION_ACK, &chan->state);
+                       mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+               }
+               spin_unlock_bh(&ioat->desc_lock);
+       }
+       spin_unlock_bh(&chan->cleanup_lock);
+}
+
+static enum dma_status
+ioat1_dma_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+                     dma_cookie_t *done, dma_cookie_t *used)
+{
+       struct ioat_dma_chan *ioat = to_ioat_chan(c);
+
+       if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
+               return DMA_SUCCESS;
+
+       ioat1_cleanup(ioat);
+
+       return ioat_is_complete(c, cookie, done, used);
+}
+
+static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+       struct ioat_desc_sw *desc;
+       struct ioat_dma_descriptor *hw;
+
+       spin_lock_bh(&ioat->desc_lock);
+
+       desc = ioat1_dma_get_next_descriptor(ioat);
+
+       if (!desc) {
+               dev_err(to_dev(chan),
+                       "Unable to start null desc - get next desc failed\n");
+               spin_unlock_bh(&ioat->desc_lock);
+               return;
+       }
+
+       hw = desc->hw;
+       hw->ctl = 0;
+       hw->ctl_f.null = 1;
+       hw->ctl_f.int_en = 1;
+       hw->ctl_f.compl_write = 1;
+       /* set size to non-zero value (channel returns error when size is 0) */
+       hw->size = NULL_DESC_BUFFER_SIZE;
+       hw->src_addr = 0;
+       hw->dst_addr = 0;
+       async_tx_ack(&desc->txd);
+       hw->next = 0;
+       list_add_tail(&desc->node, &ioat->used_desc);
+       dump_desc_dbg(ioat, desc);
+
+       ioat_set_chainaddr(ioat, desc->txd.phys);
+       ioat_start(chan);
+       spin_unlock_bh(&ioat->desc_lock);
+}
+
+/*
+ * Perform an I/OAT transaction to verify the HW works.
+ */
+#define IOAT_TEST_SIZE 2000
+
+static void __devinit ioat_dma_test_callback(void *dma_async_param)
+{
+       struct completion *cmp = dma_async_param;
+
+       complete(cmp);
+}
+
+/**
+ * ioat_dma_self_test - Perform an I/OAT transaction to verify the HW works.
+ * @device: device to be tested
+ */
+int __devinit ioat_dma_self_test(struct ioatdma_device *device)
+{
+       int i;
+       u8 *src;
+       u8 *dest;
+       struct dma_device *dma = &device->common;
+       struct device *dev = &device->pdev->dev;
+       struct dma_chan *dma_chan;
+       struct dma_async_tx_descriptor *tx;
+       dma_addr_t dma_dest, dma_src;
+       dma_cookie_t cookie;
+       int err = 0;
+       struct completion cmp;
+       unsigned long tmo;
+       unsigned long flags;
+
+       src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
+       if (!src)
+               return -ENOMEM;
+       dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
+       if (!dest) {
+               kfree(src);
+               return -ENOMEM;
+       }
+
+       /* Fill in src buffer */
+       for (i = 0; i < IOAT_TEST_SIZE; i++)
+               src[i] = (u8)i;
+
+       /* Start copy, using first DMA channel */
+       dma_chan = container_of(dma->channels.next, struct dma_chan,
+                               device_node);
+       if (dma->device_alloc_chan_resources(dma_chan) < 1) {
+               dev_err(dev, "selftest cannot allocate chan resource\n");
+               err = -ENODEV;
+               goto out;
+       }
+
+       dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
+       dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
+       flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE |
+               DMA_PREP_INTERRUPT;
+       tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
+                                                  IOAT_TEST_SIZE, flags);
+       if (!tx) {
+               dev_err(dev, "Self-test prep failed, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       async_tx_ack(tx);
+       init_completion(&cmp);
+       tx->callback = ioat_dma_test_callback;
+       tx->callback_param = &cmp;
+       cookie = tx->tx_submit(tx);
+       if (cookie < 0) {
+               dev_err(dev, "Self-test setup failed, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+       dma->device_issue_pending(dma_chan);
+
+       tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+       if (tmo == 0 ||
+           dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL)
+                                       != DMA_SUCCESS) {
+               dev_err(dev, "Self-test copy timed out, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+       if (memcmp(src, dest, IOAT_TEST_SIZE)) {
+               dev_err(dev, "Self-test copy failed compare, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+free_resources:
+       dma->device_free_chan_resources(dma_chan);
+out:
+       kfree(src);
+       kfree(dest);
+       return err;
+}
+
+static char ioat_interrupt_style[32] = "msix";
+module_param_string(ioat_interrupt_style, ioat_interrupt_style,
+                   sizeof(ioat_interrupt_style), 0644);
+MODULE_PARM_DESC(ioat_interrupt_style,
+                "set ioat interrupt style: msix (default), "
+                "msix-single-vector, msi, intx)");
+
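+/*
+ * Interrupt setup falls back in order: msix (one vector per channel)
+ * -> msix-single-vector -> msi -> legacy intx.  A failure at any step
+ * drops through to the next style instead of failing the probe
+ * outright.
+ */
+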
+/**
+ * ioat_dma_setup_interrupts - setup interrupt handler
+ * @device: ioat device
+ */
+static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
+{
+       struct ioat_chan_common *chan;
+       struct pci_dev *pdev = device->pdev;
+       struct device *dev = &pdev->dev;
+       struct msix_entry *msix;
+       int i, j, msixcnt;
+       int err = -EINVAL;
+       u8 intrctrl = 0;
+
+       if (!strcmp(ioat_interrupt_style, "msix"))
+               goto msix;
+       if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
+               goto msix_single_vector;
+       if (!strcmp(ioat_interrupt_style, "msi"))
+               goto msi;
+       if (!strcmp(ioat_interrupt_style, "intx"))
+               goto intx;
+       dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style);
+       goto err_no_irq;
+
+msix:
+       /* The number of MSI-X vectors should equal the number of channels */
+       msixcnt = device->common.chancnt;
+       for (i = 0; i < msixcnt; i++)
+               device->msix_entries[i].entry = i;
+
+       err = pci_enable_msix(pdev, device->msix_entries, msixcnt);
+       if (err < 0)
+               goto msi;
+       if (err > 0)
+               goto msix_single_vector;
+
+       for (i = 0; i < msixcnt; i++) {
+               msix = &device->msix_entries[i];
+               chan = ioat_chan_by_index(device, i);
+               err = devm_request_irq(dev, msix->vector,
+                                      ioat_dma_do_interrupt_msix, 0,
+                                      "ioat-msix", chan);
+               if (err) {
+                       for (j = 0; j < i; j++) {
+                               msix = &device->msix_entries[j];
+                               chan = ioat_chan_by_index(device, j);
+                               devm_free_irq(dev, msix->vector, chan);
+                       }
+                       goto msix_single_vector;
+               }
+       }
+       intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
+       goto done;
+
+msix_single_vector:
+       msix = &device->msix_entries[0];
+       msix->entry = 0;
+       err = pci_enable_msix(pdev, device->msix_entries, 1);
+       if (err)
+               goto msi;
+
+       err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0,
+                              "ioat-msix", device);
+       if (err) {
+               pci_disable_msix(pdev);
+               goto msi;
+       }
+       goto done;
+
+msi:
+       err = pci_enable_msi(pdev);
+       if (err)
+               goto intx;
+
+       err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0,
+                              "ioat-msi", device);
+       if (err) {
+               pci_disable_msi(pdev);
+               goto intx;
+       }
+       goto done;
+
+intx:
+       err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt,
+                              IRQF_SHARED, "ioat-intx", device);
+       if (err)
+               goto err_no_irq;
+
+done:
+       if (device->intr_quirk)
+               device->intr_quirk(device);
+       intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
+       writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
+       return 0;
+
+err_no_irq:
+       /* Disable all interrupt generation */
+       writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
+       dev_err(dev, "no usable interrupts\n");
+       return err;
+}
+
+static void ioat_disable_interrupts(struct ioatdma_device *device)
+{
+       /* Disable all interrupt generation */
+       writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
+}
+
+int __devinit ioat_probe(struct ioatdma_device *device)
+{
+       int err = -ENODEV;
+       struct dma_device *dma = &device->common;
+       struct pci_dev *pdev = device->pdev;
+       struct device *dev = &pdev->dev;
+
+       /* DMA coherent memory pool for DMA descriptor allocations */
+       device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
+                                          sizeof(struct ioat_dma_descriptor),
+                                          64, 0);
+       if (!device->dma_pool) {
+               err = -ENOMEM;
+               goto err_dma_pool;
+       }
+
+       device->completion_pool = pci_pool_create("completion_pool", pdev,
+                                                 sizeof(u64), SMP_CACHE_BYTES,
+                                                 SMP_CACHE_BYTES);
+
+       if (!device->completion_pool) {
+               err = -ENOMEM;
+               goto err_completion_pool;
+       }
+
+       device->enumerate_channels(device);
+
+       dma_cap_set(DMA_MEMCPY, dma->cap_mask);
+       dma->dev = &pdev->dev;
+
+       if (!dma->chancnt) {
+               dev_err(dev, "zero channels detected\n");
+               goto err_setup_interrupts;
+       }
+
+       err = ioat_dma_setup_interrupts(device);
+       if (err)
+               goto err_setup_interrupts;
+
+       err = device->self_test(device);
+       if (err)
+               goto err_self_test;
+
+       return 0;
+
+err_self_test:
+       ioat_disable_interrupts(device);
+err_setup_interrupts:
+       pci_pool_destroy(device->completion_pool);
+err_completion_pool:
+       pci_pool_destroy(device->dma_pool);
+err_dma_pool:
+       return err;
+}
+
+int __devinit ioat_register(struct ioatdma_device *device)
+{
+       int err = dma_async_device_register(&device->common);
+
+       if (err) {
+               ioat_disable_interrupts(device);
+               pci_pool_destroy(device->completion_pool);
+               pci_pool_destroy(device->dma_pool);
+       }
+
+       return err;
+}
+
+/* ioat1_intr_quirk - fix up dma ctrl register to enable / disable msi */
+static void ioat1_intr_quirk(struct ioatdma_device *device)
+{
+       struct pci_dev *pdev = device->pdev;
+       u32 dmactrl;
+
+       pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
+       if (pdev->msi_enabled)
+               dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
+       else
+               dmactrl &= ~IOAT_PCI_DMACTRL_MSI_EN;
+       pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl);
+}
+
+static ssize_t ring_size_show(struct dma_chan *c, char *page)
+{
+       struct ioat_dma_chan *ioat = to_ioat_chan(c);
+
+       return sprintf(page, "%d\n", ioat->desccount);
+}
+static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
+
+static ssize_t ring_active_show(struct dma_chan *c, char *page)
+{
+       struct ioat_dma_chan *ioat = to_ioat_chan(c);
+
+       return sprintf(page, "%d\n", ioat->active);
+}
+static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
+
+static ssize_t cap_show(struct dma_chan *c, char *page)
+{
+       struct dma_device *dma = c->device;
+
+       return sprintf(page, "copy%s%s%s%s%s%s\n",
+                      dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "",
+                      dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "",
+                      dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "",
+                      dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "",
+                      dma_has_cap(DMA_MEMSET, dma->cap_mask)  ? " fill" : "",
+                      dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : "");
+}
+struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap);
+
+static ssize_t version_show(struct dma_chan *c, char *page)
+{
+       struct dma_device *dma = c->device;
+       struct ioatdma_device *device = to_ioatdma_device(dma);
+
+       return sprintf(page, "%d.%d\n",
+                      device->version >> 4, device->version & 0xf);
+}
+struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version);
+
+static struct attribute *ioat1_attrs[] = {
+       &ring_size_attr.attr,
+       &ring_active_attr.attr,
+       &ioat_cap_attr.attr,
+       &ioat_version_attr.attr,
+       NULL,
+};
+
+static ssize_t
+ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+       struct ioat_sysfs_entry *entry;
+       struct ioat_chan_common *chan;
+
+       entry = container_of(attr, struct ioat_sysfs_entry, attr);
+       chan = container_of(kobj, struct ioat_chan_common, kobj);
+
+       if (!entry->show)
+               return -EIO;
+       return entry->show(&chan->common, page);
+}
+
+struct sysfs_ops ioat_sysfs_ops = {
+       .show   = ioat_attr_show,
+};
+
+static struct kobj_type ioat1_ktype = {
+       .sysfs_ops = &ioat_sysfs_ops,
+       .default_attrs = ioat1_attrs,
+};
+
+void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type)
+{
+       struct dma_device *dma = &device->common;
+       struct dma_chan *c;
+
+       list_for_each_entry(c, &dma->channels, device_node) {
+               struct ioat_chan_common *chan = to_chan_common(c);
+               struct kobject *parent = &c->dev->device.kobj;
+               int err;
+
+               err = kobject_init_and_add(&chan->kobj, type, parent, "quickdata");
+               if (err) {
+                       dev_warn(to_dev(chan),
+                                "sysfs init error (%d), continuing...\n", err);
+                       kobject_put(&chan->kobj);
+                       set_bit(IOAT_KOBJ_INIT_FAIL, &chan->state);
+               }
+       }
+}
+
+void ioat_kobject_del(struct ioatdma_device *device)
+{
+       struct dma_device *dma = &device->common;
+       struct dma_chan *c;
+
+       list_for_each_entry(c, &dma->channels, device_node) {
+               struct ioat_chan_common *chan = to_chan_common(c);
+
+               if (!test_bit(IOAT_KOBJ_INIT_FAIL, &chan->state)) {
+                       kobject_del(&chan->kobj);
+                       kobject_put(&chan->kobj);
+               }
+       }
+}
+
+int __devinit ioat1_dma_probe(struct ioatdma_device *device, int dca)
+{
+       struct pci_dev *pdev = device->pdev;
+       struct dma_device *dma;
+       int err;
+
+       device->intr_quirk = ioat1_intr_quirk;
+       device->enumerate_channels = ioat1_enumerate_channels;
+       device->self_test = ioat_dma_self_test;
+       dma = &device->common;
+       dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
+       dma->device_issue_pending = ioat1_dma_memcpy_issue_pending;
+       dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources;
+       dma->device_free_chan_resources = ioat1_dma_free_chan_resources;
+       dma->device_is_tx_complete = ioat1_dma_is_complete;
+
+       err = ioat_probe(device);
+       if (err)
+               return err;
+       ioat_set_tcp_copy_break(4096);
+       err = ioat_register(device);
+       if (err)
+               return err;
+       ioat_kobject_add(device, &ioat1_ktype);
+
+       if (dca)
+               device->dca = ioat_dca_init(pdev, device->reg_base);
+
+       return err;
+}
+
+void __devexit ioat_dma_remove(struct ioatdma_device *device)
+{
+       struct dma_device *dma = &device->common;
+
+       ioat_disable_interrupts(device);
+
+       ioat_kobject_del(device);
+
+       dma_async_device_unregister(dma);
+
+       pci_pool_destroy(device->dma_pool);
+       pci_pool_destroy(device->completion_pool);
+
+       INIT_LIST_HEAD(&dma->channels);
+}
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
new file mode 100644 (file)
index 0000000..c14fdfe
--- /dev/null
@@ -0,0 +1,337 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef IOATDMA_H
+#define IOATDMA_H
+
+#include <linux/dmaengine.h>
+#include "hw.h"
+#include "registers.h"
+#include <linux/init.h>
+#include <linux/dmapool.h>
+#include <linux/cache.h>
+#include <linux/pci_ids.h>
+#include <net/tcp.h>
+
+#define IOAT_DMA_VERSION  "4.00"
+
+#define IOAT_LOW_COMPLETION_MASK       0xffffffc0
+#define IOAT_DMA_DCA_ANY_CPU           ~0
+
+#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
+#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
+#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, txd)
+#define to_dev(ioat_chan) (&(ioat_chan)->device->pdev->dev)
+
+#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)
+
+/*
+ * workaround for IOAT ver.3.0 null descriptor issue
+ * (channel returns error when size is 0)
+ */
+#define NULL_DESC_BUFFER_SIZE 1
+
+/**
+ * struct ioatdma_device - internal representation of an IOAT device
+ * @pdev: PCI-Express device
+ * @reg_base: MMIO register space base address
+ * @dma_pool: for allocating DMA descriptors
+ * @common: embedded struct dma_device
+ * @version: version of ioatdma device
+ * @msix_entries: irq handlers
+ * @idx: per channel data
+ * @dca: direct cache access context
+ * @intr_quirk: interrupt setup quirk (for ioat_v1 devices)
+ * @enumerate_channels: hw version specific channel enumeration
+ * @cleanup_tasklet: select between the v2 and v3 cleanup routines
+ * @timer_fn: select between the v2 and v3 timer watchdog routines
+ * @self_test: hardware version specific self test for each supported op type
+ *
+ * Note: the v3 cleanup routine supports raid operations
+ */
+struct ioatdma_device {
+       struct pci_dev *pdev;
+       void __iomem *reg_base;
+       struct pci_pool *dma_pool;
+       struct pci_pool *completion_pool;
+       struct dma_device common;
+       u8 version;
+       struct msix_entry msix_entries[4];
+       struct ioat_chan_common *idx[4];
+       struct dca_provider *dca;
+       void (*intr_quirk)(struct ioatdma_device *device);
+       int (*enumerate_channels)(struct ioatdma_device *device);
+       void (*cleanup_tasklet)(unsigned long data);
+       void (*timer_fn)(unsigned long data);
+       int (*self_test)(struct ioatdma_device *device);
+};
+
+struct ioat_chan_common {
+       struct dma_chan common;
+       void __iomem *reg_base;
+       unsigned long last_completion;
+       spinlock_t cleanup_lock;
+       dma_cookie_t completed_cookie;
+       unsigned long state;
+       #define IOAT_COMPLETION_PENDING 0
+       #define IOAT_COMPLETION_ACK 1
+       #define IOAT_RESET_PENDING 2
+       #define IOAT_KOBJ_INIT_FAIL 3
+       struct timer_list timer;
+       #define COMPLETION_TIMEOUT msecs_to_jiffies(100)
+       #define IDLE_TIMEOUT msecs_to_jiffies(2000)
+       #define RESET_DELAY msecs_to_jiffies(100)
+       struct ioatdma_device *device;
+       dma_addr_t completion_dma;
+       u64 *completion;
+       struct tasklet_struct cleanup_task;
+       struct kobject kobj;
+};
+
+struct ioat_sysfs_entry {
+       struct attribute attr;
+       ssize_t (*show)(struct dma_chan *, char *);
+};
+
+/**
+ * struct ioat_dma_chan - internal representation of a DMA channel
+ */
+struct ioat_dma_chan {
+       struct ioat_chan_common base;
+
+       size_t xfercap; /* XFERCAP register value expanded out */
+
+       spinlock_t desc_lock;
+       struct list_head free_desc;
+       struct list_head used_desc;
+
+       int pending;
+       u16 desccount;
+       u16 active;
+};
+
+static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c)
+{
+       return container_of(c, struct ioat_chan_common, common);
+}
+
+static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c)
+{
+       struct ioat_chan_common *chan = to_chan_common(c);
+
+       return container_of(chan, struct ioat_dma_chan, base);
+}
+
+/**
+ * ioat_is_complete - poll the status of an ioat transaction
+ * @c: channel handle
+ * @cookie: transaction identifier
+ * @done: if set, updated with last completed transaction
+ * @used: if set, updated with last used transaction
+ */
+static inline enum dma_status
+ioat_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+                dma_cookie_t *done, dma_cookie_t *used)
+{
+       struct ioat_chan_common *chan = to_chan_common(c);
+       dma_cookie_t last_used;
+       dma_cookie_t last_complete;
+
+       last_used = c->cookie;
+       last_complete = chan->completed_cookie;
+
+       if (done)
+               *done = last_complete;
+       if (used)
+               *used = last_used;
+
+       return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
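+/*
+ * Example of the progress check above: with last_complete == 40 and
+ * last_used == 45, polling cookie 38 reports DMA_SUCCESS while cookie
+ * 42 reports DMA_IN_PROGRESS (cookie wraparound is handled inside
+ * dma_async_is_complete()).
+ */
+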
+/* wrapper around hardware descriptor format + additional software fields */
+
+/**
+ * struct ioat_desc_sw - wrapper around hardware descriptor
+ * @hw: hardware DMA descriptor (for memcpy)
+ * @node: this descriptor will either be on the free list,
+ *     or attached to a transaction list (tx_list)
+ * @txd: the generic software descriptor for all engines
+ * @id: identifier for debug
+ */
+struct ioat_desc_sw {
+       struct ioat_dma_descriptor *hw;
+       struct list_head node;
+       size_t len;
+       struct list_head tx_list;
+       struct dma_async_tx_descriptor txd;
+       #ifdef DEBUG
+       int id;
+       #endif
+};
+
+#ifdef DEBUG
+#define set_desc_id(desc, i) ((desc)->id = (i))
+#define desc_id(desc) ((desc)->id)
+#else
+#define set_desc_id(desc, i)
+#define desc_id(desc) (0)
+#endif
+
+static inline void
+__dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw,
+               struct dma_async_tx_descriptor *tx, int id)
+{
+       struct device *dev = to_dev(chan);
+
+       dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x"
+               " ctl: %#x (op: %d int_en: %d compl: %d)\n", id,
+               (unsigned long long) tx->phys,
+               (unsigned long long) hw->next, tx->cookie, tx->flags,
+               hw->ctl, hw->ctl_f.op, hw->ctl_f.int_en, hw->ctl_f.compl_write);
+}
+
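+/*
+ * dump_desc_dbg() is a statement expression yielding 0 so it can be
+ * used in expression context, and it tolerates a NULL descriptor.
+ */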
+#define dump_desc_dbg(c, d) \
+       ({ if (d) __dump_desc_dbg(&c->base, d->hw, &d->txd, desc_id(d)); 0; })
+
+static inline void ioat_set_tcp_copy_break(unsigned long copybreak)
+{
+       #ifdef CONFIG_NET_DMA
+       sysctl_tcp_dma_copybreak = copybreak;
+       #endif
+}
+
+static inline struct ioat_chan_common *
+ioat_chan_by_index(struct ioatdma_device *device, int index)
+{
+       return device->idx[index];
+}
+
+static inline u64 ioat_chansts(struct ioat_chan_common *chan)
+{
+       u8 ver = chan->device->version;
+       u64 status;
+       u32 status_lo;
+
+       /* We need to read the low address first as this causes the
+        * chipset to latch the upper bits for the subsequent read
+        */
+       status_lo = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver));
+       status = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver));
+       status <<= 32;
+       status |= status_lo;
+
+       return status;
+}
+
+static inline void ioat_start(struct ioat_chan_common *chan)
+{
+       u8 ver = chan->device->version;
+
+       writeb(IOAT_CHANCMD_START, chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+}
+
+static inline u64 ioat_chansts_to_addr(u64 status)
+{
+       return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
+}
+
+static inline u32 ioat_chanerr(struct ioat_chan_common *chan)
+{
+       return readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+}
+
+static inline void ioat_suspend(struct ioat_chan_common *chan)
+{
+       u8 ver = chan->device->version;
+
+       writeb(IOAT_CHANCMD_SUSPEND, chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+}
+
+static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+
+       writel(addr & 0x00000000FFFFFFFF,
+              chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
+       writel(addr >> 32,
+              chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
+}
+
+static inline bool is_ioat_active(unsigned long status)
+{
+       return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
+}
+
+static inline bool is_ioat_idle(unsigned long status)
+{
+       return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_DONE);
+}
+
+static inline bool is_ioat_halted(unsigned long status)
+{
+       return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED);
+}
+
+static inline bool is_ioat_suspended(unsigned long status)
+{
+       return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED);
+}
+
+/* the channel encountered a fatal programming error */
+static inline bool is_ioat_bug(unsigned long err)
+{
+       return !!(err & (IOAT_CHANERR_SRC_ADDR_ERR|IOAT_CHANERR_DEST_ADDR_ERR|
+                        IOAT_CHANERR_NEXT_ADDR_ERR|IOAT_CHANERR_CONTROL_ERR|
+                        IOAT_CHANERR_LENGTH_ERR));
+}
+
+static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
+                             int direction, enum dma_ctrl_flags flags, bool dst)
+{
+       if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) ||
+           (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE)))
+               pci_unmap_single(pdev, addr, len, direction);
+       else
+               pci_unmap_page(pdev, addr, len, direction);
+}
+
+int __devinit ioat_probe(struct ioatdma_device *device);
+int __devinit ioat_register(struct ioatdma_device *device);
+int __devinit ioat1_dma_probe(struct ioatdma_device *dev, int dca);
+int __devinit ioat_dma_self_test(struct ioatdma_device *device);
+void __devexit ioat_dma_remove(struct ioatdma_device *device);
+struct dca_provider * __devinit ioat_dca_init(struct pci_dev *pdev,
+                                             void __iomem *iobase);
+unsigned long ioat_get_current_completion(struct ioat_chan_common *chan);
+void ioat_init_channel(struct ioatdma_device *device,
+                      struct ioat_chan_common *chan, int idx,
+                      void (*timer_fn)(unsigned long),
+                      void (*tasklet)(unsigned long),
+                      unsigned long ioat);
+void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
+                   size_t len, struct ioat_dma_descriptor *hw);
+bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
+                          unsigned long *phys_complete);
+void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type);
+void ioat_kobject_del(struct ioatdma_device *device);
+extern struct sysfs_ops ioat_sysfs_ops;
+extern struct ioat_sysfs_entry ioat_version_attr;
+extern struct ioat_sysfs_entry ioat_cap_attr;
+#endif /* IOATDMA_H */
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
new file mode 100644 (file)
index 0000000..96ffab7
--- /dev/null
@@ -0,0 +1,871 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine (versions >= 2), which
+ * does asynchronous data movement and checksumming operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/i7300_idle.h>
+#include "dma.h"
+#include "dma_v2.h"
+#include "registers.h"
+#include "hw.h"
+
+int ioat_ring_alloc_order = 8;
+module_param(ioat_ring_alloc_order, int, 0644);
+MODULE_PARM_DESC(ioat_ring_alloc_order,
+                "ioat2+: allocate 2^n descriptors per channel"
+                " (default: 8 max: 16)");
+static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER;
+module_param(ioat_ring_max_alloc_order, int, 0644);
+MODULE_PARM_DESC(ioat_ring_max_alloc_order,
+                "ioat2+: upper limit for ring size (default: 16)");
+
+void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
+{
+       void __iomem *reg_base = ioat->base.reg_base;
+
+       ioat->pending = 0;
+       ioat->dmacount += ioat2_ring_pending(ioat);
+       ioat->issued = ioat->head;
+       /* make descriptor updates globally visible before notifying channel */
+       wmb();
+       writew(ioat->dmacount, reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
+       dev_dbg(to_dev(&ioat->base),
+               "%s: head: %#x tail: %#x issued: %#x count: %#x\n",
+               __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
+}
+
+void ioat2_issue_pending(struct dma_chan *chan)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(chan);
+
+       spin_lock_bh(&ioat->ring_lock);
+       if (ioat->pending == 1)
+               __ioat2_issue_pending(ioat);
+       spin_unlock_bh(&ioat->ring_lock);
+}
+
+/**
+ * ioat2_update_pending - log pending descriptors
+ * @ioat: ioat2+ channel
+ *
+ * Set pending to '1' unless it is already '2'; pending == 2 indicates
+ * that submission is temporarily blocked due to an in-flight reset.  If
+ * we are already above the ioat_pending_level threshold, just issue the
+ * pending descriptors.
+ *
+ * called with ring_lock held
+ */
+static void ioat2_update_pending(struct ioat2_dma_chan *ioat)
+{
+       if (unlikely(ioat->pending == 2))
+               return;
+       else if (ioat2_ring_pending(ioat) > ioat_pending_level)
+               __ioat2_issue_pending(ioat);
+       else
+               ioat->pending = 1;
+}
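
The pending protocol above is driven by the standard dmaengine client
sequence; a minimal sketch using the generic API of this kernel (channel and
DMA-mapped buffers assumed to be set up elsewhere):

	struct dma_async_tx_descriptor *tx;
	dma_cookie_t cookie;

	/* prep stages descriptors and returns with the ring lock held */
	tx = chan->device->device_prep_dma_memcpy(chan, dst, src, len,
						  DMA_PREP_INTERRUPT);
	cookie = tx->tx_submit(tx);	/* sets pending, drops the ring lock */
	chan->device->device_issue_pending(chan);	/* flush to hardware */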
+
+static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
+{
+       struct ioat_ring_ent *desc;
+       struct ioat_dma_descriptor *hw;
+       int idx;
+
+       if (ioat2_ring_space(ioat) < 1) {
+               dev_err(to_dev(&ioat->base),
+                       "Unable to start null desc - ring full\n");
+               return;
+       }
+
+       dev_dbg(to_dev(&ioat->base), "%s: head: %#x tail: %#x issued: %#x\n",
+               __func__, ioat->head, ioat->tail, ioat->issued);
+       idx = ioat2_desc_alloc(ioat, 1);
+       desc = ioat2_get_ring_ent(ioat, idx);
+
+       hw = desc->hw;
+       hw->ctl = 0;
+       hw->ctl_f.null = 1;
+       hw->ctl_f.int_en = 1;
+       hw->ctl_f.compl_write = 1;
+       /* set size to non-zero value (channel returns error when size is 0) */
+       hw->size = NULL_DESC_BUFFER_SIZE;
+       hw->src_addr = 0;
+       hw->dst_addr = 0;
+       async_tx_ack(&desc->txd);
+       ioat2_set_chainaddr(ioat, desc->txd.phys);
+       dump_desc_dbg(ioat, desc);
+       __ioat2_issue_pending(ioat);
+}
+
+static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat)
+{
+       spin_lock_bh(&ioat->ring_lock);
+       __ioat2_start_null_desc(ioat);
+       spin_unlock_bh(&ioat->ring_lock);
+}
+
+static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+       struct dma_async_tx_descriptor *tx;
+       struct ioat_ring_ent *desc;
+       bool seen_current = false;
+       u16 active;
+       int i;
+
+       dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
+               __func__, ioat->head, ioat->tail, ioat->issued);
+
+       active = ioat2_ring_active(ioat);
+       for (i = 0; i < active && !seen_current; i++) {
+               prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
+               desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
+               tx = &desc->txd;
+               dump_desc_dbg(ioat, desc);
+               if (tx->cookie) {
+                       ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
+                       chan->completed_cookie = tx->cookie;
+                       tx->cookie = 0;
+                       if (tx->callback) {
+                               tx->callback(tx->callback_param);
+                               tx->callback = NULL;
+                       }
+               }
+
+               if (tx->phys == phys_complete)
+                       seen_current = true;
+       }
+       ioat->tail += i;
+       BUG_ON(!seen_current); /* no active descs have written a completion? */
+
+       chan->last_completion = phys_complete;
+       if (ioat->head == ioat->tail) {
+               dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
+                       __func__);
+               clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
+               mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+       }
+}
+
+/**
+ * ioat2_cleanup - clean finished descriptors (advance tail pointer)
+ * @ioat: ioat2+ channel (ring) to be cleaned up
+ */
+static void ioat2_cleanup(struct ioat2_dma_chan *ioat)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+       unsigned long phys_complete;
+
+       prefetch(chan->completion);
+
+       if (!spin_trylock_bh(&chan->cleanup_lock))
+               return;
+
+       if (!ioat_cleanup_preamble(chan, &phys_complete)) {
+               spin_unlock_bh(&chan->cleanup_lock);
+               return;
+       }
+
+       if (!spin_trylock_bh(&ioat->ring_lock)) {
+               spin_unlock_bh(&chan->cleanup_lock);
+               return;
+       }
+
+       __cleanup(ioat, phys_complete);
+
+       spin_unlock_bh(&ioat->ring_lock);
+       spin_unlock_bh(&chan->cleanup_lock);
+}
+
+void ioat2_cleanup_tasklet(unsigned long data)
+{
+       struct ioat2_dma_chan *ioat = (void *) data;
+
+       ioat2_cleanup(ioat);
+       writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+void __ioat2_restart_chan(struct ioat2_dma_chan *ioat)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+
+       /* set the tail to be re-issued */
+       ioat->issued = ioat->tail;
+       ioat->dmacount = 0;
+       set_bit(IOAT_COMPLETION_PENDING, &chan->state);
+       mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+       dev_dbg(to_dev(chan),
+               "%s: head: %#x tail: %#x issued: %#x count: %#x\n",
+               __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount);
+
+       if (ioat2_ring_pending(ioat)) {
+               struct ioat_ring_ent *desc;
+
+               desc = ioat2_get_ring_ent(ioat, ioat->tail);
+               ioat2_set_chainaddr(ioat, desc->txd.phys);
+               __ioat2_issue_pending(ioat);
+       } else
+               __ioat2_start_null_desc(ioat);
+}
+
+static void ioat2_restart_channel(struct ioat2_dma_chan *ioat)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+       unsigned long phys_complete;
+       u32 status;
+
+       status = ioat_chansts(chan);
+       if (is_ioat_active(status) || is_ioat_idle(status))
+               ioat_suspend(chan);
+       while (is_ioat_active(status) || is_ioat_idle(status)) {
+               status = ioat_chansts(chan);
+               cpu_relax();
+       }
+
+       if (ioat_cleanup_preamble(chan, &phys_complete))
+               __cleanup(ioat, phys_complete);
+
+       __ioat2_restart_chan(ioat);
+}
+
+void ioat2_timer_event(unsigned long data)
+{
+       struct ioat2_dma_chan *ioat = (void *) data;
+       struct ioat_chan_common *chan = &ioat->base;
+
+       spin_lock_bh(&chan->cleanup_lock);
+       if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
+               unsigned long phys_complete;
+               u64 status;
+
+               spin_lock_bh(&ioat->ring_lock);
+               status = ioat_chansts(chan);
+
+               /* when halted due to errors check for channel
+                * programming errors before advancing the completion state
+                */
+               if (is_ioat_halted(status)) {
+                       u32 chanerr;
+
+                       chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+                       BUG_ON(is_ioat_bug(chanerr));
+               }
+
+               /* if we haven't made progress and we have already
+                * acknowledged a pending completion once, then be more
+                * forceful with a restart
+                */
+               if (ioat_cleanup_preamble(chan, &phys_complete))
+                       __cleanup(ioat, phys_complete);
+               else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
+                       ioat2_restart_channel(ioat);
+               else {
+                       set_bit(IOAT_COMPLETION_ACK, &chan->state);
+                       mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+               }
+               spin_unlock_bh(&ioat->ring_lock);
+       } else {
+               u16 active;
+
+               /* if the ring is idle, empty, and oversized try to step
+                * down the size
+                */
+               spin_lock_bh(&ioat->ring_lock);
+               active = ioat2_ring_active(ioat);
+               if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
+                       reshape_ring(ioat, ioat->alloc_order-1);
+               spin_unlock_bh(&ioat->ring_lock);
+
+               /* keep shrinking until we get back to our minimum
+                * default size
+                */
+               if (ioat->alloc_order > ioat_get_alloc_order())
+                       mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+       }
+       spin_unlock_bh(&chan->cleanup_lock);
+}
+
+/**
+ * ioat2_enumerate_channels - find and initialize the device's channels
+ * @device: the device to be enumerated
+ */
+int ioat2_enumerate_channels(struct ioatdma_device *device)
+{
+       struct ioat2_dma_chan *ioat;
+       struct device *dev = &device->pdev->dev;
+       struct dma_device *dma = &device->common;
+       u8 xfercap_log;
+       int i;
+
+       INIT_LIST_HEAD(&dma->channels);
+       dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
+       dma->chancnt &= 0x1f; /* bits [4:0] valid */
+       if (dma->chancnt > ARRAY_SIZE(device->idx)) {
+               dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
+                        dma->chancnt, ARRAY_SIZE(device->idx));
+               dma->chancnt = ARRAY_SIZE(device->idx);
+       }
+       xfercap_log = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
+       xfercap_log &= 0x1f; /* bits [4:0] valid */
+       if (xfercap_log == 0)
+               return 0;
+       dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log);
+
+       /* FIXME which i/oat version is i7300? */
+#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
+       if (i7300_idle_platform_probe(NULL, NULL, 1) == 0)
+               dma->chancnt--;
+#endif
+       for (i = 0; i < dma->chancnt; i++) {
+               ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL);
+               if (!ioat)
+                       break;
+
+               ioat_init_channel(device, &ioat->base, i,
+                                 device->timer_fn,
+                                 device->cleanup_tasklet,
+                                 (unsigned long) ioat);
+               ioat->xfercap_log = xfercap_log;
+               spin_lock_init(&ioat->ring_lock);
+       }
+       dma->chancnt = i;
+       return i;
+}
+
+static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
+{
+       struct dma_chan *c = tx->chan;
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+       struct ioat_chan_common *chan = &ioat->base;
+       dma_cookie_t cookie = c->cookie;
+
+       cookie++;
+       if (cookie < 0)
+               cookie = 1;
+       tx->cookie = cookie;
+       c->cookie = cookie;
+       dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);
+
+       if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state))
+               mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+       ioat2_update_pending(ioat);
+       spin_unlock_bh(&ioat->ring_lock);
+
+       return cookie;
+}
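
The cookie arithmetic above guards against signed overflow: dma_cookie_t is a
signed int where zero means "not submitted" and negative values carry error
codes, so the counter restarts at 1 when it wraps. The rule in isolation:

	static dma_cookie_t next_cookie(dma_cookie_t c)
	{
		return (++c < 0) ? 1 : c;
	}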
+
+static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)
+{
+       struct ioat_dma_descriptor *hw;
+       struct ioat_ring_ent *desc;
+       struct ioatdma_device *dma;
+       dma_addr_t phys;
+
+       dma = to_ioatdma_device(chan->device);
+       hw = pci_pool_alloc(dma->dma_pool, flags, &phys);
+       if (!hw)
+               return NULL;
+       memset(hw, 0, sizeof(*hw));
+
+       desc = kmem_cache_alloc(ioat2_cache, flags);
+       if (!desc) {
+               pci_pool_free(dma->dma_pool, hw, phys);
+               return NULL;
+       }
+       memset(desc, 0, sizeof(*desc));
+
+       dma_async_tx_descriptor_init(&desc->txd, chan);
+       desc->txd.tx_submit = ioat2_tx_submit_unlock;
+       desc->hw = hw;
+       desc->txd.phys = phys;
+       return desc;
+}
+
+static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan)
+{
+       struct ioatdma_device *dma;
+
+       dma = to_ioatdma_device(chan->device);
+       pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys);
+       kmem_cache_free(ioat2_cache, desc);
+}
+
+static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
+{
+       struct ioat_ring_ent **ring;
+       int descs = 1 << order;
+       int i;
+
+       if (order > ioat_get_max_alloc_order())
+               return NULL;
+
+       /* allocate the array to hold the software ring */
+       ring = kcalloc(descs, sizeof(*ring), flags);
+       if (!ring)
+               return NULL;
+       for (i = 0; i < descs; i++) {
+               ring[i] = ioat2_alloc_ring_ent(c, flags);
+               if (!ring[i]) {
+                       while (i--)
+                               ioat2_free_ring_ent(ring[i], c);
+                       kfree(ring);
+                       return NULL;
+               }
+               set_desc_id(ring[i], i);
+       }
+
+       /* link descs */
+       for (i = 0; i < descs-1; i++) {
+               struct ioat_ring_ent *next = ring[i+1];
+               struct ioat_dma_descriptor *hw = ring[i]->hw;
+
+               hw->next = next->txd.phys;
+       }
+       ring[i]->hw->next = ring[0]->txd.phys;
+
+       return ring;
+}
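
The two loops above leave the hardware descriptors in a circular singly
linked list; an invariant check might read (sketch, assuming the 'ring' and
'order' of a successful allocation):

	int i, descs = 1 << order;

	for (i = 0; i < descs; i++)
		BUG_ON(ring[i]->hw->next !=
		       ring[(i + 1) & (descs - 1)]->txd.phys);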
+
+/**
+ * ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring
+ * @c: channel to be initialized
+ */
+int ioat2_alloc_chan_resources(struct dma_chan *c)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+       struct ioat_chan_common *chan = &ioat->base;
+       struct ioat_ring_ent **ring;
+       u32 chanerr;
+       int order;
+
+       /* have we already been set up? */
+       if (ioat->ring)
+               return 1 << ioat->alloc_order;
+
+       /* Setup register to interrupt and write completion status on error */
+       writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET);
+
+       chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+       if (chanerr) {
+               dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr);
+               writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
+       }
+
+       /* allocate a completion writeback area; program it with two
+        * 32-bit MMIO writes, since a single 64-bit write doesn't work
+        */
+       chan->completion = pci_pool_alloc(chan->device->completion_pool,
+                                         GFP_KERNEL, &chan->completion_dma);
+       if (!chan->completion)
+               return -ENOMEM;
+
+       memset(chan->completion, 0, sizeof(*chan->completion));
+       writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF,
+              chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
+       writel(((u64) chan->completion_dma) >> 32,
+              chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
+
+       order = ioat_get_alloc_order();
+       ring = ioat2_alloc_ring(c, order, GFP_KERNEL);
+       if (!ring)
+               return -ENOMEM;
+
+       spin_lock_bh(&ioat->ring_lock);
+       ioat->ring = ring;
+       ioat->head = 0;
+       ioat->issued = 0;
+       ioat->tail = 0;
+       ioat->pending = 0;
+       ioat->alloc_order = order;
+       spin_unlock_bh(&ioat->ring_lock);
+
+       tasklet_enable(&chan->cleanup_task);
+       ioat2_start_null_desc(ioat);
+
+       return 1 << ioat->alloc_order;
+}
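
The completion-address programming above (like ioat2_set_chainaddr() in the
header) splits one logical 64-bit register write into two 32-bit writel()
calls, low half first. A sketch of the idiom using the kernel's
lower_32_bits()/upper_32_bits() helpers:

	static void ioat_write64_split(void __iomem *lo, void __iomem *hi, u64 val)
	{
		writel(lower_32_bits(val), lo);
		writel(upper_32_bits(val), hi);
	}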
+
+bool reshape_ring(struct ioat2_dma_chan *ioat, int order)
+{
+       /* reshape differs from normal ring allocation in that we want
+        * to allocate a new software ring while only
+        * extending/truncating the hardware ring
+        */
+       struct ioat_chan_common *chan = &ioat->base;
+       struct dma_chan *c = &chan->common;
+       const u16 curr_size = ioat2_ring_mask(ioat) + 1;
+       const u16 active = ioat2_ring_active(ioat);
+       const u16 new_size = 1 << order;
+       struct ioat_ring_ent **ring;
+       u16 i;
+
+       if (order > ioat_get_max_alloc_order())
+               return false;
+
+       /* double check that we have at least 1 free descriptor */
+       if (active == curr_size)
+               return false;
+
+       /* when shrinking, verify that we can hold the current active
+        * set in the new ring
+        */
+       if (active >= new_size)
+               return false;
+
+       /* allocate the array to hold the software ring */
+       ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT);
+       if (!ring)
+               return false;
+
+       /* allocate/trim descriptors as needed */
+       if (new_size > curr_size) {
+               /* copy current descriptors to the new ring */
+               for (i = 0; i < curr_size; i++) {
+                       u16 curr_idx = (ioat->tail+i) & (curr_size-1);
+                       u16 new_idx = (ioat->tail+i) & (new_size-1);
+
+                       ring[new_idx] = ioat->ring[curr_idx];
+                       set_desc_id(ring[new_idx], new_idx);
+               }
+
+               /* add new descriptors to the ring */
+               for (i = curr_size; i < new_size; i++) {
+                       u16 new_idx = (ioat->tail+i) & (new_size-1);
+
+                       ring[new_idx] = ioat2_alloc_ring_ent(c, GFP_NOWAIT);
+                       if (!ring[new_idx]) {
+                               while (i--) {
+                                       u16 new_idx = (ioat->tail+i) & (new_size-1);
+
+                                       ioat2_free_ring_ent(ring[new_idx], c);
+                               }
+                               kfree(ring);
+                               return false;
+                       }
+                       set_desc_id(ring[new_idx], new_idx);
+               }
+
+               /* hw link new descriptors */
+               for (i = curr_size-1; i < new_size; i++) {
+                       u16 new_idx = (ioat->tail+i) & (new_size-1);
+                       struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)];
+                       struct ioat_dma_descriptor *hw = ring[new_idx]->hw;
+
+                       hw->next = next->txd.phys;
+               }
+       } else {
+               struct ioat_dma_descriptor *hw;
+               struct ioat_ring_ent *next;
+
+               /* copy current descriptors to the new ring, dropping the
+                * removed descriptors
+                */
+               for (i = 0; i < new_size; i++) {
+                       u16 curr_idx = (ioat->tail+i) & (curr_size-1);
+                       u16 new_idx = (ioat->tail+i) & (new_size-1);
+
+                       ring[new_idx] = ioat->ring[curr_idx];
+                       set_desc_id(ring[new_idx], new_idx);
+               }
+
+               /* free deleted descriptors */
+               for (i = new_size; i < curr_size; i++) {
+                       struct ioat_ring_ent *ent;
+
+                       ent = ioat2_get_ring_ent(ioat, ioat->tail+i);
+                       ioat2_free_ring_ent(ent, c);
+               }
+
+               /* fix up hardware ring */
+               hw = ring[(ioat->tail+new_size-1) & (new_size-1)]->hw;
+               next = ring[(ioat->tail+new_size) & (new_size-1)];
+               hw->next = next->txd.phys;
+       }
+
+       dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
+               __func__, new_size);
+
+       kfree(ioat->ring);
+       ioat->ring = ring;
+       ioat->alloc_order = order;
+
+       return true;
+}
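
The remapping relies on both ring sizes being powers of two, so
(tail + i) & (size - 1) keeps every in-flight descriptor at the same offset
from the tail. A standalone demonstration with hypothetical values
(userspace C):

	#include <stdio.h>

	int main(void)
	{
		unsigned int tail = 6, curr_size = 8, new_size = 16, i;

		for (i = 0; i < curr_size; i++)
			printf("old[%u] -> new[%u]\n",
			       (tail + i) & (curr_size - 1),
			       (tail + i) & (new_size - 1));
		return 0;
	}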
+
+/**
+ * ioat2_alloc_and_lock - common descriptor alloc boilerplate for ioat2,3 ops
+ * @idx: gets starting descriptor index on successful allocation
+ * @ioat: ioat2,3 channel (ring) to operate on
+ * @num_descs: allocation length
+ */
+int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+
+       spin_lock_bh(&ioat->ring_lock);
+       /* never allow the last descriptor to be consumed, we need at
+        * least one free at all times to allow for on-the-fly ring
+        * resizing.
+        */
+       while (unlikely(ioat2_ring_space(ioat) <= num_descs)) {
+               if (reshape_ring(ioat, ioat->alloc_order + 1) &&
+                   ioat2_ring_space(ioat) > num_descs)
+                               break;
+
+               if (printk_ratelimit())
+                       dev_dbg(to_dev(chan),
+                               "%s: ring full! num_descs: %d (%x:%x:%x)\n",
+                               __func__, num_descs, ioat->head, ioat->tail,
+                               ioat->issued);
+               spin_unlock_bh(&ioat->ring_lock);
+
+               /* make forward progress on reclaim: in the allocation
+                * failure case we may be called with bottom halves
+                * disabled, so trigger the timer event directly
+                */
+               spin_lock_bh(&chan->cleanup_lock);
+               if (time_after(jiffies, chan->timer.expires) &&
+                   timer_pending(&chan->timer)) {
+                       struct ioatdma_device *device = chan->device;
+
+                       mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+                       spin_unlock_bh(&chan->cleanup_lock);
+                       device->timer_fn((unsigned long) ioat);
+               } else
+                       spin_unlock_bh(&chan->cleanup_lock);
+               return -ENOMEM;
+       }
+
+       dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n",
+               __func__, num_descs, ioat->head, ioat->tail, ioat->issued);
+
+       *idx = ioat2_desc_alloc(ioat, num_descs);
+       return 0;  /* with ioat->ring_lock held */
+}
+
+struct dma_async_tx_descriptor *
+ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
+                          dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+       struct ioat_dma_descriptor *hw;
+       struct ioat_ring_ent *desc;
+       dma_addr_t dst = dma_dest;
+       dma_addr_t src = dma_src;
+       size_t total_len = len;
+       int num_descs;
+       u16 idx;
+       int i;
+
+       num_descs = ioat2_xferlen_to_descs(ioat, len);
+       if (likely(num_descs) &&
+           ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0)
+               /* pass */;
+       else
+               return NULL;
+       i = 0;
+       do {
+               size_t copy = min_t(size_t, len, 1 << ioat->xfercap_log);
+
+               desc = ioat2_get_ring_ent(ioat, idx + i);
+               hw = desc->hw;
+
+               hw->size = copy;
+               hw->ctl = 0;
+               hw->src_addr = src;
+               hw->dst_addr = dst;
+
+               len -= copy;
+               dst += copy;
+               src += copy;
+               dump_desc_dbg(ioat, desc);
+       } while (++i < num_descs);
+
+       desc->txd.flags = flags;
+       desc->len = total_len;
+       hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+       hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+       hw->ctl_f.compl_write = 1;
+       dump_desc_dbg(ioat, desc);
+       /* we leave the channel locked to ensure in-order submission */
+
+       return &desc->txd;
+}
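
A worked example of the split above, assuming xfercap_log == 20 (1 MiB per
descriptor): a copy of 5 MiB + 123 bytes becomes six descriptors, five
full-size plus a 123-byte remainder, with the fence/interrupt/completion
bits set only on the last one:

	size_t len = (5UL << 20) + 123;
	int num_descs = (len >> 20) + !!(len & ((1UL << 20) - 1));	/* 6 */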
+
+/**
+ * ioat2_free_chan_resources - release all the descriptors
+ * @chan: the channel to be cleaned
+ */
+void ioat2_free_chan_resources(struct dma_chan *c)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+       struct ioat_chan_common *chan = &ioat->base;
+       struct ioatdma_device *device = chan->device;
+       struct ioat_ring_ent *desc;
+       const u16 total_descs = 1 << ioat->alloc_order;
+       int descs;
+       int i;
+
+       /* Before freeing channel resources, first check that they
+        * have actually been allocated for this channel.
+        */
+       if (!ioat->ring)
+               return;
+
+       tasklet_disable(&chan->cleanup_task);
+       del_timer_sync(&chan->timer);
+       device->cleanup_tasklet((unsigned long) ioat);
+
+       /* Delay 100ms after reset to allow internal DMA logic to quiesce
+        * before removing DMA descriptor resources.
+        */
+       writeb(IOAT_CHANCMD_RESET,
+              chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
+       mdelay(100);
+
+       spin_lock_bh(&ioat->ring_lock);
+       descs = ioat2_ring_space(ioat);
+       dev_dbg(to_dev(chan), "freeing %d idle descriptors\n", descs);
+       for (i = 0; i < descs; i++) {
+               desc = ioat2_get_ring_ent(ioat, ioat->head + i);
+               ioat2_free_ring_ent(desc, c);
+       }
+
+       if (descs < total_descs)
+               dev_err(to_dev(chan), "Freeing %d in use descriptors!\n",
+                       total_descs - descs);
+
+       for (i = 0; i < total_descs - descs; i++) {
+               desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
+               dump_desc_dbg(ioat, desc);
+               ioat2_free_ring_ent(desc, c);
+       }
+
+       kfree(ioat->ring);
+       ioat->ring = NULL;
+       ioat->alloc_order = 0;
+       pci_pool_free(device->completion_pool, chan->completion,
+                     chan->completion_dma);
+       spin_unlock_bh(&ioat->ring_lock);
+
+       chan->last_completion = 0;
+       chan->completion_dma = 0;
+       ioat->pending = 0;
+       ioat->dmacount = 0;
+}
+
+enum dma_status
+ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+                    dma_cookie_t *done, dma_cookie_t *used)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+       struct ioatdma_device *device = ioat->base.device;
+
+       if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
+               return DMA_SUCCESS;
+
+       device->cleanup_tasklet((unsigned long) ioat);
+
+       return ioat_is_complete(c, cookie, done, used);
+}
+
+static ssize_t ring_size_show(struct dma_chan *c, char *page)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+
+       return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1);
+}
+static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
+
+static ssize_t ring_active_show(struct dma_chan *c, char *page)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+
+       /* ...taken outside the lock, no need to be precise */
+       return sprintf(page, "%d\n", ioat2_ring_active(ioat));
+}
+static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
+
+static struct attribute *ioat2_attrs[] = {
+       &ring_size_attr.attr,
+       &ring_active_attr.attr,
+       &ioat_cap_attr.attr,
+       &ioat_version_attr.attr,
+       NULL,
+};
+
+struct kobj_type ioat2_ktype = {
+       .sysfs_ops = &ioat_sysfs_ops,
+       .default_attrs = ioat2_attrs,
+};
+
+int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca)
+{
+       struct pci_dev *pdev = device->pdev;
+       struct dma_device *dma;
+       struct dma_chan *c;
+       struct ioat_chan_common *chan;
+       int err;
+
+       device->enumerate_channels = ioat2_enumerate_channels;
+       device->cleanup_tasklet = ioat2_cleanup_tasklet;
+       device->timer_fn = ioat2_timer_event;
+       device->self_test = ioat_dma_self_test;
+       dma = &device->common;
+       dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
+       dma->device_issue_pending = ioat2_issue_pending;
+       dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
+       dma->device_free_chan_resources = ioat2_free_chan_resources;
+       dma->device_is_tx_complete = ioat2_is_complete;
+
+       err = ioat_probe(device);
+       if (err)
+               return err;
+       ioat_set_tcp_copy_break(2048);
+
+       list_for_each_entry(c, &dma->channels, device_node) {
+               chan = to_chan_common(c);
+               writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU,
+                      chan->reg_base + IOAT_DCACTRL_OFFSET);
+       }
+
+       err = ioat_register(device);
+       if (err)
+               return err;
+
+       ioat_kobject_add(device, &ioat2_ktype);
+
+       if (dca)
+               device->dca = ioat2_dca_init(pdev, device->reg_base);
+
+       return err;
+}
diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h
new file mode 100644 (file)
index 0000000..1d849ef
--- /dev/null
@@ -0,0 +1,190 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef IOATDMA_V2_H
+#define IOATDMA_V2_H
+
+#include <linux/dmaengine.h>
+#include "dma.h"
+#include "hw.h"
+
+extern int ioat_pending_level;
+extern int ioat_ring_alloc_order;
+
+/*
+ * workaround for IOAT ver.3.0 null descriptor issue
+ * (channel returns error when size is 0)
+ */
+#define NULL_DESC_BUFFER_SIZE 1
+
+#define IOAT_MAX_ORDER 16
+#define ioat_get_alloc_order() \
+       (min(ioat_ring_alloc_order, IOAT_MAX_ORDER))
+#define ioat_get_max_alloc_order() \
+       (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER))
+
+/**
+ * struct ioat2_dma_chan - ioat v2 / v3 channel attributes
+ * @base: common ioat channel parameters
+ * @xfercap_log: log2 of channel max transfer length (for fast division)
+ * @head: allocated index
+ * @issued: hardware notification point
+ * @tail: cleanup index
+ * @pending: lock free indicator for issued != head
+ * @dmacount: identical to 'head' except for occasionally resetting to zero
+ * @alloc_order: log2 of the number of allocated descriptors
+ * @ring: software ring buffer implementation of hardware ring
+ * @ring_lock: protects ring attributes
+ */
+struct ioat2_dma_chan {
+       struct ioat_chan_common base;
+       size_t xfercap_log;
+       u16 head;
+       u16 issued;
+       u16 tail;
+       u16 dmacount;
+       u16 alloc_order;
+       int pending;
+       struct ioat_ring_ent **ring;
+       spinlock_t ring_lock;
+};
+
+static inline struct ioat2_dma_chan *to_ioat2_chan(struct dma_chan *c)
+{
+       struct ioat_chan_common *chan = to_chan_common(c);
+
+       return container_of(chan, struct ioat2_dma_chan, base);
+}
+
+static inline u16 ioat2_ring_mask(struct ioat2_dma_chan *ioat)
+{
+       return (1 << ioat->alloc_order) - 1;
+}
+
+/* count of descriptors in flight with the engine */
+static inline u16 ioat2_ring_active(struct ioat2_dma_chan *ioat)
+{
+       return (ioat->head - ioat->tail) & ioat2_ring_mask(ioat);
+}
+
+/* count of descriptors pending submission to hardware */
+static inline u16 ioat2_ring_pending(struct ioat2_dma_chan *ioat)
+{
+       return (ioat->head - ioat->issued) & ioat2_ring_mask(ioat);
+}
+
+static inline u16 ioat2_ring_space(struct ioat2_dma_chan *ioat)
+{
+       u16 num_descs = ioat2_ring_mask(ioat) + 1;
+       u16 active = ioat2_ring_active(ioat);
+
+       BUG_ON(active > num_descs);
+
+       return num_descs - active;
+}
+
+/* assumes caller already checked space */
+static inline u16 ioat2_desc_alloc(struct ioat2_dma_chan *ioat, u16 len)
+{
+       ioat->head += len;
+       return ioat->head - len;
+}
+
+static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len)
+{
+       u16 num_descs = len >> ioat->xfercap_log;
+
+       num_descs += !!(len & ((1 << ioat->xfercap_log) - 1));
+       return num_descs;
+}
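
The u16 counters only ever increase and are masked on use, so the
subtraction wraps correctly across 0xffff. A worked example for a 16-entry
ring (hypothetical counter values):

	u16 head = 0x1002, tail = 0x0ffe, mask = 0xf;
	u16 active = (head - tail) & mask;	/* 4 descriptors in flight */
	u16 space = (mask + 1) - active;	/* 12 slots free */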
+
+/**
+ * struct ioat_ring_ent - wrapper around hardware descriptor
+ * @hw: hardware DMA descriptor (for memcpy)
+ * @fill: hardware fill descriptor
+ * @xor: hardware xor descriptor
+ * @xor_ex: hardware xor extension descriptor
+ * @pq: hardware pq descriptor
+ * @pq_ex: hardware pq extension descriptor
+ * @pqu: hardware pq update descriptor
+ * @raw: hardware raw (un-typed) descriptor
+ * @txd: the generic software descriptor for all engines
+ * @len: total transaction length for unmap
+ * @result: asynchronous result of validate operations
+ * @id: identifier for debug
+ */
+struct ioat_ring_ent {
+       union {
+               struct ioat_dma_descriptor *hw;
+               struct ioat_fill_descriptor *fill;
+               struct ioat_xor_descriptor *xor;
+               struct ioat_xor_ext_descriptor *xor_ex;
+               struct ioat_pq_descriptor *pq;
+               struct ioat_pq_ext_descriptor *pq_ex;
+               struct ioat_pq_update_descriptor *pqu;
+               struct ioat_raw_descriptor *raw;
+       };
+       size_t len;
+       struct dma_async_tx_descriptor txd;
+       enum sum_check_flags *result;
+       #ifdef DEBUG
+       int id;
+       #endif
+};
+
+static inline struct ioat_ring_ent *
+ioat2_get_ring_ent(struct ioat2_dma_chan *ioat, u16 idx)
+{
+       return ioat->ring[idx & ioat2_ring_mask(ioat)];
+}
+
+static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+
+       writel(addr & 0x00000000FFFFFFFF,
+              chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
+       writel(addr >> 32,
+              chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
+}
+
+int __devinit ioat2_dma_probe(struct ioatdma_device *dev, int dca);
+int __devinit ioat3_dma_probe(struct ioatdma_device *dev, int dca);
+struct dca_provider * __devinit ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+struct dca_provider * __devinit ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_descs);
+int ioat2_enumerate_channels(struct ioatdma_device *device);
+struct dma_async_tx_descriptor *
+ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
+                          dma_addr_t dma_src, size_t len, unsigned long flags);
+void ioat2_issue_pending(struct dma_chan *chan);
+int ioat2_alloc_chan_resources(struct dma_chan *c);
+void ioat2_free_chan_resources(struct dma_chan *c);
+enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+                                 dma_cookie_t *done, dma_cookie_t *used);
+void __ioat2_restart_chan(struct ioat2_dma_chan *ioat);
+bool reshape_ring(struct ioat2_dma_chan *ioat, int order);
+void __ioat2_issue_pending(struct ioat2_dma_chan *ioat);
+void ioat2_cleanup_tasklet(unsigned long data);
+void ioat2_timer_event(unsigned long data);
+extern struct kobj_type ioat2_ktype;
+extern struct kmem_cache *ioat2_cache;
+#endif /* IOATDMA_V2_H */
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
new file mode 100644 (file)
index 0000000..35d1e33
--- /dev/null
@@ -0,0 +1,1223 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2004-2009 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Support routines for v3+ hardware
+ */
+
+#include <linux/pci.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include "registers.h"
+#include "hw.h"
+#include "dma.h"
+#include "dma_v2.h"
+
+/* ioat hardware assumes at least two sources for raid operations */
+#define src_cnt_to_sw(x) ((x) + 2)
+#define src_cnt_to_hw(x) ((x) - 2)
+
+/* provide a lookup table for setting the source address in the base or
+ * extended descriptor of an xor or pq descriptor
+ */
+static const u8 xor_idx_to_desc __read_mostly = 0xe0;
+static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 };
+static const u8 pq_idx_to_desc __read_mostly = 0xf8;
+static const u8 pq_idx_to_field[] __read_mostly = { 1, 4, 5, 0, 1, 2, 4, 5 };
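
How the tables are consumed: bit idx of the bitmap selects the base (0) or
extended (1) descriptor, and the field table names the qword slot inside it.
For xor, 0xe0 places sources 0-4 in the base descriptor (qwords 1, 4, 5, 6,
7) and sources 5-7 in the extended one (qwords 0, 1, 2), which is why more
than five sources force a descriptor pair. A sketch of the decode:

	struct ioat_raw_descriptor *raw = descs[(xor_idx_to_desc >> idx) & 1];
	dma_addr_t src = raw->field[xor_idx_to_field[idx]];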
+
+static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
+{
+       struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
+
+       return raw->field[xor_idx_to_field[idx]];
+}
+
+static void xor_set_src(struct ioat_raw_descriptor *descs[2],
+                       dma_addr_t addr, u32 offset, int idx)
+{
+       struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
+
+       raw->field[xor_idx_to_field[idx]] = addr + offset;
+}
+
+static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
+{
+       struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
+
+       return raw->field[pq_idx_to_field[idx]];
+}
+
+static void pq_set_src(struct ioat_raw_descriptor *descs[2],
+                      dma_addr_t addr, u32 offset, u8 coef, int idx)
+{
+       struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
+       struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
+
+       raw->field[pq_idx_to_field[idx]] = addr + offset;
+       pq->coef[idx] = coef;
+}
+
+static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
+                           struct ioat_ring_ent *desc, int idx)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+       struct pci_dev *pdev = chan->device->pdev;
+       size_t len = desc->len;
+       size_t offset = len - desc->hw->size;
+       struct dma_async_tx_descriptor *tx = &desc->txd;
+       enum dma_ctrl_flags flags = tx->flags;
+
+       switch (desc->hw->ctl_f.op) {
+       case IOAT_OP_COPY:
+               if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */
+                       ioat_dma_unmap(chan, flags, len, desc->hw);
+               break;
+       case IOAT_OP_FILL: {
+               struct ioat_fill_descriptor *hw = desc->fill;
+
+               if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
+                       ioat_unmap(pdev, hw->dst_addr - offset, len,
+                                  PCI_DMA_FROMDEVICE, flags, 1);
+               break;
+       }
+       case IOAT_OP_XOR_VAL:
+       case IOAT_OP_XOR: {
+               struct ioat_xor_descriptor *xor = desc->xor;
+               struct ioat_ring_ent *ext;
+               struct ioat_xor_ext_descriptor *xor_ex = NULL;
+               int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt);
+               struct ioat_raw_descriptor *descs[2];
+               int i;
+
+               if (src_cnt > 5) {
+                       ext = ioat2_get_ring_ent(ioat, idx + 1);
+                       xor_ex = ext->xor_ex;
+               }
+
+               if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+                       descs[0] = (struct ioat_raw_descriptor *) xor;
+                       descs[1] = (struct ioat_raw_descriptor *) xor_ex;
+                       for (i = 0; i < src_cnt; i++) {
+                               dma_addr_t src = xor_get_src(descs, i);
+
+                               ioat_unmap(pdev, src - offset, len,
+                                          PCI_DMA_TODEVICE, flags, 0);
+                       }
+
+                       /* dest is a source in xor validate operations */
+                       if (xor->ctl_f.op == IOAT_OP_XOR_VAL) {
+                               ioat_unmap(pdev, xor->dst_addr - offset, len,
+                                          PCI_DMA_TODEVICE, flags, 1);
+                               break;
+                       }
+               }
+
+               if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
+                       ioat_unmap(pdev, xor->dst_addr - offset, len,
+                                  PCI_DMA_FROMDEVICE, flags, 1);
+               break;
+       }
+       case IOAT_OP_PQ_VAL:
+       case IOAT_OP_PQ: {
+               struct ioat_pq_descriptor *pq = desc->pq;
+               struct ioat_ring_ent *ext;
+               struct ioat_pq_ext_descriptor *pq_ex = NULL;
+               int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
+               struct ioat_raw_descriptor *descs[2];
+               int i;
+
+               if (src_cnt > 3) {
+                       ext = ioat2_get_ring_ent(ioat, idx + 1);
+                       pq_ex = ext->pq_ex;
+               }
+
+               /* in the 'continue' case don't unmap the dests as sources */
+               if (dmaf_p_disabled_continue(flags))
+                       src_cnt--;
+               else if (dmaf_continue(flags))
+                       src_cnt -= 3;
+
+               if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+                       descs[0] = (struct ioat_raw_descriptor *) pq;
+                       descs[1] = (struct ioat_raw_descriptor *) pq_ex;
+                       for (i = 0; i < src_cnt; i++) {
+                               dma_addr_t src = pq_get_src(descs, i);
+
+                               ioat_unmap(pdev, src - offset, len,
+                                          PCI_DMA_TODEVICE, flags, 0);
+                       }
+
+                       /* the dests are sources in pq validate operations */
+                       if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
+                               if (!(flags & DMA_PREP_PQ_DISABLE_P))
+                                       ioat_unmap(pdev, pq->p_addr - offset,
+                                                  len, PCI_DMA_TODEVICE, flags, 0);
+                               if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+                                       ioat_unmap(pdev, pq->q_addr - offset,
+                                                  len, PCI_DMA_TODEVICE, flags, 0);
+                               break;
+                       }
+               }
+
+               if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+                       if (!(flags & DMA_PREP_PQ_DISABLE_P))
+                               ioat_unmap(pdev, pq->p_addr - offset, len,
+                                          PCI_DMA_BIDIRECTIONAL, flags, 1);
+                       if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+                               ioat_unmap(pdev, pq->q_addr - offset, len,
+                                          PCI_DMA_BIDIRECTIONAL, flags, 1);
+               }
+               break;
+       }
+       default:
+               dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
+                       __func__, desc->hw->ctl_f.op);
+       }
+}
+
+static bool desc_has_ext(struct ioat_ring_ent *desc)
+{
+       struct ioat_dma_descriptor *hw = desc->hw;
+
+       if (hw->ctl_f.op == IOAT_OP_XOR ||
+           hw->ctl_f.op == IOAT_OP_XOR_VAL) {
+               struct ioat_xor_descriptor *xor = desc->xor;
+
+               if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5)
+                       return true;
+       } else if (hw->ctl_f.op == IOAT_OP_PQ ||
+                  hw->ctl_f.op == IOAT_OP_PQ_VAL) {
+               struct ioat_pq_descriptor *pq = desc->pq;
+
+               if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3)
+                       return true;
+       }
+
+       return false;
+}
+
+/**
+ * __cleanup - reclaim used descriptors
+ * @ioat: channel (ring) to clean
+ *
+ * The difference from the dma_v2.c __cleanup() is that this routine
+ * handles extended descriptors and dma-unmapping raid operations.
+ */
+static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+       struct ioat_ring_ent *desc;
+       bool seen_current = false;
+       u16 active;
+       int i;
+
+       dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
+               __func__, ioat->head, ioat->tail, ioat->issued);
+
+       active = ioat2_ring_active(ioat);
+       for (i = 0; i < active && !seen_current; i++) {
+               struct dma_async_tx_descriptor *tx;
+
+               prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1));
+               desc = ioat2_get_ring_ent(ioat, ioat->tail + i);
+               dump_desc_dbg(ioat, desc);
+               tx = &desc->txd;
+               if (tx->cookie) {
+                       chan->completed_cookie = tx->cookie;
+                       ioat3_dma_unmap(ioat, desc, ioat->tail + i);
+                       tx->cookie = 0;
+                       if (tx->callback) {
+                               tx->callback(tx->callback_param);
+                               tx->callback = NULL;
+                       }
+               }
+
+               if (tx->phys == phys_complete)
+                       seen_current = true;
+
+               /* skip extended descriptors */
+               if (desc_has_ext(desc)) {
+                       BUG_ON(i + 1 >= active);
+                       i++;
+               }
+       }
+       ioat->tail += i;
+       BUG_ON(!seen_current); /* no active descs have written a completion? */
+       chan->last_completion = phys_complete;
+       if (ioat->head == ioat->tail) {
+               dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
+                       __func__);
+               clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
+               mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+       }
+}
+
+static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+       unsigned long phys_complete;
+
+       prefetch(chan->completion);
+
+       if (!spin_trylock_bh(&chan->cleanup_lock))
+               return;
+
+       if (!ioat_cleanup_preamble(chan, &phys_complete)) {
+               spin_unlock_bh(&chan->cleanup_lock);
+               return;
+       }
+
+       if (!spin_trylock_bh(&ioat->ring_lock)) {
+               spin_unlock_bh(&chan->cleanup_lock);
+               return;
+       }
+
+       __cleanup(ioat, phys_complete);
+
+       spin_unlock_bh(&ioat->ring_lock);
+       spin_unlock_bh(&chan->cleanup_lock);
+}
+
+static void ioat3_cleanup_tasklet(unsigned long data)
+{
+       struct ioat2_dma_chan *ioat = (void *) data;
+
+       ioat3_cleanup(ioat);
+       writew(IOAT_CHANCTRL_RUN | IOAT3_CHANCTRL_COMPL_DCA_EN,
+              ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+static void ioat3_restart_channel(struct ioat2_dma_chan *ioat)
+{
+       struct ioat_chan_common *chan = &ioat->base;
+       unsigned long phys_complete;
+       u32 status;
+
+       status = ioat_chansts(chan);
+       if (is_ioat_active(status) || is_ioat_idle(status))
+               ioat_suspend(chan);
+       while (is_ioat_active(status) || is_ioat_idle(status)) {
+               status = ioat_chansts(chan);
+               cpu_relax();
+       }
+
+       if (ioat_cleanup_preamble(chan, &phys_complete))
+               __cleanup(ioat, phys_complete);
+
+       __ioat2_restart_chan(ioat);
+}
+
+static void ioat3_timer_event(unsigned long data)
+{
+       struct ioat2_dma_chan *ioat = (void *) data;
+       struct ioat_chan_common *chan = &ioat->base;
+
+       spin_lock_bh(&chan->cleanup_lock);
+       if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
+               unsigned long phys_complete;
+               u64 status;
+
+               spin_lock_bh(&ioat->ring_lock);
+               status = ioat_chansts(chan);
+
+               /* when halted due to errors check for channel
+                * programming errors before advancing the completion state
+                */
+               if (is_ioat_halted(status)) {
+                       u32 chanerr;
+
+                       chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+                       BUG_ON(is_ioat_bug(chanerr));
+               }
+
+               /* if we haven't made progress and we have already
+                * acknowledged a pending completion once, then be more
+                * forceful with a restart
+                */
+               if (ioat_cleanup_preamble(chan, &phys_complete))
+                       __cleanup(ioat, phys_complete);
+               else if (test_bit(IOAT_COMPLETION_ACK, &chan->state))
+                       ioat3_restart_channel(ioat);
+               else {
+                       set_bit(IOAT_COMPLETION_ACK, &chan->state);
+                       mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
+               }
+               spin_unlock_bh(&ioat->ring_lock);
+       } else {
+               u16 active;
+
+               /* if the ring is idle, empty, and oversized try to step
+                * down the size
+                */
+               spin_lock_bh(&ioat->ring_lock);
+               active = ioat2_ring_active(ioat);
+               if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
+                       reshape_ring(ioat, ioat->alloc_order-1);
+               spin_unlock_bh(&ioat->ring_lock);
+
+               /* keep shrinking until we get back to our minimum
+                * default size
+                */
+               if (ioat->alloc_order > ioat_get_alloc_order())
+                       mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
+       }
+       spin_unlock_bh(&chan->cleanup_lock);
+}
+
+static enum dma_status
+ioat3_is_complete(struct dma_chan *c, dma_cookie_t cookie,
+                 dma_cookie_t *done, dma_cookie_t *used)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+
+       if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
+               return DMA_SUCCESS;
+
+       ioat3_cleanup(ioat);
+
+       return ioat_is_complete(c, cookie, done, used);
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value,
+                      size_t len, unsigned long flags)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+       struct ioat_ring_ent *desc;
+       size_t total_len = len;
+       struct ioat_fill_descriptor *fill;
+       int num_descs;
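+       /* replicate the fill byte into all eight byte lanes of the pattern */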
+       u64 src_data = (0x0101010101010101ULL) * (value & 0xff);
+       u16 idx;
+       int i;
+
+       num_descs = ioat2_xferlen_to_descs(ioat, len);
+       if (likely(num_descs) &&
+           ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0)
+               /* pass */;
+       else
+               return NULL;
+       i = 0;
+       do {
+               size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
+
+               desc = ioat2_get_ring_ent(ioat, idx + i);
+               fill = desc->fill;
+
+               fill->size = xfer_size;
+               fill->src_data = src_data;
+               fill->dst_addr = dest;
+               fill->ctl = 0;
+               fill->ctl_f.op = IOAT_OP_FILL;
+
+               len -= xfer_size;
+               dest += xfer_size;
+               dump_desc_dbg(ioat, desc);
+       } while (++i < num_descs);
+
+       desc->txd.flags = flags;
+       desc->len = total_len;
+       fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+       fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+       fill->ctl_f.compl_write = 1;
+       dump_desc_dbg(ioat, desc);
+
+       /* we leave the channel locked to ensure in-order submission */
+       return &desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
+                     dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
+                     size_t len, unsigned long flags)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+       struct ioat_ring_ent *compl_desc;
+       struct ioat_ring_ent *desc;
+       struct ioat_ring_ent *ext;
+       size_t total_len = len;
+       struct ioat_xor_descriptor *xor;
+       struct ioat_xor_ext_descriptor *xor_ex = NULL;
+       struct ioat_dma_descriptor *hw;
+       u32 offset = 0;
+       int num_descs;
+       int with_ext;
+       int i;
+       u16 idx;
+       u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;
+
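+       /* an xor of fewer than two sources is meaningless */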
+       BUG_ON(src_cnt < 2);
+
+       num_descs = ioat2_xferlen_to_descs(ioat, len);
+       /* we need 2x the number of descriptors to cover more than 5
+        * sources
+        */
+       if (src_cnt > 5) {
+               with_ext = 1;
+               num_descs *= 2;
+       } else
+               with_ext = 0;
+
+       /* completion writes from the raid engine may pass completion
+        * writes from the legacy engine, so we need one extra null
+        * (legacy) descriptor to ensure all completion writes arrive in
+        * order.
+        */
+       if (likely(num_descs) &&
+           ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
+               /* pass */;
+       else
+               return NULL;
+       i = 0;
+       do {
+               struct ioat_raw_descriptor *descs[2];
+               size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
+               int s;
+
+               desc = ioat2_get_ring_ent(ioat, idx + i);
+               xor = desc->xor;
+
+               /* save a branch by unconditionally retrieving the
+                * extended descriptor; xor_set_src() knows not to write
+                * to it in the single-descriptor case
+                */
+               ext = ioat2_get_ring_ent(ioat, idx + i + 1);
+               xor_ex = ext->xor_ex;
+
+               descs[0] = (struct ioat_raw_descriptor *) xor;
+               descs[1] = (struct ioat_raw_descriptor *) xor_ex;
+               for (s = 0; s < src_cnt; s++)
+                       xor_set_src(descs, src[s], offset, s);
+               xor->size = xfer_size;
+               xor->dst_addr = dest + offset;
+               xor->ctl = 0;
+               xor->ctl_f.op = op;
+               xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);
+
+               len -= xfer_size;
+               offset += xfer_size;
+               dump_desc_dbg(ioat, desc);
+       } while ((i += 1 + with_ext) < num_descs);
+
+       /* last xor descriptor carries the unmap parameters and fence bit */
+       desc->txd.flags = flags;
+       desc->len = total_len;
+       if (result)
+               desc->result = result;
+       xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+
+       /* completion descriptor carries interrupt bit */
+       compl_desc = ioat2_get_ring_ent(ioat, idx + i);
+       compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
+       hw = compl_desc->hw;
+       hw->ctl = 0;
+       hw->ctl_f.null = 1;
+       hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+       hw->ctl_f.compl_write = 1;
+       hw->size = NULL_DESC_BUFFER_SIZE;
+       dump_desc_dbg(ioat, compl_desc);
+
+       /* we leave the channel locked to ensure in-order submission */
+       return &desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+              unsigned int src_cnt, size_t len, unsigned long flags)
+{
+       return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
+                   unsigned int src_cnt, size_t len,
+                   enum sum_check_flags *result, unsigned long flags)
+{
+       /* the cleanup routine only sets bits on validate failure; it
+        * does not clear bits on validate success... so clear it here
+        */
+       *result = 0;
+
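+       /* reuse the xor machinery for validation: src[0] acts as the
+        * destination operand and is checked against the remaining
+        * sources
+        */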
+       return __ioat3_prep_xor_lock(chan, result, src[0], &src[1],
+                                    src_cnt - 1, len, flags);
+}
+
+static void
+dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext)
+{
+       struct device *dev = to_dev(&ioat->base);
+       struct ioat_pq_descriptor *pq = desc->pq;
+       struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
+       struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
+       int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
+       int i;
+
+       dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
+               " sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n",
+               desc_id(desc), (unsigned long long) desc->txd.phys,
+               (unsigned long long) (pq_ex ? pq_ex->next : pq->next),
+               desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en,
+               pq->ctl_f.compl_write,
+               pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
+               pq->ctl_f.src_cnt);
+       for (i = 0; i < src_cnt; i++)
+               dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
+                       (unsigned long long) pq_get_src(descs, i), pq->coef[i]);
+       dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
+       dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
+}
+
+static struct dma_async_tx_descriptor *
+__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
+                    const dma_addr_t *dst, const dma_addr_t *src,
+                    unsigned int src_cnt, const unsigned char *scf,
+                    size_t len, unsigned long flags)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+       struct ioat_chan_common *chan = &ioat->base;
+       struct ioat_ring_ent *compl_desc;
+       struct ioat_ring_ent *desc;
+       struct ioat_ring_ent *ext;
+       size_t total_len = len;
+       struct ioat_pq_descriptor *pq;
+       struct ioat_pq_ext_descriptor *pq_ex = NULL;
+       struct ioat_dma_descriptor *hw;
+       u32 offset = 0;
+       int num_descs;
+       int with_ext;
+       int i, s;
+       u16 idx;
+       u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
+
+       dev_dbg(to_dev(chan), "%s\n", __func__);
+       /* the engine requires at least two sources (we provide
+        * at least 1 implied source in the DMA_PREP_CONTINUE case)
+        */
+       BUG_ON(src_cnt + dmaf_continue(flags) < 2);
+
+       num_descs = ioat2_xferlen_to_descs(ioat, len);
+       /* we need 2x the number of descriptors to cover more than 3
+        * sources
+        */
+       if (src_cnt > 3 || flags & DMA_PREP_CONTINUE) {
+               with_ext = 1;
+               num_descs *= 2;
+       } else
+               with_ext = 0;
+
+       /* completion writes from the raid engine may pass completion
+        * writes from the legacy engine, so we need one extra null
+        * (legacy) descriptor to ensure all completion writes arrive in
+        * order.
+        */
+       if (likely(num_descs) &&
+           ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
+               /* pass */;
+       else
+               return NULL;
+       i = 0;
+       do {
+               struct ioat_raw_descriptor *descs[2];
+               size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
+
+               desc = ioat2_get_ring_ent(ioat, idx + i);
+               pq = desc->pq;
+
+               /* save a branch by unconditionally retrieving the
+                * extended descriptor; pq_set_src() knows not to write
+                * to it in the single-descriptor case
+                */
+               ext = ioat2_get_ring_ent(ioat, idx + i + with_ext);
+               pq_ex = ext->pq_ex;
+
+               descs[0] = (struct ioat_raw_descriptor *) pq;
+               descs[1] = (struct ioat_raw_descriptor *) pq_ex;
+
+               for (s = 0; s < src_cnt; s++)
+                       pq_set_src(descs, src[s], offset, scf[s], s);
+
+               /* see the comment for dma_maxpq in include/linux/dmaengine.h */
+               if (dmaf_p_disabled_continue(flags))
+                       pq_set_src(descs, dst[1], offset, 1, s++);
+               else if (dmaf_continue(flags)) {
+                       pq_set_src(descs, dst[0], offset, 0, s++);
+                       pq_set_src(descs, dst[1], offset, 1, s++);
+                       pq_set_src(descs, dst[1], offset, 0, s++);
+               }
+               pq->size = xfer_size;
+               pq->p_addr = dst[0] + offset;
+               pq->q_addr = dst[1] + offset;
+               pq->ctl = 0;
+               pq->ctl_f.op = op;
+               pq->ctl_f.src_cnt = src_cnt_to_hw(s);
+               pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
+               pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
+
+               len -= xfer_size;
+               offset += xfer_size;
+       } while ((i += 1 + with_ext) < num_descs);
+
+       /* last pq descriptor carries the unmap parameters and fence bit */
+       desc->txd.flags = flags;
+       desc->len = total_len;
+       if (result)
+               desc->result = result;
+       pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+       dump_pq_desc_dbg(ioat, desc, ext);
+
+       /* completion descriptor carries interrupt bit */
+       compl_desc = ioat2_get_ring_ent(ioat, idx + i);
+       compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
+       hw = compl_desc->hw;
+       hw->ctl = 0;
+       hw->ctl_f.null = 1;
+       hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+       hw->ctl_f.compl_write = 1;
+       hw->size = NULL_DESC_BUFFER_SIZE;
+       dump_desc_dbg(ioat, compl_desc);
+
+       /* we leave the channel locked to ensure in-order submission */
+       return &desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+             unsigned int src_cnt, const unsigned char *scf, size_t len,
+             unsigned long flags)
+{
+       /* handle the single-source multiply case from the raid6
+        * recovery path
+        */
+       if (unlikely((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1)) {
+               dma_addr_t single_source[2];
+               unsigned char single_source_coef[2];
+
+               BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
+               single_source[0] = src[0];
+               single_source[1] = src[0];
+               single_source_coef[0] = scf[0];
+               single_source_coef[1] = 0;
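+               /* present the lone source twice so the engine sees the
+                * two sources it requires; the zero coefficient keeps
+                * the duplicate from contributing to Q
+                */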
+
+               return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2,
+                                           single_source_coef, len, flags);
+       } else
+               return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf,
+                                           len, flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+                 unsigned int src_cnt, const unsigned char *scf, size_t len,
+                 enum sum_check_flags *pqres, unsigned long flags)
+{
+       /* the cleanup routine only sets bits on validate failure; it
+        * does not clear bits on validate success... so clear it here
+        */
+       *pqres = 0;
+
+       return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
+                                   flags);
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
+                unsigned int src_cnt, size_t len, unsigned long flags)
+{
+       unsigned char scf[src_cnt];
+       dma_addr_t pq[2];
+
+       memset(scf, 0, src_cnt);
+       flags |= DMA_PREP_PQ_DISABLE_Q;
+       pq[0] = dst;
+       pq[1] = ~0;
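+       /* Q is disabled above, so q_addr only needs a dummy value */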
+
+       return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
+                                   flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
+                    unsigned int src_cnt, size_t len,
+                    enum sum_check_flags *result, unsigned long flags)
+{
+       unsigned char scf[src_cnt];
+       dma_addr_t pq[2];
+
+       /* the cleanup routine only sets bits on validate failure; it
+        * does not clear bits on validate success... so clear it here
+        */
+       *result = 0;
+
+       memset(scf, 0, src_cnt);
+       flags |= DMA_PREP_PQ_DISABLE_Q;
+       pq[0] = src[0];
+       pq[1] = ~0;
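+       /* Q is disabled above, so q_addr only needs a dummy value */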
+
+       return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf,
+                                   len, flags);
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
+{
+       struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+       struct ioat_ring_ent *desc;
+       struct ioat_dma_descriptor *hw;
+       u16 idx;
+
+       if (ioat2_alloc_and_lock(&idx, ioat, 1) == 0)
+               desc = ioat2_get_ring_ent(ioat, idx);
+       else
+               return NULL;
+
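+       /* a null descriptor transfers no data; it exists only to raise
+        * an interrupt and trigger a completion write
+        */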
+       hw = desc->hw;
+       hw->ctl = 0;
+       hw->ctl_f.null = 1;
+       hw->ctl_f.int_en = 1;
+       hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+       hw->ctl_f.compl_write = 1;
+       hw->size = NULL_DESC_BUFFER_SIZE;
+       hw->src_addr = 0;
+       hw->dst_addr = 0;
+
+       desc->txd.flags = flags;
+       desc->len = 1;
+
+       dump_desc_dbg(ioat, desc);
+
+       /* we leave the channel locked to ensure in-order submission */
+       return &desc->txd;
+}
+
+static void __devinit ioat3_dma_test_callback(void *dma_async_param)
+{
+       struct completion *cmp = dma_async_param;
+
+       complete(cmp);
+}
+
+#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */
+static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device)
+{
+       int i, src_idx;
+       struct page *dest;
+       struct page *xor_srcs[IOAT_NUM_SRC_TEST];
+       struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1];
+       dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1];
+       dma_addr_t dma_addr, dest_dma;
+       struct dma_async_tx_descriptor *tx;
+       struct dma_chan *dma_chan;
+       dma_cookie_t cookie;
+       u8 cmp_byte = 0;
+       u32 cmp_word;
+       u32 xor_val_result;
+       int err = 0;
+       struct completion cmp;
+       unsigned long tmo;
+       struct device *dev = &device->pdev->dev;
+       struct dma_device *dma = &device->common;
+
+       dev_dbg(dev, "%s\n", __func__);
+
+       if (!dma_has_cap(DMA_XOR, dma->cap_mask))
+               return 0;
+
+       for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
+               xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+               if (!xor_srcs[src_idx]) {
+                       while (src_idx--)
+                               __free_page(xor_srcs[src_idx]);
+                       return -ENOMEM;
+               }
+       }
+
+       dest = alloc_page(GFP_KERNEL);
+       if (!dest) {
+               while (src_idx--)
+                       __free_page(xor_srcs[src_idx]);
+               return -ENOMEM;
+       }
+
+       /* Fill in src buffers */
+       for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
+               u8 *ptr = page_address(xor_srcs[src_idx]);
+               for (i = 0; i < PAGE_SIZE; i++)
+                       ptr[i] = (1 << src_idx);
+       }
+
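+       /* each source page carries a distinct single-bit pattern, so
+        * the xor of all sources is the known constant computed below
+        */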
+       for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++)
+               cmp_byte ^= (u8) (1 << src_idx);
+
+       cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+                       (cmp_byte << 8) | cmp_byte;
+
+       memset(page_address(dest), 0, PAGE_SIZE);
+
+       dma_chan = container_of(dma->channels.next, struct dma_chan,
+                               device_node);
+       if (dma->device_alloc_chan_resources(dma_chan) < 1) {
+               err = -ENODEV;
+               goto out;
+       }
+
+       /* test xor */
+       dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+       for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+               dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
+                                          DMA_TO_DEVICE);
+       tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+                                     IOAT_NUM_SRC_TEST, PAGE_SIZE,
+                                     DMA_PREP_INTERRUPT);
+
+       if (!tx) {
+               dev_err(dev, "Self-test xor prep failed\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       async_tx_ack(tx);
+       init_completion(&cmp);
+       tx->callback = ioat3_dma_test_callback;
+       tx->callback_param = &cmp;
+       cookie = tx->tx_submit(tx);
+       if (cookie < 0) {
+               dev_err(dev, "Self-test xor setup failed\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+       dma->device_issue_pending(dma_chan);
+
+       tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+       if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+               dev_err(dev, "Self-test xor timed out\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+       for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+               u32 *ptr = page_address(dest);
+               if (ptr[i] != cmp_word) {
+                       dev_err(dev, "Self-test xor failed compare\n");
+                       err = -ENODEV;
+                       goto free_resources;
+               }
+       }
+       dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+
+       /* skip validate if the capability is not present */
+       if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
+               goto free_resources;
+
+       /* validate the sources against the destination page */
+       for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+               xor_val_srcs[i] = xor_srcs[i];
+       xor_val_srcs[i] = dest;
+
+       xor_val_result = 1;
+
+       for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+               dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
+                                          DMA_TO_DEVICE);
+       tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
+                                         IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
+                                         &xor_val_result, DMA_PREP_INTERRUPT);
+       if (!tx) {
+               dev_err(dev, "Self-test zero prep failed\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       async_tx_ack(tx);
+       init_completion(&cmp);
+       tx->callback = ioat3_dma_test_callback;
+       tx->callback_param = &cmp;
+       cookie = tx->tx_submit(tx);
+       if (cookie < 0) {
+               dev_err(dev, "Self-test zero setup failed\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+       dma->device_issue_pending(dma_chan);
+
+       tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+       if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+               dev_err(dev, "Self-test validate timed out\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       if (xor_val_result != 0) {
+               dev_err(dev, "Self-test validate failed compare\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       /* skip memset if the capability is not present */
+       if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask))
+               goto free_resources;
+
+       /* test memset */
+       dma_addr = dma_map_page(dev, dest, 0,
+                       PAGE_SIZE, DMA_FROM_DEVICE);
+       tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE,
+                                        DMA_PREP_INTERRUPT);
+       if (!tx) {
+               dev_err(dev, "Self-test memset prep failed\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       async_tx_ack(tx);
+       init_completion(&cmp);
+       tx->callback = ioat3_dma_test_callback;
+       tx->callback_param = &cmp;
+       cookie = tx->tx_submit(tx);
+       if (cookie < 0) {
+               dev_err(dev, "Self-test memset setup failed\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+       dma->device_issue_pending(dma_chan);
+
+       tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+       if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+               dev_err(dev, "Self-test memset timed out\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
+               u32 *ptr = page_address(dest);
+               if (ptr[i]) {
+                       dev_err(dev, "Self-test memset failed compare\n");
+                       err = -ENODEV;
+                       goto free_resources;
+               }
+       }
+
+       /* test for non-zero parity sum */
+       xor_val_result = 0;
+       for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+               dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
+                                          DMA_TO_DEVICE);
+       tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
+                                         IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
+                                         &xor_val_result, DMA_PREP_INTERRUPT);
+       if (!tx) {
+               dev_err(dev, "Self-test 2nd zero prep failed\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       async_tx_ack(tx);
+       init_completion(&cmp);
+       tx->callback = ioat3_dma_test_callback;
+       tx->callback_param = &cmp;
+       cookie = tx->tx_submit(tx);
+       if (cookie < 0) {
+               dev_err(dev, "Self-test  2nd zero setup failed\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+       dma->device_issue_pending(dma_chan);
+
+       tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+       if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+               dev_err(dev, "Self-test 2nd validate timed out\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       if (xor_val_result != SUM_CHECK_P_RESULT) {
+               dev_err(dev, "Self-test validate failed compare\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+free_resources:
+       dma->device_free_chan_resources(dma_chan);
+out:
+       src_idx = IOAT_NUM_SRC_TEST;
+       while (src_idx--)
+               __free_page(xor_srcs[src_idx]);
+       __free_page(dest);
+       return err;
+}
+
+static int __devinit ioat3_dma_self_test(struct ioatdma_device *device)
+{
+       int rc = ioat_dma_self_test(device);
+
+       if (rc)
+               return rc;
+
+       rc = ioat_xor_val_self_test(device);
+       if (rc)
+               return rc;
+
+       return 0;
+}
+
+int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
+{
+       struct pci_dev *pdev = device->pdev;
+       struct dma_device *dma;
+       struct dma_chan *c;
+       struct ioat_chan_common *chan;
+       bool is_raid_device = false;
+       int err;
+       u16 dev_id;
+       u32 cap;
+
+       device->enumerate_channels = ioat2_enumerate_channels;
+       device->self_test = ioat3_dma_self_test;
+       dma = &device->common;
+       dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
+       dma->device_issue_pending = ioat2_issue_pending;
+       dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
+       dma->device_free_chan_resources = ioat2_free_chan_resources;
+
+       dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
+       dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock;
+
+       cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET);
+       if (cap & IOAT_CAP_XOR) {
+               is_raid_device = true;
+               dma->max_xor = 8;
+               dma->xor_align = 2;
+
+               dma_cap_set(DMA_XOR, dma->cap_mask);
+               dma->device_prep_dma_xor = ioat3_prep_xor;
+
+               dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
+               dma->device_prep_dma_xor_val = ioat3_prep_xor_val;
+       }
+       if (cap & IOAT_CAP_PQ) {
+               is_raid_device = true;
+               dma_set_maxpq(dma, 8, 0);
+               dma->pq_align = 2;
+
+               dma_cap_set(DMA_PQ, dma->cap_mask);
+               dma->device_prep_dma_pq = ioat3_prep_pq;
+
+               dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
+               dma->device_prep_dma_pq_val = ioat3_prep_pq_val;
+
+               if (!(cap & IOAT_CAP_XOR)) {
+                       dma->max_xor = 8;
+                       dma->xor_align = 2;
+
+                       dma_cap_set(DMA_XOR, dma->cap_mask);
+                       dma->device_prep_dma_xor = ioat3_prep_pqxor;
+
+                       dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
+                       dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val;
+               }
+       }
+       if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) {
+               dma_cap_set(DMA_MEMSET, dma->cap_mask);
+               dma->device_prep_dma_memset = ioat3_prep_memset_lock;
+       }
+
+       if (is_raid_device) {
+               dma->device_is_tx_complete = ioat3_is_complete;
+               device->cleanup_tasklet = ioat3_cleanup_tasklet;
+               device->timer_fn = ioat3_timer_event;
+       } else {
+               dma->device_is_tx_complete = ioat2_is_complete;
+               device->cleanup_tasklet = ioat2_cleanup_tasklet;
+               device->timer_fn = ioat2_timer_event;
+       }
+
+       /* -= IOAT ver.3 workarounds =- */
+       /* Write CHANERRMSK_INT with 3E07h to mask out the errors
+        * that can cause stability issues for IOAT ver.3
+        */
+       pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07);
+
+       /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
+        * (workaround for spurious config parity error after restart)
+        */
+       pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
+       if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0)
+               pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10);
+
+       err = ioat_probe(device);
+       if (err)
+               return err;
+       ioat_set_tcp_copy_break(262144);
+
+       list_for_each_entry(c, &dma->channels, device_node) {
+               chan = to_chan_common(c);
+               writel(IOAT_DMA_DCA_ANY_CPU,
+                      chan->reg_base + IOAT_DCACTRL_OFFSET);
+       }
+
+       err = ioat_register(device);
+       if (err)
+               return err;
+
+       ioat_kobject_add(device, &ioat2_ktype);
+
+       if (dca)
+               device->dca = ioat3_dca_init(pdev, device->reg_base);
+
+       return 0;
+}
diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h
new file mode 100644 (file)
index 0000000..99afb12
--- /dev/null
@@ -0,0 +1,215 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef _IOAT_HW_H_
+#define _IOAT_HW_H_
+
+/* PCI Configuration Space Values */
+#define IOAT_PCI_VID            0x8086
+#define IOAT_MMIO_BAR          0
+
+/* CB device ID's */
+#define IOAT_PCI_DID_5000       0x1A38
+#define IOAT_PCI_DID_CNB        0x360B
+#define IOAT_PCI_DID_SCNB       0x65FF
+#define IOAT_PCI_DID_SNB        0x402F
+
+#define IOAT_PCI_RID            0x00
+#define IOAT_PCI_SVID           0x8086
+#define IOAT_PCI_SID            0x8086
+#define IOAT_VER_1_2            0x12    /* Version 1.2 */
+#define IOAT_VER_2_0            0x20    /* Version 2.0 */
+#define IOAT_VER_3_0            0x30    /* Version 3.0 */
+#define IOAT_VER_3_2            0x32    /* Version 3.2 */
+
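+/* all hardware descriptor formats below occupy a single 64-byte slot */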
+struct ioat_dma_descriptor {
+       uint32_t        size;
+       union {
+               uint32_t ctl;
+               struct {
+                       unsigned int int_en:1;
+                       unsigned int src_snoop_dis:1;
+                       unsigned int dest_snoop_dis:1;
+                       unsigned int compl_write:1;
+                       unsigned int fence:1;
+                       unsigned int null:1;
+                       unsigned int src_brk:1;
+                       unsigned int dest_brk:1;
+                       unsigned int bundle:1;
+                       unsigned int dest_dca:1;
+                       unsigned int hint:1;
+                       unsigned int rsvd2:13;
+                       #define IOAT_OP_COPY 0x00
+                       unsigned int op:8;
+               } ctl_f;
+       };
+       uint64_t        src_addr;
+       uint64_t        dst_addr;
+       uint64_t        next;
+       uint64_t        rsv1;
+       uint64_t        rsv2;
+       /* store some driver data in an unused portion of the descriptor */
+       union {
+               uint64_t        user1;
+               uint64_t        tx_cnt;
+       };
+       uint64_t        user2;
+};
+
+struct ioat_fill_descriptor {
+       uint32_t        size;
+       union {
+               uint32_t ctl;
+               struct {
+                       unsigned int int_en:1;
+                       unsigned int rsvd:1;
+                       unsigned int dest_snoop_dis:1;
+                       unsigned int compl_write:1;
+                       unsigned int fence:1;
+                       unsigned int rsvd2:2;
+                       unsigned int dest_brk:1;
+                       unsigned int bundle:1;
+                       unsigned int rsvd4:15;
+                       #define IOAT_OP_FILL 0x01
+                       unsigned int op:8;
+               } ctl_f;
+       };
+       uint64_t        src_data;
+       uint64_t        dst_addr;
+       uint64_t        next;
+       uint64_t        rsv1;
+       uint64_t        next_dst_addr;
+       uint64_t        user1;
+       uint64_t        user2;
+};
+
+struct ioat_xor_descriptor {
+       uint32_t        size;
+       union {
+               uint32_t ctl;
+               struct {
+                       unsigned int int_en:1;
+                       unsigned int src_snoop_dis:1;
+                       unsigned int dest_snoop_dis:1;
+                       unsigned int compl_write:1;
+                       unsigned int fence:1;
+                       unsigned int src_cnt:3;
+                       unsigned int bundle:1;
+                       unsigned int dest_dca:1;
+                       unsigned int hint:1;
+                       unsigned int rsvd:13;
+                       #define IOAT_OP_XOR 0x87
+                       #define IOAT_OP_XOR_VAL 0x88
+                       unsigned int op:8;
+               } ctl_f;
+       };
+       uint64_t        src_addr;
+       uint64_t        dst_addr;
+       uint64_t        next;
+       uint64_t        src_addr2;
+       uint64_t        src_addr3;
+       uint64_t        src_addr4;
+       uint64_t        src_addr5;
+};
+
+struct ioat_xor_ext_descriptor {
+       uint64_t        src_addr6;
+       uint64_t        src_addr7;
+       uint64_t        src_addr8;
+       uint64_t        next;
+       uint64_t        rsvd[4];
+};
+
+struct ioat_pq_descriptor {
+       uint32_t        size;
+       union {
+               uint32_t ctl;
+               struct {
+                       unsigned int int_en:1;
+                       unsigned int src_snoop_dis:1;
+                       unsigned int dest_snoop_dis:1;
+                       unsigned int compl_write:1;
+                       unsigned int fence:1;
+                       unsigned int src_cnt:3;
+                       unsigned int bundle:1;
+                       unsigned int dest_dca:1;
+                       unsigned int hint:1;
+                       unsigned int p_disable:1;
+                       unsigned int q_disable:1;
+                       unsigned int rsvd:11;
+                       #define IOAT_OP_PQ 0x89
+                       #define IOAT_OP_PQ_VAL 0x8a
+                       unsigned int op:8;
+               } ctl_f;
+       };
+       uint64_t        src_addr;
+       uint64_t        p_addr;
+       uint64_t        next;
+       uint64_t        src_addr2;
+       uint64_t        src_addr3;
+       uint8_t         coef[8];
+       uint64_t        q_addr;
+};
+
+struct ioat_pq_ext_descriptor {
+       uint64_t        src_addr4;
+       uint64_t        src_addr5;
+       uint64_t        src_addr6;
+       uint64_t        next;
+       uint64_t        src_addr7;
+       uint64_t        src_addr8;
+       uint64_t        rsvd[2];
+};
+
+struct ioat_pq_update_descriptor {
+       uint32_t        size;
+       union {
+               uint32_t ctl;
+               struct {
+                       unsigned int int_en:1;
+                       unsigned int src_snoop_dis:1;
+                       unsigned int dest_snoop_dis:1;
+                       unsigned int compl_write:1;
+                       unsigned int fence:1;
+                       unsigned int src_cnt:3;
+                       unsigned int bundle:1;
+                       unsigned int dest_dca:1;
+                       unsigned int hint:1;
+                       unsigned int p_disable:1;
+                       unsigned int q_disable:1;
+                       unsigned int rsvd:3;
+                       unsigned int coef:8;
+                       #define IOAT_OP_PQ_UP 0x8b
+                       unsigned int op:8;
+               } ctl_f;
+       };
+       uint64_t        src_addr;
+       uint64_t        p_addr;
+       uint64_t        next;
+       uint64_t        src_addr2;
+       uint64_t        p_src;
+       uint64_t        q_src;
+       uint64_t        q_addr;
+};
+
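+/* raw view of a 64-byte descriptor as eight quadwords, used by helpers
+ * that address xor/pq source fields generically across the base and
+ * extended descriptors
+ */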
+struct ioat_raw_descriptor {
+       uint64_t        field[8];
+};
+#endif
diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c
new file mode 100644 (file)
index 0000000..d545fae
--- /dev/null
@@ -0,0 +1,210 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2007 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine, which does asynchronous
+ * copy operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dca.h>
+#include "dma.h"
+#include "dma_v2.h"
+#include "registers.h"
+#include "hw.h"
+
+MODULE_VERSION(IOAT_DMA_VERSION);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+
+static struct pci_device_id ioat_pci_tbl[] = {
+       /* I/OAT v1 platforms */
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB)  },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
+       { PCI_VDEVICE(UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
+
+       /* I/OAT v2 platforms */
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
+
+       /* I/OAT v3 platforms */
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
+
+       /* I/OAT v3.2 platforms */
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) },
+
+       { 0, }
+};
+MODULE_DEVICE_TABLE(pci, ioat_pci_tbl);
+
+static int __devinit ioat_pci_probe(struct pci_dev *pdev,
+                                   const struct pci_device_id *id);
+static void __devexit ioat_remove(struct pci_dev *pdev);
+
+static int ioat_dca_enabled = 1;
+module_param(ioat_dca_enabled, int, 0644);
+MODULE_PARM_DESC(ioat_dca_enabled, "control support of the DCA service (default: 1)");
+
+struct kmem_cache *ioat2_cache;
+
+#define DRV_NAME "ioatdma"
+
+static struct pci_driver ioat_pci_driver = {
+       .name           = DRV_NAME,
+       .id_table       = ioat_pci_tbl,
+       .probe          = ioat_pci_probe,
+       .remove         = __devexit_p(ioat_remove),
+};
+
+static struct ioatdma_device *
+alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase)
+{
+       struct device *dev = &pdev->dev;
+       struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
+
+       if (!d)
+               return NULL;
+       d->pdev = pdev;
+       d->reg_base = iobase;
+       return d;
+}
+
+static int __devinit ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+       void __iomem * const *iomap;
+       struct device *dev = &pdev->dev;
+       struct ioatdma_device *device;
+       int err;
+
+       err = pcim_enable_device(pdev);
+       if (err)
+               return err;
+
+       err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME);
+       if (err)
+               return err;
+       iomap = pcim_iomap_table(pdev);
+       if (!iomap)
+               return -ENOMEM;
+
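+       /* prefer 64-bit DMA masks, falling back to 32-bit */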
+       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+       if (err)
+               err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+       if (err)
+               return err;
+
+       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+       if (err)
+               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+       if (err)
+               return err;
+
+       pci_set_master(pdev);
+
+       device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]);
+       if (!device)
+               return -ENOMEM;
+       pci_set_drvdata(pdev, device);
+
+       device->version = readb(device->reg_base + IOAT_VER_OFFSET);
+       if (device->version == IOAT_VER_1_2)
+               err = ioat1_dma_probe(device, ioat_dca_enabled);
+       else if (device->version == IOAT_VER_2_0)
+               err = ioat2_dma_probe(device, ioat_dca_enabled);
+       else if (device->version >= IOAT_VER_3_0)
+               err = ioat3_dma_probe(device, ioat_dca_enabled);
+       else
+               return -ENODEV;
+
+       if (err) {
+               dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n");
+               return -ENODEV;
+       }
+
+       return 0;
+}
+
+static void __devexit ioat_remove(struct pci_dev *pdev)
+{
+       struct ioatdma_device *device = pci_get_drvdata(pdev);
+
+       if (!device)
+               return;
+
+       dev_err(&pdev->dev, "Removing dma and dca services\n");
+       if (device->dca) {
+               unregister_dca_provider(device->dca, &pdev->dev);
+               free_dca_provider(device->dca);
+               device->dca = NULL;
+       }
+       ioat_dma_remove(device);
+}
+
+static int __init ioat_init_module(void)
+{
+       int err;
+
+       pr_info("%s: Intel(R) QuickData Technology Driver %s\n",
+               DRV_NAME, IOAT_DMA_VERSION);
+
+       ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent),
+                                       0, SLAB_HWCACHE_ALIGN, NULL);
+       if (!ioat2_cache)
+               return -ENOMEM;
+
+       err = pci_register_driver(&ioat_pci_driver);
+       if (err)
+               kmem_cache_destroy(ioat2_cache);
+
+       return err;
+}
+module_init(ioat_init_module);
+
+static void __exit ioat_exit_module(void)
+{
+       pci_unregister_driver(&ioat_pci_driver);
+       kmem_cache_destroy(ioat2_cache);
+}
+module_exit(ioat_exit_module);
diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h
new file mode 100644 (file)
index 0000000..63038e1
--- /dev/null
@@ -0,0 +1,250 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef _IOAT_REGISTERS_H_
+#define _IOAT_REGISTERS_H_
+
+#define IOAT_PCI_DMACTRL_OFFSET                        0x48
+#define IOAT_PCI_DMACTRL_DMA_EN                        0x00000001
+#define IOAT_PCI_DMACTRL_MSI_EN                        0x00000002
+
+#define IOAT_PCI_DEVICE_ID_OFFSET              0x02
+#define IOAT_PCI_DMAUNCERRSTS_OFFSET           0x148
+#define IOAT_PCI_CHANERRMASK_INT_OFFSET                0x184
+
+/* MMIO Device Registers */
+#define IOAT_CHANCNT_OFFSET                    0x00    /*  8-bit */
+
+#define IOAT_XFERCAP_OFFSET                    0x01    /*  8-bit */
+#define IOAT_XFERCAP_4KB                       12
+#define IOAT_XFERCAP_8KB                       13
+#define IOAT_XFERCAP_16KB                      14
+#define IOAT_XFERCAP_32KB                      15
+#define IOAT_XFERCAP_32GB                      0
+
+#define IOAT_GENCTRL_OFFSET                    0x02    /*  8-bit */
+#define IOAT_GENCTRL_DEBUG_EN                  0x01
+
+#define IOAT_INTRCTRL_OFFSET                   0x03    /*  8-bit */
+#define IOAT_INTRCTRL_MASTER_INT_EN            0x01    /* Master Interrupt Enable */
+#define IOAT_INTRCTRL_INT_STATUS               0x02    /* ATTNSTATUS -or- Channel Int */
+#define IOAT_INTRCTRL_INT                      0x04    /* INT_STATUS -and- MASTER_INT_EN */
+#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL      0x08    /* Enable all MSI-X vectors */
+
+#define IOAT_ATTNSTATUS_OFFSET                 0x04    /* Each bit is a channel */
+
+#define IOAT_VER_OFFSET                                0x08    /*  8-bit */
+#define IOAT_VER_MAJOR_MASK                    0xF0
+#define IOAT_VER_MINOR_MASK                    0x0F
+#define GET_IOAT_VER_MAJOR(x)                  (((x) & IOAT_VER_MAJOR_MASK) >> 4)
+#define GET_IOAT_VER_MINOR(x)                  ((x) & IOAT_VER_MINOR_MASK)
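+/* e.g. a version register of 0x32 decodes to major 3, minor 2 (IOAT_VER_3_2) */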
+
+#define IOAT_PERPORTOFFSET_OFFSET              0x0A    /* 16-bit */
+
+#define IOAT_INTRDELAY_OFFSET                  0x0C    /* 16-bit */
+#define IOAT_INTRDELAY_INT_DELAY_MASK          0x3FFF  /* Interrupt Delay Time */
+#define IOAT_INTRDELAY_COALESE_SUPPORT         0x8000  /* Interrupt Coalescing Supported */
+
+#define IOAT_DEVICE_STATUS_OFFSET              0x0E    /* 16-bit */
+#define IOAT_DEVICE_STATUS_DEGRADED_MODE       0x0001
+#define IOAT_DEVICE_MMIO_RESTRICTED            0x0002
+#define IOAT_DEVICE_MEMORY_BYPASS              0x0004
+#define IOAT_DEVICE_ADDRESS_REMAPPING          0x0008
+
+#define IOAT_DMA_CAP_OFFSET                    0x10    /* 32-bit */
+#define IOAT_CAP_PAGE_BREAK                    0x00000001
+#define IOAT_CAP_CRC                           0x00000002
+#define IOAT_CAP_SKIP_MARKER                   0x00000004
+#define IOAT_CAP_DCA                           0x00000010
+#define IOAT_CAP_CRC_MOVE                      0x00000020
+#define IOAT_CAP_FILL_BLOCK                    0x00000040
+#define IOAT_CAP_APIC                          0x00000080
+#define IOAT_CAP_XOR                           0x00000100
+#define IOAT_CAP_PQ                            0x00000200
+
+#define IOAT_CHANNEL_MMIO_SIZE                 0x80    /* Each Channel MMIO space is this size */
+
+/* DMA Channel Registers */
+#define IOAT_CHANCTRL_OFFSET                   0x00    /* 16-bit Channel Control Register */
+#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK    0xF000
+#define IOAT3_CHANCTRL_COMPL_DCA_EN            0x0200
+#define IOAT_CHANCTRL_CHANNEL_IN_USE           0x0100
+#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL    0x0020
+#define IOAT_CHANCTRL_ERR_INT_EN               0x0010
+#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN         0x0008
+#define IOAT_CHANCTRL_ERR_COMPLETION_EN                0x0004
+#define IOAT_CHANCTRL_INT_REARM                        0x0001
+#define IOAT_CHANCTRL_RUN                      (IOAT_CHANCTRL_INT_REARM |\
+                                                IOAT_CHANCTRL_ERR_COMPLETION_EN |\
+                                                IOAT_CHANCTRL_ANY_ERR_ABORT_EN |\
+                                                IOAT_CHANCTRL_ERR_INT_EN)
+
+#define IOAT_DMA_COMP_OFFSET                   0x02    /* 16-bit DMA channel compatibility */
+#define IOAT_DMA_COMP_V1                       0x0001  /* Compatibility with DMA version 1 */
+#define IOAT_DMA_COMP_V2                       0x0002  /* Compatibility with DMA version 2 */
+
+
+#define IOAT1_CHANSTS_OFFSET           0x04    /* 64-bit Channel Status Register */
+#define IOAT2_CHANSTS_OFFSET           0x08    /* 64-bit Channel Status Register */
+#define IOAT_CHANSTS_OFFSET(ver)               ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET)
+#define IOAT1_CHANSTS_OFFSET_LOW       0x04
+#define IOAT2_CHANSTS_OFFSET_LOW       0x08
+#define IOAT_CHANSTS_OFFSET_LOW(ver)           ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW)
+#define IOAT1_CHANSTS_OFFSET_HIGH      0x08
+#define IOAT2_CHANSTS_OFFSET_HIGH      0x0C
+#define IOAT_CHANSTS_OFFSET_HIGH(ver)          ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH)
+#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR (~0x3fULL)
+#define IOAT_CHANSTS_SOFT_ERR                  0x10ULL
+#define IOAT_CHANSTS_UNAFFILIATED_ERR          0x8ULL
+#define IOAT_CHANSTS_STATUS    0x7ULL
+#define IOAT_CHANSTS_ACTIVE    0x0
+#define IOAT_CHANSTS_DONE      0x1
+#define IOAT_CHANSTS_SUSPENDED 0x2
+#define IOAT_CHANSTS_HALTED    0x3
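+/* the low six bits of CHANSTS carry state and error flags; mask them
+ * off with IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR to recover the
+ * completed descriptor address
+ */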
+
+
+
+#define IOAT_CHAN_DMACOUNT_OFFSET      0x06    /* 16-bit DMA Count register */
+
+#define IOAT_DCACTRL_OFFSET         0x30   /* 32 bit Direct Cache Access Control Register */
+#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000
+#define IOAT_DCACTRL_TARGET_CPU_MASK   0xFFFF /* APIC ID */
+
+/* CB DCA Memory Space Registers */
+#define IOAT_DCAOFFSET_OFFSET       0x14
+/* CB_BAR + IOAT_DCAOFFSET value */
+#define IOAT_DCA_VER_OFFSET         0x00
+#define IOAT_DCA_VER_MAJOR_MASK     0xF0
+#define IOAT_DCA_VER_MINOR_MASK     0x0F
+
+#define IOAT_DCA_COMP_OFFSET        0x02
+#define IOAT_DCA_COMP_V1            0x1
+
+#define IOAT_FSB_CAPABILITY_OFFSET  0x04
+#define IOAT_FSB_CAPABILITY_PREFETCH    0x1
+
+#define IOAT_PCI_CAPABILITY_OFFSET  0x06
+#define IOAT_PCI_CAPABILITY_MEMWR   0x1
+
+#define IOAT_FSB_CAP_ENABLE_OFFSET  0x08
+#define IOAT_FSB_CAP_ENABLE_PREFETCH    0x1
+
+#define IOAT_PCI_CAP_ENABLE_OFFSET  0x0A
+#define IOAT_PCI_CAP_ENABLE_MEMWR   0x1
+
+#define IOAT_APICID_TAG_MAP_OFFSET  0x0C
+#define IOAT_APICID_TAG_MAP_TAG0    0x0000000F
+#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0
+#define IOAT_APICID_TAG_MAP_TAG1    0x000000F0
+#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4
+#define IOAT_APICID_TAG_MAP_TAG2    0x00000F00
+#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8
+#define IOAT_APICID_TAG_MAP_TAG3    0x0000F000
+#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12
+#define IOAT_APICID_TAG_MAP_TAG4    0x000F0000
+#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16
+#define IOAT_APICID_TAG_CB2_VALID   0x8080808080
+
+#define IOAT_DCA_GREQID_OFFSET      0x10
+#define IOAT_DCA_GREQID_SIZE        0x04
+#define IOAT_DCA_GREQID_MASK        0xFFFF
+#define IOAT_DCA_GREQID_IGNOREFUN   0x10000000
+#define IOAT_DCA_GREQID_VALID       0x20000000
+#define IOAT_DCA_GREQID_LASTID      0x80000000
+
+#define IOAT3_CSI_CAPABILITY_OFFSET 0x08
+#define IOAT3_CSI_CAPABILITY_PREFETCH    0x1
+
+#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A
+#define IOAT3_PCI_CAPABILITY_MEMWR  0x1
+
+#define IOAT3_CSI_CONTROL_OFFSET    0x0C
+#define IOAT3_CSI_CONTROL_PREFETCH  0x1
+
+#define IOAT3_PCI_CONTROL_OFFSET    0x0E
+#define IOAT3_PCI_CONTROL_MEMWR     0x1
+
+#define IOAT3_APICID_TAG_MAP_OFFSET 0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_LOW  0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14
+
+#define IOAT3_DCA_GREQID_OFFSET     0x02
+
+#define IOAT1_CHAINADDR_OFFSET         0x0C    /* 64-bit Descriptor Chain Address Register */
+#define IOAT2_CHAINADDR_OFFSET         0x10    /* 64-bit Descriptor Chain Address Register */
+#define IOAT_CHAINADDR_OFFSET(ver)             ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET)
+#define IOAT1_CHAINADDR_OFFSET_LOW     0x0C
+#define IOAT2_CHAINADDR_OFFSET_LOW     0x10
+#define IOAT_CHAINADDR_OFFSET_LOW(ver)         ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW)
+#define IOAT1_CHAINADDR_OFFSET_HIGH    0x10
+#define IOAT2_CHAINADDR_OFFSET_HIGH    0x14
+#define IOAT_CHAINADDR_OFFSET_HIGH(ver)                ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH)
+
+#define IOAT1_CHANCMD_OFFSET           0x14    /*  8-bit DMA Channel Command Register */
+#define IOAT2_CHANCMD_OFFSET           0x04    /*  8-bit DMA Channel Command Register */
+#define IOAT_CHANCMD_OFFSET(ver)               ((ver) < IOAT_VER_2_0 \
+                                               ? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET)
+#define IOAT_CHANCMD_RESET                     0x20
+#define IOAT_CHANCMD_RESUME                    0x10
+#define IOAT_CHANCMD_ABORT                     0x08
+#define IOAT_CHANCMD_SUSPEND                   0x04
+#define IOAT_CHANCMD_APPEND                    0x02
+#define IOAT_CHANCMD_START                     0x01
+
+#define IOAT_CHANCMP_OFFSET                    0x18    /* 64-bit Channel Completion Address Register */
+#define IOAT_CHANCMP_OFFSET_LOW                        0x18
+#define IOAT_CHANCMP_OFFSET_HIGH               0x1C
+
+#define IOAT_CDAR_OFFSET                       0x20    /* 64-bit Current Descriptor Address Register */
+#define IOAT_CDAR_OFFSET_LOW                   0x20
+#define IOAT_CDAR_OFFSET_HIGH                  0x24
+
+#define IOAT_CHANERR_OFFSET                    0x28    /* 32-bit Channel Error Register */
+#define IOAT_CHANERR_SRC_ADDR_ERR      0x0001
+#define IOAT_CHANERR_DEST_ADDR_ERR     0x0002
+#define IOAT_CHANERR_NEXT_ADDR_ERR     0x0004
+#define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR       0x0008
+#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR      0x0010
+#define IOAT_CHANERR_CHANCMD_ERR               0x0020
+#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR  0x0040
+#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR      0x0080
+#define IOAT_CHANERR_READ_DATA_ERR             0x0100
+#define IOAT_CHANERR_WRITE_DATA_ERR            0x0200
+#define IOAT_CHANERR_CONTROL_ERR       0x0400
+#define IOAT_CHANERR_LENGTH_ERR        0x0800
+#define IOAT_CHANERR_COMPLETION_ADDR_ERR       0x1000
+#define IOAT_CHANERR_INT_CONFIGURATION_ERR     0x2000
+#define IOAT_CHANERR_SOFT_ERR                  0x4000
+#define IOAT_CHANERR_UNAFFILIATED_ERR          0x8000
+#define IOAT_CHANERR_XOR_P_OR_CRC_ERR          0x10000
+#define IOAT_CHANERR_XOR_Q_ERR                 0x20000
+#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR      0x40000
+
+#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR)
+
+#define IOAT_CHANERR_MASK_OFFSET               0x2C    /* 32-bit Channel Error Register */
+
+#endif /* _IOAT_REGISTERS_H_ */
diff --git a/drivers/dma/ioat_dca.c b/drivers/dma/ioat_dca.c
deleted file mode 100644 (file)
index c012a1e..0000000
+++ /dev/null
@@ -1,681 +0,0 @@
-/*
- * Intel I/OAT DMA Linux driver
- * Copyright(c) 2007 - 2009 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/smp.h>
-#include <linux/interrupt.h>
-#include <linux/dca.h>
-
-/* either a kernel change is needed, or we need something like this in kernel */
-#ifndef CONFIG_SMP
-#include <asm/smp.h>
-#undef cpu_physical_id
-#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24)
-#endif
-
-#include "ioatdma.h"
-#include "ioatdma_registers.h"
-
-/*
- * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6
- * contain the bit number of the APIC ID to map into the DCA tag.  If the valid
- * bit is not set, then the value must be 0 or 1 and defines the bit in the tag.
- */
-#define DCA_TAG_MAP_VALID 0x80
-
-#define DCA3_TAG_MAP_BIT_TO_INV 0x80
-#define DCA3_TAG_MAP_BIT_TO_SEL 0x40
-#define DCA3_TAG_MAP_LITERAL_VAL 0x1
-
-#define DCA_TAG_MAP_MASK 0xDF
-
-/* expected tag map bytes for I/OAT ver.2 */
-#define DCA2_TAG_MAP_BYTE0 0x80
-#define DCA2_TAG_MAP_BYTE1 0x0
-#define DCA2_TAG_MAP_BYTE2 0x81
-#define DCA2_TAG_MAP_BYTE3 0x82
-#define DCA2_TAG_MAP_BYTE4 0x82
-
-/* verify if tag map matches expected values */
-static inline int dca2_tag_map_valid(u8 *tag_map)
-{
-       return ((tag_map[0] == DCA2_TAG_MAP_BYTE0) &&
-               (tag_map[1] == DCA2_TAG_MAP_BYTE1) &&
-               (tag_map[2] == DCA2_TAG_MAP_BYTE2) &&
-               (tag_map[3] == DCA2_TAG_MAP_BYTE3) &&
-               (tag_map[4] == DCA2_TAG_MAP_BYTE4));
-}
-
-/*
- * "Legacy" DCA systems do not implement the DCA register set in the
- * I/OAT device.  Software needs direct support for their tag mappings.
- */
-
-#define APICID_BIT(x)          (DCA_TAG_MAP_VALID | (x))
-#define IOAT_TAG_MAP_LEN       8
-
-static u8 ioat_tag_map_BNB[IOAT_TAG_MAP_LEN] = {
-       1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
-static u8 ioat_tag_map_SCNB[IOAT_TAG_MAP_LEN] = {
-       1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
-static u8 ioat_tag_map_CNB[IOAT_TAG_MAP_LEN] = {
-       1, APICID_BIT(1), APICID_BIT(3), APICID_BIT(4), APICID_BIT(2), };
-static u8 ioat_tag_map_UNISYS[IOAT_TAG_MAP_LEN] = { 0 };
-
-/* pack PCI B/D/F into a u16 */
-static inline u16 dcaid_from_pcidev(struct pci_dev *pci)
-{
-       return (pci->bus->number << 8) | pci->devfn;
-}
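
The requester ID packed here follows the 5000-series table layout documented
below: bus number in bits 15:8, device in 7:3, function in 2:0. A stand-alone
sketch with made-up B/D/F values:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint8_t bus = 0x05, dev = 0x1f, fn = 0x2;	/* made-up B/D/F */
	uint16_t devfn = (uint16_t)((dev << 3) | fn);	/* pci_dev->devfn layout */
	uint16_t rid = (uint16_t)((bus << 8) | devfn);	/* as dcaid_from_pcidev() */

	printf("rid = 0x%04x\n", rid);	/* prints rid = 0x05fa */
	return 0;
}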
-
-static int dca_enabled_in_bios(struct pci_dev *pdev)
-{
-       /* CPUID level 9 returns DCA configuration */
-       /* Bit 0 indicates DCA enabled by the BIOS */
-       unsigned long cpuid_level_9;
-       int res;
-
-       cpuid_level_9 = cpuid_eax(9);
-       res = test_bit(0, &cpuid_level_9);
-       if (!res)
-               dev_err(&pdev->dev, "DCA is disabled in BIOS\n");
-
-       return res;
-}
-
-static int system_has_dca_enabled(struct pci_dev *pdev)
-{
-       if (boot_cpu_has(X86_FEATURE_DCA))
-               return dca_enabled_in_bios(pdev);
-
-       dev_err(&pdev->dev, "boot cpu doesn't have X86_FEATURE_DCA\n");
-       return 0;
-}
-
-struct ioat_dca_slot {
-       struct pci_dev *pdev;   /* requester device */
-       u16 rid;                /* requester id, as used by IOAT */
-};
-
-#define IOAT_DCA_MAX_REQ 6
-#define IOAT3_DCA_MAX_REQ 2
-
-struct ioat_dca_priv {
-       void __iomem            *iobase;
-       void __iomem            *dca_base;
-       int                      max_requesters;
-       int                      requester_count;
-       u8                       tag_map[IOAT_TAG_MAP_LEN];
-       struct ioat_dca_slot     req_slots[0];
-};
-
-/* 5000 series chipset DCA Port Requester ID Table Entry Format
- * [15:8]      PCI-Express Bus Number
- * [7:3]       PCI-Express Device Number
- * [2:0]       PCI-Express Function Number
- *
- * 5000 series chipset DCA control register format
- * [7:1]       Reserved (0)
- * [0]         Ignore Function Number
- */
-
-static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev)
-{
-       struct ioat_dca_priv *ioatdca = dca_priv(dca);
-       struct pci_dev *pdev;
-       int i;
-       u16 id;
-
-       /* This implementation only supports PCI-Express */
-       if (dev->bus != &pci_bus_type)
-               return -ENODEV;
-       pdev = to_pci_dev(dev);
-       id = dcaid_from_pcidev(pdev);
-
-       if (ioatdca->requester_count == ioatdca->max_requesters)
-               return -ENODEV;
-
-       for (i = 0; i < ioatdca->max_requesters; i++) {
-               if (ioatdca->req_slots[i].pdev == NULL) {
-                       /* found an empty slot */
-                       ioatdca->requester_count++;
-                       ioatdca->req_slots[i].pdev = pdev;
-                       ioatdca->req_slots[i].rid = id;
-                       writew(id, ioatdca->dca_base + (i * 4));
-                       /* make sure the ignore function bit is off */
-                       writeb(0, ioatdca->dca_base + (i * 4) + 2);
-                       return i;
-               }
-       }
-       /* Error, ioatdca->requester_count is out of whack */
-       return -EFAULT;
-}
-
-static int ioat_dca_remove_requester(struct dca_provider *dca,
-                                    struct device *dev)
-{
-       struct ioat_dca_priv *ioatdca = dca_priv(dca);
-       struct pci_dev *pdev;
-       int i;
-
-       /* This implementation only supports PCI-Express */
-       if (dev->bus != &pci_bus_type)
-               return -ENODEV;
-       pdev = to_pci_dev(dev);
-
-       for (i = 0; i < ioatdca->max_requesters; i++) {
-               if (ioatdca->req_slots[i].pdev == pdev) {
-                       writew(0, ioatdca->dca_base + (i * 4));
-                       ioatdca->req_slots[i].pdev = NULL;
-                       ioatdca->req_slots[i].rid = 0;
-                       ioatdca->requester_count--;
-                       return i;
-               }
-       }
-       return -ENODEV;
-}
-
-static u8 ioat_dca_get_tag(struct dca_provider *dca,
-                          struct device *dev,
-                          int cpu)
-{
-       struct ioat_dca_priv *ioatdca = dca_priv(dca);
-       int i, apic_id, bit, value;
-       u8 entry, tag;
-
-       tag = 0;
-       apic_id = cpu_physical_id(cpu);
-
-       for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
-               entry = ioatdca->tag_map[i];
-               if (entry & DCA_TAG_MAP_VALID) {
-                       bit = entry & ~DCA_TAG_MAP_VALID;
-                       value = (apic_id & (1 << bit)) ? 1 : 0;
-               } else {
-                       value = entry ? 1 : 0;
-               }
-               tag |= (value << i);
-       }
-       return tag;
-}
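
To make the valid-bit convention concrete, here is a user-space sketch of the
same loop run over the BNB tag map from above with a made-up APIC ID; only
constants already defined in this file are assumed.

#include <stdio.h>
#include <stdint.h>

#define DCA_TAG_MAP_VALID	0x80
#define APICID_BIT(x)		(DCA_TAG_MAP_VALID | (x))
#define IOAT_TAG_MAP_LEN	8

int main(void)
{
	/* same layout as ioat_tag_map_BNB; trailing entries stay 0 */
	uint8_t tag_map[IOAT_TAG_MAP_LEN] = {
		1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2),
	};
	int apic_id = 0x6;	/* made-up APIC ID: bits 1 and 2 set */
	uint8_t tag = 0;
	int i;

	for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
		uint8_t entry = tag_map[i];
		int value;

		if (entry & DCA_TAG_MAP_VALID)
			value = (apic_id >> (entry & ~DCA_TAG_MAP_VALID)) & 1;
		else
			value = entry ? 1 : 0;
		tag |= value << i;
	}
	printf("tag = 0x%02x\n", tag);	/* prints tag = 0x0f */
	return 0;
}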
-
-static int ioat_dca_dev_managed(struct dca_provider *dca,
-                               struct device *dev)
-{
-       struct ioat_dca_priv *ioatdca = dca_priv(dca);
-       struct pci_dev *pdev;
-       int i;
-
-       pdev = to_pci_dev(dev);
-       for (i = 0; i < ioatdca->max_requesters; i++) {
-               if (ioatdca->req_slots[i].pdev == pdev)
-                       return 1;
-       }
-       return 0;
-}
-
-static struct dca_ops ioat_dca_ops = {
-       .add_requester          = ioat_dca_add_requester,
-       .remove_requester       = ioat_dca_remove_requester,
-       .get_tag                = ioat_dca_get_tag,
-       .dev_managed            = ioat_dca_dev_managed,
-};
-
-
-struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
-{
-       struct dca_provider *dca;
-       struct ioat_dca_priv *ioatdca;
-       u8 *tag_map = NULL;
-       int i;
-       int err;
-       u8 version;
-       u8 max_requesters;
-
-       if (!system_has_dca_enabled(pdev))
-               return NULL;
-
-       /* I/OAT v1 systems must have a known tag_map to support DCA */
-       switch (pdev->vendor) {
-       case PCI_VENDOR_ID_INTEL:
-               switch (pdev->device) {
-               case PCI_DEVICE_ID_INTEL_IOAT:
-                       tag_map = ioat_tag_map_BNB;
-                       break;
-               case PCI_DEVICE_ID_INTEL_IOAT_CNB:
-                       tag_map = ioat_tag_map_CNB;
-                       break;
-               case PCI_DEVICE_ID_INTEL_IOAT_SCNB:
-                       tag_map = ioat_tag_map_SCNB;
-                       break;
-               }
-               break;
-       case PCI_VENDOR_ID_UNISYS:
-               switch (pdev->device) {
-               case PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR:
-                       tag_map = ioat_tag_map_UNISYS;
-                       break;
-               }
-               break;
-       }
-       if (tag_map == NULL)
-               return NULL;
-
-       version = readb(iobase + IOAT_VER_OFFSET);
-       if (version == IOAT_VER_3_0)
-               max_requesters = IOAT3_DCA_MAX_REQ;
-       else
-               max_requesters = IOAT_DCA_MAX_REQ;
-
-       dca = alloc_dca_provider(&ioat_dca_ops,
-                       sizeof(*ioatdca) +
-                       (sizeof(struct ioat_dca_slot) * max_requesters));
-       if (!dca)
-               return NULL;
-
-       ioatdca = dca_priv(dca);
-       ioatdca->max_requesters = max_requesters;
-       ioatdca->dca_base = iobase + 0x54;
-
-       /* copy over the APIC ID to DCA tag mapping */
-       for (i = 0; i < IOAT_TAG_MAP_LEN; i++)
-               ioatdca->tag_map[i] = tag_map[i];
-
-       err = register_dca_provider(dca, &pdev->dev);
-       if (err) {
-               free_dca_provider(dca);
-               return NULL;
-       }
-
-       return dca;
-}
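
A hedged sketch of how a probe path might hand the mapped BAR to
ioat_dca_init(); the function name and surrounding structure here are
illustrative only, not the driver's actual probe code.

/* illustrative hookup only: call once the device registers are mapped */
static void ioat_setup_dca_example(struct pci_dev *pdev, void __iomem *iobase)
{
	struct dca_provider *dca = ioat_dca_init(pdev, iobase);

	if (!dca)	/* NULL is not fatal: the DMA engine works without DCA */
		dev_info(&pdev->dev, "DCA disabled or unsupported\n");
	/* a real caller would keep the provider to unregister it on remove */
}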
-
-
-static int ioat2_dca_add_requester(struct dca_provider *dca, struct device *dev)
-{
-       struct ioat_dca_priv *ioatdca = dca_priv(dca);
-       struct pci_dev *pdev;
-       int i;
-       u16 id;
-       u16 global_req_table;
-
-       /* This implementation only supports PCI-Express */
-       if (dev->bus != &pci_bus_type)
-               return -ENODEV;
-       pdev = to_pci_dev(dev);
-       id = dcaid_from_pcidev(pdev);
-
-       if (ioatdca->requester_count == ioatdca->max_requesters)
-               return -ENODEV;
-
-       for (i = 0; i < ioatdca->max_requesters; i++) {
-               if (ioatdca->req_slots[i].pdev == NULL) {
-                       /* found an empty slot */
-                       ioatdca->requester_count++;
-                       ioatdca->req_slots[i].pdev = pdev;
-                       ioatdca->req_slots[i].rid = id;
-                       global_req_table =
-                             readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
-                       writel(id | IOAT_DCA_GREQID_VALID,
-                              ioatdca->iobase + global_req_table + (i * 4));
-                       return i;
-               }
-       }
-       /* Error, ioatdca->requester_count is out of whack */
-       return -EFAULT;
-}
-
-static int ioat2_dca_remove_requester(struct dca_provider *dca,
-                                     struct device *dev)
-{
-       struct ioat_dca_priv *ioatdca = dca_priv(dca);
-       struct pci_dev *pdev;
-       int i;
-       u16 global_req_table;
-
-       /* This implementation only supports PCI-Express */
-       if (dev->bus != &pci_bus_type)
-               return -ENODEV;
-       pdev = to_pci_dev(dev);
-
-       for (i = 0; i < ioatdca->max_requesters; i++) {
-               if (ioatdca->req_slots[i].pdev == pdev) {
-                       global_req_table =
-                             readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
-                       writel(0, ioatdca->iobase + global_req_table + (i * 4));
-                       ioatdca->req_slots[i].pdev = NULL;
-                       ioatdca->req_slots[i].rid = 0;
-                       ioatdca->requester_count--;
-                       return i;
-               }
-       }
-       return -ENODEV;
-}
-
-static u8 ioat2_dca_get_tag(struct dca_provider *dca,
-                           struct device *dev,
-                           int cpu)
-{
-       u8 tag;
-
-       tag = ioat_dca_get_tag(dca, dev, cpu);
-       tag = (~tag) & 0x1F;
-       return tag;
-}
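
The version-2 tag is just the bitwise complement of the generic tag, truncated
to the five tag bits; a one-line check with a made-up input:

#include <stdio.h>

int main(void)
{
	unsigned int tag = 0x03;	/* made-up generic tag */

	printf("v2 tag = 0x%02x\n", (~tag) & 0x1F);	/* prints 0x1c */
	return 0;
}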
-
-static struct dca_ops ioat2_dca_ops = {
-       .add_requester          = ioat2_dca_add_requester,
-       .remove_requester       = ioat2_dca_remove_requester,
-       .get_tag                = ioat2_dca_get_tag,
-       .dev_managed            = ioat_dca_dev_managed,
-};
-
-static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
-{
-       int slots = 0;
-       u32 req;
-       u16 global_req_table;
-
-       global_req_table = readw(iobase + dca_offset + IOAT_DCA_GREQID_OFFSET);
-       if (global_req_table == 0)
-               return 0;
-       do {
-               req = readl(iobase + global_req_table + (slots * sizeof(u32)));
-               slots++;
-       } while ((req & IOAT_DCA_GREQID_LASTID) == 0);
-
-       return slots;
-}
-
-struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase)
-{
-       struct dca_provider *dca;
-       struct ioat_dca_priv *ioatdca;
-       int slots;
-       int i;
-       int err;
-       u32 tag_map;
-       u16 dca_offset;
-       u16 csi_fsb_control;
-       u16 pcie_control;
-       u8 bit;
-
-       if (!system_has_dca_enabled(pdev))
-               return NULL;
-
-       dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
-       if (dca_offset == 0)
-               return NULL;
-
-       slots = ioat2_dca_count_dca_slots(iobase, dca_offset);
-       if (slots == 0)
-               return NULL;
-
-       dca = alloc_dca_provider(&ioat2_dca_ops,
-                                sizeof(*ioatdca)
-                                     + (sizeof(struct ioat_dca_slot) * slots));
-       if (!dca)
-               return NULL;
-
-       ioatdca = dca_priv(dca);
-       ioatdca->iobase = iobase;
-       ioatdca->dca_base = iobase + dca_offset;
-       ioatdca->max_requesters = slots;
-
-       /* some BIOSes might not know to turn these on */
-       csi_fsb_control = readw(ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
-       if ((csi_fsb_control & IOAT_FSB_CAP_ENABLE_PREFETCH) == 0) {
-               csi_fsb_control |= IOAT_FSB_CAP_ENABLE_PREFETCH;
-               writew(csi_fsb_control,
-                      ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
-       }
-       pcie_control = readw(ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
-       if ((pcie_control & IOAT_PCI_CAP_ENABLE_MEMWR) == 0) {
-               pcie_control |= IOAT_PCI_CAP_ENABLE_MEMWR;
-               writew(pcie_control,
-                      ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
-       }
-
-
-       /* TODO version, compatibility and configuration checks */
-
-       /* copy out the APIC to DCA tag map */
-       tag_map = readl(ioatdca->dca_base + IOAT_APICID_TAG_MAP_OFFSET);
-       for (i = 0; i < 5; i++) {
-               bit = (tag_map >> (4 * i)) & 0x0f;
-               if (bit < 8)
-                       ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID;
-               else
-                       ioatdca->tag_map[i] = 0;
-       }
-
-       if (!dca2_tag_map_valid(ioatdca->tag_map)) {
-               dev_err(&pdev->dev, "APICID_TAG_MAP set incorrectly by BIOS, "
-                       "disabling DCA\n");
-               free_dca_provider(dca);
-               return NULL;
-       }
-
-       err = register_dca_provider(dca, &pdev->dev);
-       if (err) {
-               free_dca_provider(dca);
-               return NULL;
-       }
-
-       return dca;
-}
-
-static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev)
-{
-       struct ioat_dca_priv *ioatdca = dca_priv(dca);
-       struct pci_dev *pdev;
-       int i;
-       u16 id;
-       u16 global_req_table;
-
-       /* This implementation only supports PCI-Express */
-       if (dev->bus != &pci_bus_type)
-               return -ENODEV;
-       pdev = to_pci_dev(dev);
-       id = dcaid_from_pcidev(pdev);
-
-       if (ioatdca->requester_count == ioatdca->max_requesters)
-               return -ENODEV;
-
-       for (i = 0; i < ioatdca->max_requesters; i++) {
-               if (ioatdca->req_slots[i].pdev == NULL) {
-                       /* found an empty slot */
-                       ioatdca->requester_count++;
-                       ioatdca->req_slots[i].pdev = pdev;
-                       ioatdca->req_slots[i].rid = id;
-                       global_req_table =
-                             readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
-                       writel(id | IOAT_DCA_GREQID_VALID,
-                              ioatdca->iobase + global_req_table + (i * 4));
-                       return i;
-               }
-       }
-       /* Error, ioatdca->requester_count is out of whack */
-       return -EFAULT;
-}
-
-static int ioat3_dca_remove_requester(struct dca_provider *dca,
-                                     struct device *dev)
-{
-       struct ioat_dca_priv *ioatdca = dca_priv(dca);
-       struct pci_dev *pdev;
-       int i;
-       u16 global_req_table;
-
-       /* This implementation only supports PCI-Express */
-       if (dev->bus != &pci_bus_type)
-               return -ENODEV;
-       pdev = to_pci_dev(dev);
-
-       for (i = 0; i < ioatdca->max_requesters; i++) {
-               if (ioatdca->req_slots[i].pdev == pdev) {
-                       global_req_table =
-                             readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
-                       writel(0, ioatdca->iobase + global_req_table + (i * 4));
-                       ioatdca->req_slots[i].pdev = NULL;
-                       ioatdca->req_slots[i].rid = 0;
-                       ioatdca->requester_count--;
-                       return i;
-               }
-       }
-       return -ENODEV;
-}
-
-static u8 ioat3_dca_get_tag(struct dca_provider *dca,
-                           struct device *dev,
-                           int cpu)
-{
-       u8 tag;
-
-       struct ioat_dca_priv *ioatdca = dca_priv(dca);
-       int i, apic_id, bit, value;
-       u8 entry;
-
-       tag = 0;
-       apic_id = cpu_physical_id(cpu);
-
-       for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
-               entry = ioatdca->tag_map[i];
-               if (entry & DCA3_TAG_MAP_BIT_TO_SEL) {
-                       bit = entry &
-                               ~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV);
-                       value = (apic_id & (1 << bit)) ? 1 : 0;
-               } else if (entry & DCA3_TAG_MAP_BIT_TO_INV) {
-                       bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV;
-                       value = (apic_id & (1 << bit)) ? 0 : 1;
-               } else {
-                       value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0;
-               }
-               tag |= (value << i);
-       }
-
-       return tag;
-}
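
A user-space sketch of decoding a single version-3 tag-map entry under the
three cases handled above; the entry values and APIC ID are made up.

#include <stdio.h>
#include <stdint.h>

#define DCA3_TAG_MAP_BIT_TO_INV		0x80
#define DCA3_TAG_MAP_BIT_TO_SEL		0x40
#define DCA3_TAG_MAP_LITERAL_VAL	0x1

static int dca3_entry_value(uint8_t entry, int apic_id)
{
	if (entry & DCA3_TAG_MAP_BIT_TO_SEL) {
		int bit = entry &
			~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV);
		return (apic_id >> bit) & 1;		/* selected APIC ID bit */
	}
	if (entry & DCA3_TAG_MAP_BIT_TO_INV) {
		int bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV;
		return !((apic_id >> bit) & 1);		/* inverted APIC ID bit */
	}
	return entry & DCA3_TAG_MAP_LITERAL_VAL;	/* literal 0 or 1 */
}

int main(void)
{
	int apic_id = 0x5;	/* made up: bits 0 and 2 set */

	printf("%d %d %d\n",
	       dca3_entry_value(0x42, apic_id),		/* select bit 2 -> 1 */
	       dca3_entry_value(0x82, apic_id),		/* invert bit 2 -> 0 */
	       dca3_entry_value(0x01, apic_id));	/* literal     -> 1 */
	return 0;
}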
-
-static struct dca_ops ioat3_dca_ops = {
-       .add_requester          = ioat3_dca_add_requester,
-       .remove_requester       = ioat3_dca_remove_requester,
-       .get_tag                = ioat3_dca_get_tag,
-       .dev_managed            = ioat_dca_dev_managed,
-};
-
-static int ioat3_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
-{
-       int slots = 0;
-       u32 req;
-       u16 global_req_table;
-
-       global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET);
-       if (global_req_table == 0)
-               return 0;
-
-       do {
-               req = readl(iobase + global_req_table + (slots * sizeof(u32)));
-               slots++;
-       } while ((req & IOAT_DCA_GREQID_LASTID) == 0);
-
-       return slots;
-}
-
-struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase)
-{
-       struct dca_provider *dca;
-       struct ioat_dca_priv *ioatdca;
-       int slots;
-       int i;
-       int err;
-       u16 dca_offset;
-       u16 csi_fsb_control;
-       u16 pcie_control;
-       u8 bit;
-
-       union {
-               u64 full;
-               struct {
-                       u32 low;
-                       u32 high;
-               };
-       } tag_map;
-
-       if (!system_has_dca_enabled(pdev))
-               return NULL;
-
-       dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
-       if (dca_offset == 0)
-               return NULL;
-
-       slots = ioat3_dca_count_dca_slots(iobase, dca_offset);
-       if (slots == 0)
-               return NULL;
-
-       dca = alloc_dca_provider(&ioat3_dca_ops,
-                                sizeof(*ioatdca)
-                                     + (sizeof(struct ioat_dca_slot) * slots));
-       if (!dca)
-               return NULL;
-
-       ioatdca = dca_priv(dca);
-       ioatdca->iobase = iobase;
-       ioatdca->dca_base = iobase + dca_offset;
-       ioatdca->max_requesters = slots;
-
-       /* some BIOSes might not know to turn these on */
-       csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
-       if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) {
-               csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH;
-               writew(csi_fsb_control,
-                      ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
-       }
-       pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
-       if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) {
-               pcie_control |= IOAT3_PCI_CONTROL_MEMWR;
-               writew(pcie_control,
-                      ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
-       }
-
-
-       /* TODO version, compatibility and configuration checks */
-
-       /* copy out the APIC to DCA tag map */
-       tag_map.low =
-               readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW);
-       tag_map.high =
-               readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH);
-       for (i = 0; i < 8; i++) {
-               bit = tag_map.full >> (8 * i);
-               ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK;
-       }
-
-       err = register_dca_provider(dca, &pdev->dev);
-       if (err) {
-               free_dca_provider(dca);
-               return NULL;
-       }
-
-       return dca;
-}
diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
deleted file mode 100644 (file)
index a600fc0..0000000
+++ /dev/null
@@ -1,1741 +0,0 @@
-/*
- * Intel I/OAT DMA Linux driver
- * Copyright(c) 2004 - 2009 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- */
-
-/*
- * This driver supports an Intel I/OAT DMA engine, which does asynchronous
- * copy operations.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/dmaengine.h>
-#include <linux/delay.h>
-#include <linux/dma-mapping.h>
-#include <linux/workqueue.h>
-#include <linux/i7300_idle.h>
-#include "ioatdma.h"
-#include "ioatdma_registers.h"
-#include "ioatdma_hw.h"
-
-#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common)
-#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
-#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
-#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx)
-
-#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)
-static int ioat_pending_level = 4;
-module_param(ioat_pending_level, int, 0644);
-MODULE_PARM_DESC(ioat_pending_level,
-                "high-water mark for pushing ioat descriptors (default: 4)");
-
-#define RESET_DELAY  msecs_to_jiffies(100)
-#define WATCHDOG_DELAY  round_jiffies(msecs_to_jiffies(2000))
-static void ioat_dma_chan_reset_part2(struct work_struct *work);
-static void ioat_dma_chan_watchdog(struct work_struct *work);
-
-/*
- * workaround for IOAT ver.3.0 null descriptor issue
- * (channel returns error when size is 0)
- */
-#define NULL_DESC_BUFFER_SIZE 1
-
-/* internal functions */
-static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan);
-static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
-
-static struct ioat_desc_sw *
-ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
-static struct ioat_desc_sw *
-ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
-
-static inline struct ioat_dma_chan *ioat_lookup_chan_by_index(
-                                               struct ioatdma_device *device,
-                                               int index)
-{
-       return device->idx[index];
-}
-
-/**
- * ioat_dma_do_interrupt - handler used for single vector interrupt mode
- * @irq: interrupt id
- * @data: interrupt data
- */
-static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
-{
-       struct ioatdma_device *instance = data;
-       struct ioat_dma_chan *ioat_chan;
-       unsigned long attnstatus;
-       int bit;
-       u8 intrctrl;
-
-       intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
-
-       if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
-               return IRQ_NONE;
-
-       if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
-               writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
-               return IRQ_NONE;
-       }
-
-       attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
-       for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
-               ioat_chan = ioat_lookup_chan_by_index(instance, bit);
-               tasklet_schedule(&ioat_chan->cleanup_task);
-       }
-
-       writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
-       return IRQ_HANDLED;
-}
-
-/**
- * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
- * @irq: interrupt id
- * @data: interrupt data
- */
-static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
-{
-       struct ioat_dma_chan *ioat_chan = data;
-
-       tasklet_schedule(&ioat_chan->cleanup_task);
-
-       return IRQ_HANDLED;
-}
-
-static void ioat_dma_cleanup_tasklet(unsigned long data);
-
-/**
- * ioat_dma_enumerate_channels - find and initialize the device's channels
- * @device: the device to be enumerated
- */
-static int ioat_dma_enumerate_channels(struct ioatdma_device *device)
-{
-       u8 xfercap_scale;
-       u32 xfercap;
-       int i;
-       struct ioat_dma_chan *ioat_chan;
-
-       /*
-        * IOAT ver.3 workarounds
-        */
-       if (device->version == IOAT_VER_3_0) {
-               u32 chan_err_mask;
-               u16 dev_id;
-               u32 dmauncerrsts;
-
-               /*
-                * Write CHANERRMSK_INT with 3E07h to mask out the errors
-                * that can cause stability issues for IOAT ver.3
-                */
-               chan_err_mask = 0x3E07;
-               pci_write_config_dword(device->pdev,
-                       IOAT_PCI_CHANERRMASK_INT_OFFSET,
-                       chan_err_mask);
-
-               /*
-                * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
-                * (workaround for spurious config parity error after restart)
-                */
-               pci_read_config_word(device->pdev,
-                       IOAT_PCI_DEVICE_ID_OFFSET,
-                       &dev_id);
-               if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) {
-                       dmauncerrsts = 0x10;
-                       pci_write_config_dword(device->pdev,
-                               IOAT_PCI_DMAUNCERRSTS_OFFSET,
-                               dmauncerrsts);
-               }
-       }
-
-       device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
-       xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
-       xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
-
-#ifdef  CONFIG_I7300_IDLE_IOAT_CHANNEL
-       if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) {
-               device->common.chancnt--;
-       }
-#endif
-       for (i = 0; i < device->common.chancnt; i++) {
-               ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL);
-               if (!ioat_chan) {
-                       device->common.chancnt = i;
-                       break;
-               }
-
-               ioat_chan->device = device;
-               ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1));
-               ioat_chan->xfercap = xfercap;
-               ioat_chan->desccount = 0;
-               INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2);
-               if (ioat_chan->device->version == IOAT_VER_2_0)
-                       writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE |
-                              IOAT_DMA_DCA_ANY_CPU,
-                              ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
-               else if (ioat_chan->device->version == IOAT_VER_3_0)
-                       writel(IOAT_DMA_DCA_ANY_CPU,
-                              ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
-               spin_lock_init(&ioat_chan->cleanup_lock);
-               spin_lock_init(&ioat_chan->desc_lock);
-               INIT_LIST_HEAD(&ioat_chan->free_desc);
-               INIT_LIST_HEAD(&ioat_chan->used_desc);
-               /* This should be made common somewhere in dmaengine.c */
-               ioat_chan->common.device = &device->common;
-               list_add_tail(&ioat_chan->common.device_node,
-                             &device->common.channels);
-               device->idx[i] = ioat_chan;
-               tasklet_init(&ioat_chan->cleanup_task,
-                            ioat_dma_cleanup_tasklet,
-                            (unsigned long) ioat_chan);
-               tasklet_disable(&ioat_chan->cleanup_task);
-       }
-       return device->common.chancnt;
-}
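
Channel register blocks sit 0x80 bytes apart, starting 0x80 past the device's
own registers, which is what the reg_base arithmetic above encodes; a quick
offset sketch with a made-up channel count:

#include <stdio.h>

int main(void)
{
	int i, chancnt = 4;	/* made-up channel count */

	for (i = 0; i < chancnt; i++)
		printf("chan %d registers at base + 0x%x\n", i, 0x80 * (i + 1));
	return 0;
}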
-
-/**
- * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended
- *                                 descriptors to hw
- * @chan: DMA channel handle
- */
-static inline void __ioat1_dma_memcpy_issue_pending(
-                                               struct ioat_dma_chan *ioat_chan)
-{
-       ioat_chan->pending = 0;
-       writeb(IOAT_CHANCMD_APPEND, ioat_chan->reg_base + IOAT1_CHANCMD_OFFSET);
-}
-
-static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
-{
-       struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
-
-       if (ioat_chan->pending > 0) {
-               spin_lock_bh(&ioat_chan->desc_lock);
-               __ioat1_dma_memcpy_issue_pending(ioat_chan);
-               spin_unlock_bh(&ioat_chan->desc_lock);
-       }
-}
-
-static inline void __ioat2_dma_memcpy_issue_pending(
-                                               struct ioat_dma_chan *ioat_chan)
-{
-       ioat_chan->pending = 0;
-       writew(ioat_chan->dmacount,
-              ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
-}
-
-static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan)
-{
-       struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
-
-       if (ioat_chan->pending > 0) {
-               spin_lock_bh(&ioat_chan->desc_lock);
-               __ioat2_dma_memcpy_issue_pending(ioat_chan);
-               spin_unlock_bh(&ioat_chan->desc_lock);
-       }
-}
-
-
-/**
- * ioat_dma_chan_reset_part2 - reinit the channel after a reset
- */
-static void ioat_dma_chan_reset_part2(struct work_struct *work)
-{
-       struct ioat_dma_chan *ioat_chan =
-               container_of(work, struct ioat_dma_chan, work.work);
-       struct ioat_desc_sw *desc;
-
-       spin_lock_bh(&ioat_chan->cleanup_lock);
-       spin_lock_bh(&ioat_chan->desc_lock);
-
-       ioat_chan->completion_virt->low = 0;
-       ioat_chan->completion_virt->high = 0;
-       ioat_chan->pending = 0;
-
-       /*
-        * count the descriptors waiting, and be sure to do it
-        * right for both the CB1 line and the CB2 ring
-        */
-       ioat_chan->dmacount = 0;
-       if (ioat_chan->used_desc.prev) {
-               desc = to_ioat_desc(ioat_chan->used_desc.prev);
-               do {
-                       ioat_chan->dmacount++;
-                       desc = to_ioat_desc(desc->node.next);
-               } while (&desc->node != ioat_chan->used_desc.next);
-       }
-
-       /*
-        * write the new starting descriptor address
-        * this puts channel engine into ARMED state
-        */
-       desc = to_ioat_desc(ioat_chan->used_desc.prev);
-       switch (ioat_chan->device->version) {
-       case IOAT_VER_1_2:
-               writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
-                      ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
-               writel(((u64) desc->async_tx.phys) >> 32,
-                      ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
-
-               writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
-                       + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
-               break;
-       case IOAT_VER_2_0:
-               writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
-                      ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
-               writel(((u64) desc->async_tx.phys) >> 32,
-                      ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
-
-               /* tell the engine to go with what's left to be done */
-               writew(ioat_chan->dmacount,
-                      ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
-
-               break;
-       }
-       dev_err(&ioat_chan->device->pdev->dev,
-               "chan%d reset - %d descs waiting, %d total desc\n",
-               chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
-
-       spin_unlock_bh(&ioat_chan->desc_lock);
-       spin_unlock_bh(&ioat_chan->cleanup_lock);
-}
-
-/**
- * ioat_dma_reset_channel - restart a channel
- * @ioat_chan: IOAT DMA channel handle
- */
-static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan)
-{
-       u32 chansts, chanerr;
-
-       if (!ioat_chan->used_desc.prev)
-               return;
-
-       chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
-       chansts = (ioat_chan->completion_virt->low
-                                       & IOAT_CHANSTS_DMA_TRANSFER_STATUS);
-       if (chanerr) {
-               dev_err(&ioat_chan->device->pdev->dev,
-                       "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
-                       chan_num(ioat_chan), chansts, chanerr);
-               writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
-       }
-
-       /*
-        * whack it upside the head with a reset
-        * and wait for things to settle out.
-        * force the pending count to a really big negative
-        * to make sure no one forces an issue_pending
-        * while we're waiting.
-        */
-
-       spin_lock_bh(&ioat_chan->desc_lock);
-       ioat_chan->pending = INT_MIN;
-       writeb(IOAT_CHANCMD_RESET,
-              ioat_chan->reg_base
-              + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
-       spin_unlock_bh(&ioat_chan->desc_lock);
-
-       /* schedule the 2nd half instead of sleeping a long time */
-       schedule_delayed_work(&ioat_chan->work, RESET_DELAY);
-}
-
-/**
- * ioat_dma_chan_watchdog - watch for stuck channels
- */
-static void ioat_dma_chan_watchdog(struct work_struct *work)
-{
-       struct ioatdma_device *device =
-               container_of(work, struct ioatdma_device, work.work);
-       struct ioat_dma_chan *ioat_chan;
-       int i;
-
-       union {
-               u64 full;
-               struct {
-                       u32 low;
-                       u32 high;
-               };
-       } completion_hw;
-       unsigned long compl_desc_addr_hw;
-
-       for (i = 0; i < device->common.chancnt; i++) {
-               ioat_chan = ioat_lookup_chan_by_index(device, i);
-
-               if (ioat_chan->device->version == IOAT_VER_1_2
-                       /* have we started processing anything yet */
-                   && ioat_chan->last_completion
-                       /* have we completed any since last watchdog cycle? */
-                   && (ioat_chan->last_completion ==
-                               ioat_chan->watchdog_completion)
-                       /* has TCP stuck on one cookie since last watchdog? */
-                   && (ioat_chan->watchdog_tcp_cookie ==
-                               ioat_chan->watchdog_last_tcp_cookie)
-                   && (ioat_chan->watchdog_tcp_cookie !=
-                               ioat_chan->completed_cookie)
-                       /* is there something in the chain to be processed? */
-                       /* CB1 chain always has at least the last one processed */
-                   && (ioat_chan->used_desc.prev != ioat_chan->used_desc.next)
-                   && ioat_chan->pending == 0) {
-
-                       /*
-                        * check CHANSTS register for completed
-                        * descriptor address.
-                        * if it is different than completion writeback,
-                        * it is not zero
-                        * and it has changed since the last watchdog
-                        *     we can assume that channel
-                        *     is still working correctly
-                        *     and the problem is in completion writeback.
-                        *     update completion writeback
-                        *     with actual CHANSTS value
-                        * else
-                        *     try resetting the channel
-                        */
-
-                       completion_hw.low = readl(ioat_chan->reg_base +
-                               IOAT_CHANSTS_OFFSET_LOW(ioat_chan->device->version));
-                       completion_hw.high = readl(ioat_chan->reg_base +
-                               IOAT_CHANSTS_OFFSET_HIGH(ioat_chan->device->version));
-#if (BITS_PER_LONG == 64)
-                       compl_desc_addr_hw =
-                               completion_hw.full
-                               & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
-#else
-                       compl_desc_addr_hw =
-                               completion_hw.low & IOAT_LOW_COMPLETION_MASK;
-#endif
-
-                       if ((compl_desc_addr_hw != 0)
-                          && (compl_desc_addr_hw != ioat_chan->watchdog_completion)
-                          && (compl_desc_addr_hw != ioat_chan->last_compl_desc_addr_hw)) {
-                               ioat_chan->last_compl_desc_addr_hw = compl_desc_addr_hw;
-                               ioat_chan->completion_virt->low = completion_hw.low;
-                               ioat_chan->completion_virt->high = completion_hw.high;
-                       } else {
-                               ioat_dma_reset_channel(ioat_chan);
-                               ioat_chan->watchdog_completion = 0;
-                               ioat_chan->last_compl_desc_addr_hw = 0;
-                       }
-
-               /*
-                * for version 2.0 if there are descriptors yet to be processed
-                * and the last completed hasn't changed since the last watchdog
-                *      if they haven't hit the pending level
-                *          issue the pending to push them through
-                *      else
-                *          try resetting the channel
-                */
-               } else if (ioat_chan->device->version == IOAT_VER_2_0
-                   && ioat_chan->used_desc.prev
-                   && ioat_chan->last_completion
-                   && ioat_chan->last_completion == ioat_chan->watchdog_completion) {
-
-                       if (ioat_chan->pending < ioat_pending_level)
-                               ioat2_dma_memcpy_issue_pending(&ioat_chan->common);
-                       else {
-                               ioat_dma_reset_channel(ioat_chan);
-                               ioat_chan->watchdog_completion = 0;
-                       }
-               } else {
-                       ioat_chan->last_compl_desc_addr_hw = 0;
-                       ioat_chan->watchdog_completion
-                                       = ioat_chan->last_completion;
-               }
-
-               ioat_chan->watchdog_last_tcp_cookie =
-                       ioat_chan->watchdog_tcp_cookie;
-       }
-
-       schedule_delayed_work(&device->work, WATCHDOG_DELAY);
-}
-
-static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
-{
-       struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
-       struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
-       struct ioat_desc_sw *prev, *new;
-       struct ioat_dma_descriptor *hw;
-       dma_cookie_t cookie;
-       LIST_HEAD(new_chain);
-       u32 copy;
-       size_t len;
-       dma_addr_t src, dst;
-       unsigned long orig_flags;
-       unsigned int desc_count = 0;
-
-       /* src and dest and len are stored in the initial descriptor */
-       len = first->len;
-       src = first->src;
-       dst = first->dst;
-       orig_flags = first->async_tx.flags;
-       new = first;
-
-       spin_lock_bh(&ioat_chan->desc_lock);
-       prev = to_ioat_desc(ioat_chan->used_desc.prev);
-       prefetch(prev->hw);
-       do {
-               copy = min_t(size_t, len, ioat_chan->xfercap);
-
-               async_tx_ack(&new->async_tx);
-
-               hw = new->hw;
-               hw->size = copy;
-               hw->ctl = 0;
-               hw->src_addr = src;
-               hw->dst_addr = dst;
-               hw->next = 0;
-
-               /* chain together the physical address list for the HW */
-               wmb();
-               prev->hw->next = (u64) new->async_tx.phys;
-
-               len -= copy;
-               dst += copy;
-               src += copy;
-
-               list_add_tail(&new->node, &new_chain);
-               desc_count++;
-               prev = new;
-       } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan)));
-
-       if (!new) {
-               dev_err(&ioat_chan->device->pdev->dev,
-                       "tx submit failed\n");
-               spin_unlock_bh(&ioat_chan->desc_lock);
-               return -ENOMEM;
-       }
-
-       hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
-       if (first->async_tx.callback) {
-               hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
-               if (first != new) {
-                       /* move callback into the last desc */
-                       new->async_tx.callback = first->async_tx.callback;
-                       new->async_tx.callback_param
-                                       = first->async_tx.callback_param;
-                       first->async_tx.callback = NULL;
-                       first->async_tx.callback_param = NULL;
-               }
-       }
-
-       new->tx_cnt = desc_count;
-       new->async_tx.flags = orig_flags; /* client is in control of this ack */
-
-       /* store the original values for use in later cleanup */
-       if (new != first) {
-               new->src = first->src;
-               new->dst = first->dst;
-               new->len = first->len;
-       }
-
-       /* cookie incr and addition to used_list must be atomic */
-       cookie = ioat_chan->common.cookie;
-       cookie++;
-       if (cookie < 0)
-               cookie = 1;
-       ioat_chan->common.cookie = new->async_tx.cookie = cookie;
-
-       /* write address into NextDescriptor field of last desc in chain */
-       to_ioat_desc(ioat_chan->used_desc.prev)->hw->next =
-                                                       first->async_tx.phys;
-       list_splice_tail(&new_chain, &ioat_chan->used_desc);
-
-       ioat_chan->dmacount += desc_count;
-       ioat_chan->pending += desc_count;
-       if (ioat_chan->pending >= ioat_pending_level)
-               __ioat1_dma_memcpy_issue_pending(ioat_chan);
-       spin_unlock_bh(&ioat_chan->desc_lock);
-
-       return cookie;
-}
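
The loop above slices one logical copy into hardware descriptors of at most
xfercap bytes each; a quick arithmetic sketch with made-up sizes:

#include <stdio.h>
#include <stddef.h>

int main(void)
{
	size_t len = 100000, xfercap = 1u << 16;	/* made-up length and cap */
	unsigned int descs = 0;

	while (len) {
		size_t copy = len < xfercap ? len : xfercap;

		len -= copy;
		descs++;
	}
	printf("descriptors needed: %u\n", descs);	/* prints 2 */
	return 0;
}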
-
-static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx)
-{
-       struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
-       struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
-       struct ioat_desc_sw *new;
-       struct ioat_dma_descriptor *hw;
-       dma_cookie_t cookie;
-       u32 copy;
-       size_t len;
-       dma_addr_t src, dst;
-       unsigned long orig_flags;
-       unsigned int desc_count = 0;
-
-       /* src and dest and len are stored in the initial descriptor */
-       len = first->len;
-       src = first->src;
-       dst = first->dst;
-       orig_flags = first->async_tx.flags;
-       new = first;
-
-       /*
-        * ioat_chan->desc_lock is still held in the version 2 path;
-        * it is unlocked at the end of this function.
-        */
-       do {
-               copy = min_t(size_t, len, ioat_chan->xfercap);
-
-               async_tx_ack(&new->async_tx);
-
-               hw = new->hw;
-               hw->size = copy;
-               hw->ctl = 0;
-               hw->src_addr = src;
-               hw->dst_addr = dst;
-
-               len -= copy;
-               dst += copy;
-               src += copy;
-               desc_count++;
-       } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan)));
-
-       if (!new) {
-               dev_err(&ioat_chan->device->pdev->dev,
-                       "tx submit failed\n");
-               spin_unlock_bh(&ioat_chan->desc_lock);
-               return -ENOMEM;
-       }
-
-       hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
-       if (first->async_tx.callback) {
-               hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
-               if (first != new) {
-                       /* move callback into the last desc */
-                       new->async_tx.callback = first->async_tx.callback;
-                       new->async_tx.callback_param
-                                       = first->async_tx.callback_param;
-                       first->async_tx.callback = NULL;
-                       first->async_tx.callback_param = NULL;
-               }
-       }
-
-       new->tx_cnt = desc_count;
-       new->async_tx.flags = orig_flags; /* client is in control of this ack */
-
-       /* store the original values for use in later cleanup */
-       if (new != first) {
-               new->src = first->src;
-               new->dst = first->dst;
-               new->len = first->len;
-       }
-
-       /* cookie incr and addition to used_list must be atomic */
-       cookie = ioat_chan->common.cookie;
-       cookie++;
-       if (cookie < 0)
-               cookie = 1;
-       ioat_chan->common.cookie = new->async_tx.cookie = cookie;
-
-       ioat_chan->dmacount += desc_count;
-       ioat_chan->pending += desc_count;
-       if (ioat_chan->pending >= ioat_pending_level)
-               __ioat2_dma_memcpy_issue_pending(ioat_chan);
-       spin_unlock_bh(&ioat_chan->desc_lock);
-
-       return cookie;
-}
-
-/**
- * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
- * @ioat_chan: the channel supplying the memory pool for the descriptors
- * @flags: allocation flags
- */
-static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
-                                       struct ioat_dma_chan *ioat_chan,
-                                       gfp_t flags)
-{
-       struct ioat_dma_descriptor *desc;
-       struct ioat_desc_sw *desc_sw;
-       struct ioatdma_device *ioatdma_device;
-       dma_addr_t phys;
-
-       ioatdma_device = to_ioatdma_device(ioat_chan->common.device);
-       desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
-       if (unlikely(!desc))
-               return NULL;
-
-       desc_sw = kzalloc(sizeof(*desc_sw), flags);
-       if (unlikely(!desc_sw)) {
-               pci_pool_free(ioatdma_device->dma_pool, desc, phys);
-               return NULL;
-       }
-
-       memset(desc, 0, sizeof(*desc));
-       dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common);
-       switch (ioat_chan->device->version) {
-       case IOAT_VER_1_2:
-               desc_sw->async_tx.tx_submit = ioat1_tx_submit;
-               break;
-       case IOAT_VER_2_0:
-       case IOAT_VER_3_0:
-               desc_sw->async_tx.tx_submit = ioat2_tx_submit;
-               break;
-       }
-
-       desc_sw->hw = desc;
-       desc_sw->async_tx.phys = phys;
-
-       return desc_sw;
-}
-
-static int ioat_initial_desc_count = 256;
-module_param(ioat_initial_desc_count, int, 0644);
-MODULE_PARM_DESC(ioat_initial_desc_count,
-                "initial descriptors per channel (default: 256)");
-
-/**
- * ioat2_dma_massage_chan_desc - link the descriptors into a circle
- * @ioat_chan: the channel to be massaged
- */
-static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan)
-{
-       struct ioat_desc_sw *desc, *_desc;
-
-       /* setup used_desc */
-       ioat_chan->used_desc.next = ioat_chan->free_desc.next;
-       ioat_chan->used_desc.prev = NULL;
-
-       /* pull free_desc out of the circle so that every node is a hw
-        * descriptor, but leave it pointing to the list
-        */
-       ioat_chan->free_desc.prev->next = ioat_chan->free_desc.next;
-       ioat_chan->free_desc.next->prev = ioat_chan->free_desc.prev;
-
-       /* circle link the hw descriptors */
-       desc = to_ioat_desc(ioat_chan->free_desc.next);
-       desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys;
-       list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) {
-               desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys;
-       }
-}
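
A simplified sketch of the circular hardware chain the function builds, using
an array in place of the kernel list and made-up physical addresses:

#include <stdio.h>
#include <stdint.h>

struct hw_desc {
	uint64_t next;	/* physical address of the next descriptor */
};

int main(void)
{
	uint64_t phys[4] = { 0x1000, 0x2000, 0x3000, 0x4000 };	/* made up */
	struct hw_desc ring[4];
	int i, n = 4;

	/* each next field points at its successor; the last wraps around */
	for (i = 0; i < n; i++)
		ring[i].next = phys[(i + 1) % n];

	for (i = 0; i < n; i++)
		printf("desc %d -> 0x%llx\n", i,
		       (unsigned long long)ring[i].next);
	return 0;
}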
-
-/**
- * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors
- * @chan: the channel to be filled out
- */
-static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
-{
-       struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
-       struct ioat_desc_sw *desc;
-       u16 chanctrl;
-       u32 chanerr;
-       int i;
-       LIST_HEAD(tmp_list);
-
-       /* have we already been set up? */
-       if (!list_empty(&ioat_chan->free_desc))
-               return ioat_chan->desccount;
-
-       /* Setup register to interrupt and write completion status on error */
-       chanctrl = IOAT_CHANCTRL_ERR_INT_EN |
-               IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
-               IOAT_CHANCTRL_ERR_COMPLETION_EN;
-       writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
-
-       chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
-       if (chanerr) {
-               dev_err(&ioat_chan->device->pdev->dev,
-                       "CHANERR = %x, clearing\n", chanerr);
-               writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
-       }
-
-       /* Allocate descriptors */
-       for (i = 0; i < ioat_initial_desc_count; i++) {
-               desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL);
-               if (!desc) {
-                       dev_err(&ioat_chan->device->pdev->dev,
-                               "Only %d initial descriptors\n", i);
-                       break;
-               }
-               list_add_tail(&desc->node, &tmp_list);
-       }
-       spin_lock_bh(&ioat_chan->desc_lock);
-       ioat_chan->desccount = i;
-       list_splice(&tmp_list, &ioat_chan->free_desc);
-       if (ioat_chan->device->version != IOAT_VER_1_2)
-               ioat2_dma_massage_chan_desc(ioat_chan);
-       spin_unlock_bh(&ioat_chan->desc_lock);
-
-       /* allocate a completion writeback area */
-       /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
-       ioat_chan->completion_virt =
-               pci_pool_alloc(ioat_chan->device->completion_pool,
-                              GFP_KERNEL,
-                              &ioat_chan->completion_addr);
-       memset(ioat_chan->completion_virt, 0,
-              sizeof(*ioat_chan->completion_virt));
-       writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF,
-              ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
-       writel(((u64) ioat_chan->completion_addr) >> 32,
-              ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
-
-       tasklet_enable(&ioat_chan->cleanup_task);
-       ioat_dma_start_null_desc(ioat_chan);  /* give chain to dma device */
-       return ioat_chan->desccount;
-}
-
-/**
- * ioat_dma_free_chan_resources - release all the descriptors
- * @chan: the channel to be cleaned
- */
-static void ioat_dma_free_chan_resources(struct dma_chan *chan)
-{
-       struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
-       struct ioatdma_device *ioatdma_device = to_ioatdma_device(chan->device);
-       struct ioat_desc_sw *desc, *_desc;
-       int in_use_descs = 0;
-
-       /* Before freeing channel resources first check
-        * if they have been previously allocated for this channel.
-        */
-       if (ioat_chan->desccount == 0)
-               return;
-
-       tasklet_disable(&ioat_chan->cleanup_task);
-       ioat_dma_memcpy_cleanup(ioat_chan);
-
-       /* Delay 100ms after reset to allow internal DMA logic to quiesce
-        * before removing DMA descriptor resources.
-        */
-       writeb(IOAT_CHANCMD_RESET,
-              ioat_chan->reg_base
-                       + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
-       mdelay(100);
-
-       spin_lock_bh(&ioat_chan->desc_lock);
-       switch (ioat_chan->device->version) {
-       case IOAT_VER_1_2:
-               list_for_each_entry_safe(desc, _desc,
-                                        &ioat_chan->used_desc, node) {
-                       in_use_descs++;
-                       list_del(&desc->node);
-                       pci_pool_free(ioatdma_device->dma_pool, desc->hw,
-                                     desc->async_tx.phys);
-                       kfree(desc);
-               }
-               list_for_each_entry_safe(desc, _desc,
-                                        &ioat_chan->free_desc, node) {
-                       list_del(&desc->node);
-                       pci_pool_free(ioatdma_device->dma_pool, desc->hw,
-                                     desc->async_tx.phys);
-                       kfree(desc);
-               }
-               break;
-       case IOAT_VER_2_0:
-       case IOAT_VER_3_0:
-               list_for_each_entry_safe(desc, _desc,
-                                        ioat_chan->free_desc.next, node) {
-                       list_del(&desc->node);
-                       pci_pool_free(ioatdma_device->dma_pool, desc->hw,
-                                     desc->async_tx.phys);
-                       kfree(desc);
-               }
-               desc = to_ioat_desc(ioat_chan->free_desc.next);
-               pci_pool_free(ioatdma_device->dma_pool, desc->hw,
-                             desc->async_tx.phys);
-               kfree(desc);
-               INIT_LIST_HEAD(&ioat_chan->free_desc);
-               INIT_LIST_HEAD(&ioat_chan->used_desc);
-               break;
-       }
-       spin_unlock_bh(&ioat_chan->desc_lock);
-
-       pci_pool_free(ioatdma_device->completion_pool,
-                     ioat_chan->completion_virt,
-                     ioat_chan->completion_addr);
-
-       /* one is ok since we left it there on purpose */
-       if (in_use_descs > 1)
-               dev_err(&ioat_chan->device->pdev->dev,
-                       "Freeing %d in use descriptors!\n",
-                       in_use_descs - 1);
-
-       ioat_chan->last_completion = ioat_chan->completion_addr = 0;
-       ioat_chan->pending = 0;
-       ioat_chan->dmacount = 0;
-       ioat_chan->desccount = 0;
-       ioat_chan->watchdog_completion = 0;
-       ioat_chan->last_compl_desc_addr_hw = 0;
-       ioat_chan->watchdog_tcp_cookie =
-               ioat_chan->watchdog_last_tcp_cookie = 0;
-}
-
-/**
- * ioat_dma_get_next_descriptor - return the next available descriptor
- * @ioat_chan: IOAT DMA channel handle
- *
- * Gets the next descriptor from the chain, and must be called with the
- * channel's desc_lock held.  Allocates more descriptors if the channel
- * has run out.
- */
-static struct ioat_desc_sw *
-ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
-{
-       struct ioat_desc_sw *new;
-
-       if (!list_empty(&ioat_chan->free_desc)) {
-               new = to_ioat_desc(ioat_chan->free_desc.next);
-               list_del(&new->node);
-       } else {
-               /* try to get another desc */
-               new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
-               if (!new) {
-                       dev_err(&ioat_chan->device->pdev->dev,
-                               "alloc failed\n");
-                       return NULL;
-               }
-       }
-
-       prefetch(new->hw);
-       return new;
-}
-
-static struct ioat_desc_sw *
-ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
-{
-       struct ioat_desc_sw *new;
-
-       /*
-        * used.prev points to where to start processing
-        * used.next points to next free descriptor
-        * if used.prev == NULL, there are none waiting to be processed
-        * if used.next == used.prev.prev, there is only one free descriptor,
-        *      and we need to use it as a noop descriptor before
-        *      linking in a new set of descriptors, since the device
-        *      has probably already read the pointer to it
-        */
-       if (ioat_chan->used_desc.prev &&
-           ioat_chan->used_desc.next == ioat_chan->used_desc.prev->prev) {
-
-               struct ioat_desc_sw *desc;
-               struct ioat_desc_sw *noop_desc;
-               int i;
-
-               /* set up the noop descriptor */
-               noop_desc = to_ioat_desc(ioat_chan->used_desc.next);
-               /* set size to a non-zero value (channel returns error when size is 0) */
-               noop_desc->hw->size = NULL_DESC_BUFFER_SIZE;
-               noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
-               noop_desc->hw->src_addr = 0;
-               noop_desc->hw->dst_addr = 0;
-
-               ioat_chan->used_desc.next = ioat_chan->used_desc.next->next;
-               ioat_chan->pending++;
-               ioat_chan->dmacount++;
-
-               /* try to get a few more descriptors */
-               for (i = 16; i; i--) {
-                       desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
-                       if (!desc) {
-                               dev_err(&ioat_chan->device->pdev->dev,
-                                       "alloc failed\n");
-                               break;
-                       }
-                       list_add_tail(&desc->node, ioat_chan->used_desc.next);
-
-                       desc->hw->next
-                               = to_ioat_desc(desc->node.next)->async_tx.phys;
-                       to_ioat_desc(desc->node.prev)->hw->next
-                               = desc->async_tx.phys;
-                       ioat_chan->desccount++;
-               }
-
-               ioat_chan->used_desc.next = noop_desc->node.next;
-       }
-       new = to_ioat_desc(ioat_chan->used_desc.next);
-       prefetch(new);
-       ioat_chan->used_desc.next = new->node.next;
-
-       if (ioat_chan->used_desc.prev == NULL)
-               ioat_chan->used_desc.prev = &new->node;
-
-       prefetch(new->hw);
-       return new;
-}
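
The ioat2 path above runs the descriptor list as a ring tracked by two cursors: used_desc.prev, where cleanup starts, and used_desc.next, the next free slot. The guard "used_desc.next == used_desc.prev->prev" detects the one-free-slot-left case, where the hardware may already have prefetched that slot's next pointer, so the slot is spent on a harmless noop instead of being handed out. A minimal userspace sketch of that invariant on a plain circular doubly-linked list (struct slot and one_free_slot_left are invented for illustration):

#include <stdio.h>

struct slot { struct slot *prev, *next; };

/* one free slot remains when the free cursor sits just before the
 * processing cursor on the ring */
static int one_free_slot_left(const struct slot *used_next,
                              const struct slot *used_prev)
{
        return used_prev && used_next == used_prev->prev;
}

int main(void)
{
        struct slot a, b, c;    /* three-slot ring: a -> b -> c -> a */

        a.next = &b; b.next = &c; c.next = &a;
        a.prev = &c; b.prev = &a; c.prev = &b;
        /* processing starts at a, the next free slot is c == a->prev */
        printf("one free slot: %d\n", one_free_slot_left(&c, &a));
        return 0;
}
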
-
-static struct ioat_desc_sw *ioat_dma_get_next_descriptor(
-                                               struct ioat_dma_chan *ioat_chan)
-{
-       if (!ioat_chan)
-               return NULL;
-
-       switch (ioat_chan->device->version) {
-       case IOAT_VER_1_2:
-               return ioat1_dma_get_next_descriptor(ioat_chan);
-       case IOAT_VER_2_0:
-       case IOAT_VER_3_0:
-               return ioat2_dma_get_next_descriptor(ioat_chan);
-       }
-       return NULL;
-}
-
-static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy(
-                                               struct dma_chan *chan,
-                                               dma_addr_t dma_dest,
-                                               dma_addr_t dma_src,
-                                               size_t len,
-                                               unsigned long flags)
-{
-       struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
-       struct ioat_desc_sw *new;
-
-       spin_lock_bh(&ioat_chan->desc_lock);
-       new = ioat_dma_get_next_descriptor(ioat_chan);
-       spin_unlock_bh(&ioat_chan->desc_lock);
-
-       if (new) {
-               new->len = len;
-               new->dst = dma_dest;
-               new->src = dma_src;
-               new->async_tx.flags = flags;
-               return &new->async_tx;
-       } else {
-               dev_err(&ioat_chan->device->pdev->dev,
-                       "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
-                       chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
-               return NULL;
-       }
-}
-
-static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy(
-                                               struct dma_chan *chan,
-                                               dma_addr_t dma_dest,
-                                               dma_addr_t dma_src,
-                                               size_t len,
-                                               unsigned long flags)
-{
-       struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
-       struct ioat_desc_sw *new;
-
-       spin_lock_bh(&ioat_chan->desc_lock);
-       new = ioat2_dma_get_next_descriptor(ioat_chan);
-
-       /*
-        * leave ioat_chan->desc_lock set in ioat 2 path
-        * it will get unlocked at end of tx_submit
-        */
-
-       if (new) {
-               new->len = len;
-               new->dst = dma_dest;
-               new->src = dma_src;
-               new->async_tx.flags = flags;
-               return &new->async_tx;
-       } else {
-               spin_unlock_bh(&ioat_chan->desc_lock);
-               dev_err(&ioat_chan->device->pdev->dev,
-                       "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
-                       chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
-               return NULL;
-       }
-}
-
-static void ioat_dma_cleanup_tasklet(unsigned long data)
-{
-       struct ioat_dma_chan *chan = (void *)data;
-       ioat_dma_memcpy_cleanup(chan);
-       writew(IOAT_CHANCTRL_INT_DISABLE,
-              chan->reg_base + IOAT_CHANCTRL_OFFSET);
-}
-
-static void
-ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc)
-{
-       if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-               if (desc->async_tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-                       pci_unmap_single(ioat_chan->device->pdev,
-                                        pci_unmap_addr(desc, dst),
-                                        pci_unmap_len(desc, len),
-                                        PCI_DMA_FROMDEVICE);
-               else
-                       pci_unmap_page(ioat_chan->device->pdev,
-                                      pci_unmap_addr(desc, dst),
-                                      pci_unmap_len(desc, len),
-                                      PCI_DMA_FROMDEVICE);
-       }
-
-       if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-               if (desc->async_tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-                       pci_unmap_single(ioat_chan->device->pdev,
-                                        pci_unmap_addr(desc, src),
-                                        pci_unmap_len(desc, len),
-                                        PCI_DMA_TODEVICE);
-               else
-                       pci_unmap_page(ioat_chan->device->pdev,
-                                      pci_unmap_addr(desc, src),
-                                      pci_unmap_len(desc, len),
-                                      PCI_DMA_TODEVICE);
-       }
-}
-
-/**
- * ioat_dma_memcpy_cleanup - clean up finished descriptors
- * @chan: ioat channel to be cleaned up
- */
-static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
-{
-       unsigned long phys_complete;
-       struct ioat_desc_sw *desc, *_desc;
-       dma_cookie_t cookie = 0;
-       unsigned long desc_phys;
-       struct ioat_desc_sw *latest_desc;
-
-       prefetch(ioat_chan->completion_virt);
-
-       if (!spin_trylock_bh(&ioat_chan->cleanup_lock))
-               return;
-
-       /*
-        * The completion writeback can happen at any time,
-        * so reads by the driver need to be atomic operations.
-        * The descriptor physical addresses are limited to 32 bits
-        * when the CPU can only do a 32-bit mov.
-        */
-
-#if (BITS_PER_LONG == 64)
-       phys_complete =
-               ioat_chan->completion_virt->full
-               & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
-#else
-       phys_complete =
-               ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK;
-#endif
-
-       if ((ioat_chan->completion_virt->full
-               & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
-                               IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
-               dev_err(&ioat_chan->device->pdev->dev,
-                       "Channel halted, chanerr = %x\n",
-                       readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET));
-
-               /* TODO do something to salvage the situation */
-       }
-
-       if (phys_complete == ioat_chan->last_completion) {
-               spin_unlock_bh(&ioat_chan->cleanup_lock);
-               /*
-                * perhaps we're stuck so hard that the watchdog can't go off?
-                * try to catch it after 2 seconds
-                */
-               if (ioat_chan->device->version != IOAT_VER_3_0) {
-                       if (time_after(jiffies,
-                                      ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) {
-                               ioat_dma_chan_watchdog(&(ioat_chan->device->work.work));
-                               ioat_chan->last_completion_time = jiffies;
-                       }
-               }
-               return;
-       }
-       ioat_chan->last_completion_time = jiffies;
-
-       cookie = 0;
-       if (!spin_trylock_bh(&ioat_chan->desc_lock)) {
-               spin_unlock_bh(&ioat_chan->cleanup_lock);
-               return;
-       }
-
-       switch (ioat_chan->device->version) {
-       case IOAT_VER_1_2:
-               list_for_each_entry_safe(desc, _desc,
-                                        &ioat_chan->used_desc, node) {
-
-                       /*
-                        * Incoming DMA requests may use multiple descriptors,
-                        * e.g. when they exceed xfercap. If so, only the last
-                        * one will have a cookie and require unmapping.
-                        */
-                       if (desc->async_tx.cookie) {
-                               cookie = desc->async_tx.cookie;
-                               ioat_dma_unmap(ioat_chan, desc);
-                               if (desc->async_tx.callback) {
-                                       desc->async_tx.callback(desc->async_tx.callback_param);
-                                       desc->async_tx.callback = NULL;
-                               }
-                       }
-
-                       if (desc->async_tx.phys != phys_complete) {
-                               /*
-                                * a completed entry, but not the last, so clean
-                                * up if the client is done with the descriptor
-                                */
-                               if (async_tx_test_ack(&desc->async_tx)) {
-                                       list_move_tail(&desc->node,
-                                                      &ioat_chan->free_desc);
-                               } else
-                                       desc->async_tx.cookie = 0;
-                       } else {
-                               /*
-                                * last used desc. Do not remove, so we can
-                                * append from it, but don't look at it next
-                                * time, either
-                                */
-                               desc->async_tx.cookie = 0;
-
-                               /* TODO check status bits? */
-                               break;
-                       }
-               }
-               break;
-       case IOAT_VER_2_0:
-       case IOAT_VER_3_0:
-               /* has some other thread already cleaned up? */
-               if (ioat_chan->used_desc.prev == NULL)
-                       break;
-
-               /* work backwards to find latest finished desc */
-               desc = to_ioat_desc(ioat_chan->used_desc.next);
-               latest_desc = NULL;
-               do {
-                       desc = to_ioat_desc(desc->node.prev);
-                       desc_phys = (unsigned long)desc->async_tx.phys
-                                      & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
-                       if (desc_phys == phys_complete) {
-                               latest_desc = desc;
-                               break;
-                       }
-               } while (&desc->node != ioat_chan->used_desc.prev);
-
-               if (latest_desc != NULL) {
-
-                       /* work forwards to clear finished descriptors */
-                       for (desc = to_ioat_desc(ioat_chan->used_desc.prev);
-                            &desc->node != latest_desc->node.next &&
-                            &desc->node != ioat_chan->used_desc.next;
-                            desc = to_ioat_desc(desc->node.next)) {
-                               if (desc->async_tx.cookie) {
-                                       cookie = desc->async_tx.cookie;
-                                       desc->async_tx.cookie = 0;
-                                       ioat_dma_unmap(ioat_chan, desc);
-                                       if (desc->async_tx.callback) {
-                                               desc->async_tx.callback(desc->async_tx.callback_param);
-                                               desc->async_tx.callback = NULL;
-                                       }
-                               }
-                       }
-
-                       /* move used.prev up beyond those that are finished */
-                       if (&desc->node == ioat_chan->used_desc.next)
-                               ioat_chan->used_desc.prev = NULL;
-                       else
-                               ioat_chan->used_desc.prev = &desc->node;
-               }
-               break;
-       }
-
-       spin_unlock_bh(&ioat_chan->desc_lock);
-
-       ioat_chan->last_completion = phys_complete;
-       if (cookie != 0)
-               ioat_chan->completed_cookie = cookie;
-
-       spin_unlock_bh(&ioat_chan->cleanup_lock);
-}
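
The cleanup above derives the completed-descriptor address by masking the channel-status writeback: the low six bits carry transfer status, the rest is the address (IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR is ~0x3F and IOAT_CHANSTS_DMA_TRANSFER_STATUS is 0x7, per ioatdma_registers.h further down). A stand-alone illustration with an invented writeback value:

#include <stdio.h>
#include <stdint.h>

#define COMPLETED_ADDR_MASK     (~0x3Full)      /* descriptor address bits */
#define XFER_STATUS_MASK        0x7ull          /* ACTIVE/DONE/SUSPENDED/HALTED */

int main(void)
{
        uint64_t chansts = 0x12345640ull | 0x1ull;      /* invented: addr + DONE */

        printf("completed desc: 0x%llx\n",
               (unsigned long long)(chansts & COMPLETED_ADDR_MASK));
        printf("xfer status:    0x%llx\n",
               (unsigned long long)(chansts & XFER_STATUS_MASK));
        return 0;
}
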
-
-/**
- * ioat_dma_is_complete - poll the status of an IOAT DMA transaction
- * @chan: IOAT DMA channel handle
- * @cookie: DMA transaction identifier
- * @done: if not %NULL, updated with last completed transaction
- * @used: if not %NULL, updated with last used transaction
- */
-static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
-                                           dma_cookie_t cookie,
-                                           dma_cookie_t *done,
-                                           dma_cookie_t *used)
-{
-       struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
-       dma_cookie_t last_used;
-       dma_cookie_t last_complete;
-       enum dma_status ret;
-
-       last_used = chan->cookie;
-       last_complete = ioat_chan->completed_cookie;
-       ioat_chan->watchdog_tcp_cookie = cookie;
-
-       if (done)
-               *done = last_complete;
-       if (used)
-               *used = last_used;
-
-       ret = dma_async_is_complete(cookie, last_complete, last_used);
-       if (ret == DMA_SUCCESS)
-               return ret;
-
-       ioat_dma_memcpy_cleanup(ioat_chan);
-
-       last_used = chan->cookie;
-       last_complete = ioat_chan->completed_cookie;
-
-       if (done)
-               *done = last_complete;
-       if (used)
-               *used = last_used;
-
-       return dma_async_is_complete(cookie, last_complete, last_used);
-}
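
dma_async_is_complete() decides completion from a cookie window between last_complete and last_used, allowing for wraparound of the cookie counter. A sketch of the dmaengine core's test, not this driver's code:

#include <stdio.h>

/* a cookie is complete if it falls at or before last_complete,
 * taking wraparound of the signed cookie counter into account */
static int cookie_complete(int cookie, int last_complete, int last_used)
{
        if (last_complete <= last_used) /* cookies have not wrapped */
                return cookie <= last_complete || cookie > last_used;
        return cookie <= last_complete && cookie > last_used;
}

int main(void)
{
        /* submitted up to cookie 7, completed through 5: 4 done, 6 pending */
        printf("%d %d\n", cookie_complete(4, 5, 7), cookie_complete(6, 5, 7));
        return 0;
}
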
-
-static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
-{
-       struct ioat_desc_sw *desc;
-
-       spin_lock_bh(&ioat_chan->desc_lock);
-
-       desc = ioat_dma_get_next_descriptor(ioat_chan);
-
-       if (!desc) {
-               dev_err(&ioat_chan->device->pdev->dev,
-                       "Unable to start null desc - get next desc failed\n");
-               spin_unlock_bh(&ioat_chan->desc_lock);
-               return;
-       }
-
-       desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL
-                               | IOAT_DMA_DESCRIPTOR_CTL_INT_GN
-                               | IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
-       /* set size to a non-zero value (channel returns error when size is 0) */
-       desc->hw->size = NULL_DESC_BUFFER_SIZE;
-       desc->hw->src_addr = 0;
-       desc->hw->dst_addr = 0;
-       async_tx_ack(&desc->async_tx);
-       switch (ioat_chan->device->version) {
-       case IOAT_VER_1_2:
-               desc->hw->next = 0;
-               list_add_tail(&desc->node, &ioat_chan->used_desc);
-
-               writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
-                      ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
-               writel(((u64) desc->async_tx.phys) >> 32,
-                      ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
-
-               writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
-                       + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
-               break;
-       case IOAT_VER_2_0:
-       case IOAT_VER_3_0:
-               writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
-                      ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
-               writel(((u64) desc->async_tx.phys) >> 32,
-                      ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
-
-               ioat_chan->dmacount++;
-               __ioat2_dma_memcpy_issue_pending(ioat_chan);
-               break;
-       }
-       spin_unlock_bh(&ioat_chan->desc_lock);
-}
-
-/*
- * Perform an IOAT transaction to verify the HW works.
- */
-#define IOAT_TEST_SIZE 2000
-
-static void ioat_dma_test_callback(void *dma_async_param)
-{
-       struct completion *cmp = dma_async_param;
-
-       complete(cmp);
-}
-
-/**
- * ioat_dma_self_test - Perform an IOAT transaction to verify the HW works.
- * @device: device to be tested
- */
-static int ioat_dma_self_test(struct ioatdma_device *device)
-{
-       int i;
-       u8 *src;
-       u8 *dest;
-       struct dma_chan *dma_chan;
-       struct dma_async_tx_descriptor *tx;
-       dma_addr_t dma_dest, dma_src;
-       dma_cookie_t cookie;
-       int err = 0;
-       struct completion cmp;
-       unsigned long tmo;
-       unsigned long flags;
-
-       src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
-       if (!src)
-               return -ENOMEM;
-       dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
-       if (!dest) {
-               kfree(src);
-               return -ENOMEM;
-       }
-
-       /* Fill in src buffer */
-       for (i = 0; i < IOAT_TEST_SIZE; i++)
-               src[i] = (u8)i;
-
-       /* Start copy, using first DMA channel */
-       dma_chan = container_of(device->common.channels.next,
-                               struct dma_chan,
-                               device_node);
-       if (device->common.device_alloc_chan_resources(dma_chan) < 1) {
-               dev_err(&device->pdev->dev,
-                       "selftest cannot allocate chan resource\n");
-               err = -ENODEV;
-               goto out;
-       }
-
-       dma_src = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE,
-                                DMA_TO_DEVICE);
-       dma_dest = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE,
-                                 DMA_FROM_DEVICE);
-       flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE;
-       tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
-                                                  IOAT_TEST_SIZE, flags);
-       if (!tx) {
-               dev_err(&device->pdev->dev,
-                       "Self-test prep failed, disabling\n");
-               err = -ENODEV;
-               goto free_resources;
-       }
-
-       async_tx_ack(tx);
-       init_completion(&cmp);
-       tx->callback = ioat_dma_test_callback;
-       tx->callback_param = &cmp;
-       cookie = tx->tx_submit(tx);
-       if (cookie < 0) {
-               dev_err(&device->pdev->dev,
-                       "Self-test setup failed, disabling\n");
-               err = -ENODEV;
-               goto free_resources;
-       }
-       device->common.device_issue_pending(dma_chan);
-
-       tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
-
-       if (tmo == 0 ||
-           device->common.device_is_tx_complete(dma_chan, cookie, NULL, NULL)
-                                       != DMA_SUCCESS) {
-               dev_err(&device->pdev->dev,
-                       "Self-test copy timed out, disabling\n");
-               err = -ENODEV;
-               goto free_resources;
-       }
-       if (memcmp(src, dest, IOAT_TEST_SIZE)) {
-               dev_err(&device->pdev->dev,
-                       "Self-test copy failed compare, disabling\n");
-               err = -ENODEV;
-               goto free_resources;
-       }
-
-free_resources:
-       device->common.device_free_chan_resources(dma_chan);
-out:
-       kfree(src);
-       kfree(dest);
-       return err;
-}
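
Taken together, ioat_dma_self_test() is a minimal dmaengine client sequence: map both buffers, prep a memcpy descriptor, ack it, attach a completion callback, submit for a cookie, issue pending work, then wait on the completion and poll device_is_tx_complete before comparing payloads.
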
-
-static char ioat_interrupt_style[32] = "msix";
-module_param_string(ioat_interrupt_style, ioat_interrupt_style,
-                   sizeof(ioat_interrupt_style), 0644);
-MODULE_PARM_DESC(ioat_interrupt_style,
-                "set ioat interrupt style: msix (default), "
-                "msix-single-vector, msi, intx)");
-
-/**
- * ioat_dma_setup_interrupts - setup interrupt handler
- * @device: ioat device
- */
-static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
-{
-       struct ioat_dma_chan *ioat_chan;
-       int err, i, j, msixcnt;
-       u8 intrctrl = 0;
-
-       if (!strcmp(ioat_interrupt_style, "msix"))
-               goto msix;
-       if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
-               goto msix_single_vector;
-       if (!strcmp(ioat_interrupt_style, "msi"))
-               goto msi;
-       if (!strcmp(ioat_interrupt_style, "intx"))
-               goto intx;
-       dev_err(&device->pdev->dev, "invalid ioat_interrupt_style %s\n",
-               ioat_interrupt_style);
-       goto err_no_irq;
-
-msix:
-       /* The number of MSI-X vectors should equal the number of channels */
-       msixcnt = device->common.chancnt;
-       for (i = 0; i < msixcnt; i++)
-               device->msix_entries[i].entry = i;
-
-       err = pci_enable_msix(device->pdev, device->msix_entries, msixcnt);
-       if (err < 0)
-               goto msi;
-       if (err > 0)
-               goto msix_single_vector;
-
-       for (i = 0; i < msixcnt; i++) {
-               ioat_chan = ioat_lookup_chan_by_index(device, i);
-               err = request_irq(device->msix_entries[i].vector,
-                                 ioat_dma_do_interrupt_msix,
-                                 0, "ioat-msix", ioat_chan);
-               if (err) {
-                       for (j = 0; j < i; j++) {
-                               ioat_chan =
-                                       ioat_lookup_chan_by_index(device, j);
-                               free_irq(device->msix_entries[j].vector,
-                                        ioat_chan);
-                       }
-                       goto msix_single_vector;
-               }
-       }
-       intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
-       device->irq_mode = msix_multi_vector;
-       goto done;
-
-msix_single_vector:
-       device->msix_entries[0].entry = 0;
-       err = pci_enable_msix(device->pdev, device->msix_entries, 1);
-       if (err)
-               goto msi;
-
-       err = request_irq(device->msix_entries[0].vector, ioat_dma_do_interrupt,
-                         0, "ioat-msix", device);
-       if (err) {
-               pci_disable_msix(device->pdev);
-               goto msi;
-       }
-       device->irq_mode = msix_single_vector;
-       goto done;
-
-msi:
-       err = pci_enable_msi(device->pdev);
-       if (err)
-               goto intx;
-
-       err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
-                         0, "ioat-msi", device);
-       if (err) {
-               pci_disable_msi(device->pdev);
-               goto intx;
-       }
-       /*
-        * CB 1.2 devices need a bit set in configuration space to enable MSI
-        */
-       if (device->version == IOAT_VER_1_2) {
-               u32 dmactrl;
-               pci_read_config_dword(device->pdev,
-                                     IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
-               dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
-               pci_write_config_dword(device->pdev,
-                                      IOAT_PCI_DMACTRL_OFFSET, dmactrl);
-       }
-       device->irq_mode = msi;
-       goto done;
-
-intx:
-       err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
-                         IRQF_SHARED, "ioat-intx", device);
-       if (err)
-               goto err_no_irq;
-       device->irq_mode = intx;
-
-done:
-       intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
-       writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
-       return 0;
-
-err_no_irq:
-       /* Disable all interrupt generation */
-       writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
-       dev_err(&device->pdev->dev, "no usable interrupts\n");
-       device->irq_mode = none;
-       return -1;
-}
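
The goto ladder above degrades gracefully: per-channel MSI-X, then single-vector MSI-X, then MSI (with the CB 1.2 config-space quirk), then legacy shared INTx. A compact stand-alone sketch of the same pattern, with invented try_*() probes standing in for the pci_enable_*/request_irq pairs:

#include <stdio.h>

static int try_msix(void)        { return -1; } /* pretend unsupported */
static int try_msix_single(void) { return -1; }
static int try_msi(void)         { return 0; }  /* pretend MSI works */
static int try_intx(void)        { return 0; }

static const char *pick_irq_mode(void)
{
        if (!try_msix())
                return "msix";
        if (!try_msix_single())
                return "msix-single-vector";
        if (!try_msi())
                return "msi";
        if (!try_intx())
                return "intx";
        return "none";
}

int main(void)
{
        printf("irq mode: %s\n", pick_irq_mode());
        return 0;
}
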
-
-/**
- * ioat_dma_remove_interrupts - remove whatever interrupts were set
- * @device: ioat device
- */
-static void ioat_dma_remove_interrupts(struct ioatdma_device *device)
-{
-       struct ioat_dma_chan *ioat_chan;
-       int i;
-
-       /* Disable all interrupt generation */
-       writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
-
-       switch (device->irq_mode) {
-       case msix_multi_vector:
-               for (i = 0; i < device->common.chancnt; i++) {
-                       ioat_chan = ioat_lookup_chan_by_index(device, i);
-                       free_irq(device->msix_entries[i].vector, ioat_chan);
-               }
-               pci_disable_msix(device->pdev);
-               break;
-       case msix_single_vector:
-               free_irq(device->msix_entries[0].vector, device);
-               pci_disable_msix(device->pdev);
-               break;
-       case msi:
-               free_irq(device->pdev->irq, device);
-               pci_disable_msi(device->pdev);
-               break;
-       case intx:
-               free_irq(device->pdev->irq, device);
-               break;
-       case none:
-               dev_warn(&device->pdev->dev,
-                        "call to %s without interrupts setup\n", __func__);
-       }
-       device->irq_mode = none;
-}
-
-struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
-                                     void __iomem *iobase)
-{
-       int err;
-       struct ioatdma_device *device;
-
-       device = kzalloc(sizeof(*device), GFP_KERNEL);
-       if (!device) {
-               err = -ENOMEM;
-               goto err_kzalloc;
-       }
-       device->pdev = pdev;
-       device->reg_base = iobase;
-       device->version = readb(device->reg_base + IOAT_VER_OFFSET);
-
-       /* DMA coherent memory pool for DMA descriptor allocations */
-       device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
-                                          sizeof(struct ioat_dma_descriptor),
-                                          64, 0);
-       if (!device->dma_pool) {
-               err = -ENOMEM;
-               goto err_dma_pool;
-       }
-
-       device->completion_pool = pci_pool_create("completion_pool", pdev,
-                                                 sizeof(u64), SMP_CACHE_BYTES,
-                                                 SMP_CACHE_BYTES);
-       if (!device->completion_pool) {
-               err = -ENOMEM;
-               goto err_completion_pool;
-       }
-
-       INIT_LIST_HEAD(&device->common.channels);
-       ioat_dma_enumerate_channels(device);
-
-       device->common.device_alloc_chan_resources =
-                                               ioat_dma_alloc_chan_resources;
-       device->common.device_free_chan_resources =
-                                               ioat_dma_free_chan_resources;
-       device->common.dev = &pdev->dev;
-
-       dma_cap_set(DMA_MEMCPY, device->common.cap_mask);
-       device->common.device_is_tx_complete = ioat_dma_is_complete;
-       switch (device->version) {
-       case IOAT_VER_1_2:
-               device->common.device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
-               device->common.device_issue_pending =
-                                               ioat1_dma_memcpy_issue_pending;
-               break;
-       case IOAT_VER_2_0:
-       case IOAT_VER_3_0:
-               device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy;
-               device->common.device_issue_pending =
-                                               ioat2_dma_memcpy_issue_pending;
-               break;
-       }
-
-       dev_err(&device->pdev->dev,
-               "Intel(R) I/OAT DMA Engine found,"
-               " %d channels, device version 0x%02x, driver version %s\n",
-               device->common.chancnt, device->version, IOAT_DMA_VERSION);
-
-       if (!device->common.chancnt) {
-               dev_err(&device->pdev->dev,
-                       "Intel(R) I/OAT DMA Engine problem found: "
-                       "zero channels detected\n");
-               goto err_setup_interrupts;
-       }
-
-       err = ioat_dma_setup_interrupts(device);
-       if (err)
-               goto err_setup_interrupts;
-
-       err = ioat_dma_self_test(device);
-       if (err)
-               goto err_self_test;
-
-       ioat_set_tcp_copy_break(device);
-
-       dma_async_device_register(&device->common);
-
-       if (device->version != IOAT_VER_3_0) {
-               INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog);
-               schedule_delayed_work(&device->work,
-                                     WATCHDOG_DELAY);
-       }
-
-       return device;
-
-err_self_test:
-       ioat_dma_remove_interrupts(device);
-err_setup_interrupts:
-       pci_pool_destroy(device->completion_pool);
-err_completion_pool:
-       pci_pool_destroy(device->dma_pool);
-err_dma_pool:
-       kfree(device);
-err_kzalloc:
-       dev_err(&pdev->dev,
-               "Intel(R) I/OAT DMA Engine initialization failed\n");
-       return NULL;
-}
-
-void ioat_dma_remove(struct ioatdma_device *device)
-{
-       struct dma_chan *chan, *_chan;
-       struct ioat_dma_chan *ioat_chan;
-
-       if (device->version != IOAT_VER_3_0)
-               cancel_delayed_work(&device->work);
-
-       ioat_dma_remove_interrupts(device);
-
-       dma_async_device_unregister(&device->common);
-
-       pci_pool_destroy(device->dma_pool);
-       pci_pool_destroy(device->completion_pool);
-
-       iounmap(device->reg_base);
-       pci_release_regions(device->pdev);
-       pci_disable_device(device->pdev);
-
-       list_for_each_entry_safe(chan, _chan,
-                                &device->common.channels, device_node) {
-               ioat_chan = to_ioat_chan(chan);
-               list_del(&chan->device_node);
-               kfree(ioat_chan);
-       }
-       kfree(device);
-}
-
diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h
deleted file mode 100644 (file)
index a52ff4b..0000000
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
- * The full GNU General Public License is included in this distribution in the
- * file called COPYING.
- */
-#ifndef IOATDMA_H
-#define IOATDMA_H
-
-#include <linux/dmaengine.h>
-#include "ioatdma_hw.h"
-#include <linux/init.h>
-#include <linux/dmapool.h>
-#include <linux/cache.h>
-#include <linux/pci_ids.h>
-#include <net/tcp.h>
-
-#define IOAT_DMA_VERSION  "3.64"
-
-enum ioat_interrupt {
-       none = 0,
-       msix_multi_vector = 1,
-       msix_single_vector = 2,
-       msi = 3,
-       intx = 4,
-};
-
-#define IOAT_LOW_COMPLETION_MASK       0xffffffc0
-#define IOAT_DMA_DCA_ANY_CPU           ~0
-#define IOAT_WATCHDOG_PERIOD           (2 * HZ)
-
-
-/**
- * struct ioatdma_device - internal representation of an IOAT device
- * @pdev: PCI-Express device
- * @reg_base: MMIO register space base address
- * @dma_pool: for allocating DMA descriptors
- * @common: embedded struct dma_device
- * @version: version of ioatdma device
- * @irq_mode: which style irq to use
- * @msix_entries: irq handlers
- * @idx: per channel data
- */
-
-struct ioatdma_device {
-       struct pci_dev *pdev;
-       void __iomem *reg_base;
-       struct pci_pool *dma_pool;
-       struct pci_pool *completion_pool;
-       struct dma_device common;
-       u8 version;
-       enum ioat_interrupt irq_mode;
-       struct delayed_work work;
-       struct msix_entry msix_entries[4];
-       struct ioat_dma_chan *idx[4];
-};
-
-/**
- * struct ioat_dma_chan - internal representation of a DMA channel
- */
-struct ioat_dma_chan {
-
-       void __iomem *reg_base;
-
-       dma_cookie_t completed_cookie;
-       unsigned long last_completion;
-       unsigned long last_completion_time;
-
-       size_t xfercap; /* XFERCAP register value expanded out */
-
-       spinlock_t cleanup_lock;
-       spinlock_t desc_lock;
-       struct list_head free_desc;
-       struct list_head used_desc;
-       unsigned long watchdog_completion;
-       int watchdog_tcp_cookie;
-       u32 watchdog_last_tcp_cookie;
-       struct delayed_work work;
-
-       int pending;
-       int dmacount;
-       int desccount;
-
-       struct ioatdma_device *device;
-       struct dma_chan common;
-
-       dma_addr_t completion_addr;
-       union {
-               u64 full; /* HW completion writeback */
-               struct {
-                       u32 low;
-                       u32 high;
-               };
-       } *completion_virt;
-       unsigned long last_compl_desc_addr_hw;
-       struct tasklet_struct cleanup_task;
-};
-
-/* wrapper around hardware descriptor format + additional software fields */
-
-/**
- * struct ioat_desc_sw - wrapper around hardware descriptor
- * @hw: hardware DMA descriptor
- * @node: this descriptor will either be on the free list,
- *     or attached to a transaction list (async_tx.tx_list)
- * @tx_cnt: number of descriptors required to complete the transaction
- * @async_tx: the generic software descriptor for all engines
- */
-struct ioat_desc_sw {
-       struct ioat_dma_descriptor *hw;
-       struct list_head node;
-       int tx_cnt;
-       size_t len;
-       dma_addr_t src;
-       dma_addr_t dst;
-       struct dma_async_tx_descriptor async_tx;
-};
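
List walkers over free_desc/used_desc get back the embedded node and recover the wrapper with a container_of()-style cast; the driver's to_ioat_desc() helper is presumably defined along these lines. A userspace demonstration of the idiom (demo_desc is an invented stand-in type):

#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *next, *prev; };
struct demo_desc { int payload; struct list_head node; };

/* userspace stand-in for the kernel's container_of() */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))
#define to_demo_desc(lh) container_of(lh, struct demo_desc, node)

int main(void)
{
        struct demo_desc d = { .payload = 42 };
        struct list_head *lh = &d.node; /* what a list walk hands back */

        printf("payload: %d\n", to_demo_desc(lh)->payload);
        return 0;
}
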
-
-static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev)
-{
-       #ifdef CONFIG_NET_DMA
-       switch (dev->version) {
-       case IOAT_VER_1_2:
-               sysctl_tcp_dma_copybreak = 4096;
-               break;
-       case IOAT_VER_2_0:
-               sysctl_tcp_dma_copybreak = 2048;
-               break;
-       case IOAT_VER_3_0:
-               sysctl_tcp_dma_copybreak = 262144;
-               break;
-       }
-       #endif
-}
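
sysctl_tcp_dma_copybreak is the receive size above which the TCP stack offloads copy-to-user work to the DMA engine; smaller reads stay on the CPU. The thresholds above are thus tuned per generation: IOAT 2.0 offloads from 2 KB up, 1.2 from 4 KB, while 3.0 restricts offload to copies of 256 KB and larger.
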
-
-#if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE)
-struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
-                                     void __iomem *iobase);
-void ioat_dma_remove(struct ioatdma_device *device);
-struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase);
-struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
-struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
-#else
-#define ioat_dma_probe(pdev, iobase)    NULL
-#define ioat_dma_remove(device)         do { } while (0)
-#define ioat_dca_init(pdev, iobase)    NULL
-#define ioat2_dca_init(pdev, iobase)   NULL
-#define ioat3_dca_init(pdev, iobase)   NULL
-#endif
-
-#endif /* IOATDMA_H */
diff --git a/drivers/dma/ioatdma_hw.h b/drivers/dma/ioatdma_hw.h
deleted file mode 100644 (file)
index afa57ee..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
- * The full GNU General Public License is included in this distribution in the
- * file called COPYING.
- */
-#ifndef _IOAT_HW_H_
-#define _IOAT_HW_H_
-
-/* PCI Configuration Space Values */
-#define IOAT_PCI_VID            0x8086
-
-/* CB device ID's */
-#define IOAT_PCI_DID_5000       0x1A38
-#define IOAT_PCI_DID_CNB        0x360B
-#define IOAT_PCI_DID_SCNB       0x65FF
-#define IOAT_PCI_DID_SNB        0x402F
-
-#define IOAT_PCI_RID            0x00
-#define IOAT_PCI_SVID           0x8086
-#define IOAT_PCI_SID            0x8086
-#define IOAT_VER_1_2            0x12    /* Version 1.2 */
-#define IOAT_VER_2_0            0x20    /* Version 2.0 */
-#define IOAT_VER_3_0            0x30    /* Version 3.0 */
-
-struct ioat_dma_descriptor {
-       uint32_t        size;
-       uint32_t        ctl;
-       uint64_t        src_addr;
-       uint64_t        dst_addr;
-       uint64_t        next;
-       uint64_t        rsv1;
-       uint64_t        rsv2;
-       uint64_t        user1;
-       uint64_t        user2;
-};
-
-#define IOAT_DMA_DESCRIPTOR_CTL_INT_GN 0x00000001
-#define IOAT_DMA_DESCRIPTOR_CTL_SRC_SN 0x00000002
-#define IOAT_DMA_DESCRIPTOR_CTL_DST_SN 0x00000004
-#define IOAT_DMA_DESCRIPTOR_CTL_CP_STS 0x00000008
-#define IOAT_DMA_DESCRIPTOR_CTL_FRAME  0x00000010
-#define IOAT_DMA_DESCRIPTOR_NUL                0x00000020
-#define IOAT_DMA_DESCRIPTOR_CTL_SP_BRK 0x00000040
-#define IOAT_DMA_DESCRIPTOR_CTL_DP_BRK 0x00000080
-#define IOAT_DMA_DESCRIPTOR_CTL_BNDL   0x00000100
-#define IOAT_DMA_DESCRIPTOR_CTL_DCA    0x00000200
-#define IOAT_DMA_DESCRIPTOR_CTL_BUFHINT        0x00000400
-
-#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_CONTEXT 0xFF000000
-#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_DMA     0x00000000
-
-#define IOAT_DMA_DESCRIPTOR_CTL_CONTEXT_DCA    0x00000001
-#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_MASK    0xFF000000
-
-#endif
diff --git a/drivers/dma/ioatdma_registers.h b/drivers/dma/ioatdma_registers.h
deleted file mode 100644 (file)
index 49bc277..0000000
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
- * The full GNU General Public License is included in this distribution in the
- * file called COPYING.
- */
-#ifndef _IOAT_REGISTERS_H_
-#define _IOAT_REGISTERS_H_
-
-#define IOAT_PCI_DMACTRL_OFFSET                        0x48
-#define IOAT_PCI_DMACTRL_DMA_EN                        0x00000001
-#define IOAT_PCI_DMACTRL_MSI_EN                        0x00000002
-
-#define IOAT_PCI_DEVICE_ID_OFFSET              0x02
-#define IOAT_PCI_DMAUNCERRSTS_OFFSET           0x148
-#define IOAT_PCI_CHANERRMASK_INT_OFFSET                0x184
-
-/* MMIO Device Registers */
-#define IOAT_CHANCNT_OFFSET                    0x00    /*  8-bit */
-
-#define IOAT_XFERCAP_OFFSET                    0x01    /*  8-bit */
-#define IOAT_XFERCAP_4KB                       12
-#define IOAT_XFERCAP_8KB                       13
-#define IOAT_XFERCAP_16KB                      14
-#define IOAT_XFERCAP_32KB                      15
-#define IOAT_XFERCAP_32GB                      0
-
-#define IOAT_GENCTRL_OFFSET                    0x02    /*  8-bit */
-#define IOAT_GENCTRL_DEBUG_EN                  0x01
-
-#define IOAT_INTRCTRL_OFFSET                   0x03    /*  8-bit */
-#define IOAT_INTRCTRL_MASTER_INT_EN            0x01    /* Master Interrupt Enable */
-#define IOAT_INTRCTRL_INT_STATUS               0x02    /* ATTNSTATUS -or- Channel Int */
-#define IOAT_INTRCTRL_INT                      0x04    /* INT_STATUS -and- MASTER_INT_EN */
-#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL      0x08    /* Enable all MSI-X vectors */
-
-#define IOAT_ATTNSTATUS_OFFSET                 0x04    /* Each bit is a channel */
-
-#define IOAT_VER_OFFSET                                0x08    /*  8-bit */
-#define IOAT_VER_MAJOR_MASK                    0xF0
-#define IOAT_VER_MINOR_MASK                    0x0F
-#define GET_IOAT_VER_MAJOR(x)                  (((x) & IOAT_VER_MAJOR_MASK) >> 4)
-#define GET_IOAT_VER_MINOR(x)                  ((x) & IOAT_VER_MINOR_MASK)
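
For example, a version register value of 0x12 splits into major 1, minor 2, i.e. IOAT_VER_1_2 from ioatdma_hw.h above. A two-line check:

#include <stdio.h>

#define VER_MAJOR(x)    (((x) & 0xF0) >> 4)    /* mirrors GET_IOAT_VER_MAJOR */
#define VER_MINOR(x)    ((x) & 0x0F)           /* mirrors GET_IOAT_VER_MINOR */

int main(void)
{
        unsigned int v = 0x12;  /* as read from IOAT_VER_OFFSET */

        printf("IOAT v%u.%u\n", VER_MAJOR(v), VER_MINOR(v));
        return 0;
}
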
-
-#define IOAT_PERPORTOFFSET_OFFSET              0x0A    /* 16-bit */
-
-#define IOAT_INTRDELAY_OFFSET                  0x0C    /* 16-bit */
-#define IOAT_INTRDELAY_INT_DELAY_MASK          0x3FFF  /* Interrupt Delay Time */
-#define IOAT_INTRDELAY_COALESE_SUPPORT         0x8000  /* Interrupt Coalescing Supported */
-
-#define IOAT_DEVICE_STATUS_OFFSET              0x0E    /* 16-bit */
-#define IOAT_DEVICE_STATUS_DEGRADED_MODE       0x0001
-
-#define IOAT_CHANNEL_MMIO_SIZE                 0x80    /* Each Channel MMIO space is this size */
-
-/* DMA Channel Registers */
-#define IOAT_CHANCTRL_OFFSET                   0x00    /* 16-bit Channel Control Register */
-#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK    0xF000
-#define IOAT_CHANCTRL_CHANNEL_IN_USE           0x0100
-#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL    0x0020
-#define IOAT_CHANCTRL_ERR_INT_EN               0x0010
-#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN         0x0008
-#define IOAT_CHANCTRL_ERR_COMPLETION_EN                0x0004
-#define IOAT_CHANCTRL_INT_DISABLE              0x0001
-
-#define IOAT_DMA_COMP_OFFSET                   0x02    /* 16-bit DMA channel compatibility */
-#define IOAT_DMA_COMP_V1                       0x0001  /* Compatibility with DMA version 1 */
-#define IOAT_DMA_COMP_V2                       0x0002  /* Compatibility with DMA version 2 */
-
-
-#define IOAT1_CHANSTS_OFFSET           0x04    /* 64-bit Channel Status Register */
-#define IOAT2_CHANSTS_OFFSET           0x08    /* 64-bit Channel Status Register */
-#define IOAT_CHANSTS_OFFSET(ver)               ((ver) < IOAT_VER_2_0 \
-                                               ? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET)
-#define IOAT1_CHANSTS_OFFSET_LOW       0x04
-#define IOAT2_CHANSTS_OFFSET_LOW       0x08
-#define IOAT_CHANSTS_OFFSET_LOW(ver)           ((ver) < IOAT_VER_2_0 \
-                                               ? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW)
-#define IOAT1_CHANSTS_OFFSET_HIGH      0x08
-#define IOAT2_CHANSTS_OFFSET_HIGH      0x0C
-#define IOAT_CHANSTS_OFFSET_HIGH(ver)          ((ver) < IOAT_VER_2_0 \
-                                               ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH)
-#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR ~0x3F
-#define IOAT_CHANSTS_SOFT_ERR                  0x0000000000000010
-#define IOAT_CHANSTS_UNAFFILIATED_ERR          0x0000000000000008
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS       0x0000000000000007
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE        0x0
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE  0x1
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED     0x2
-#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED        0x3
-
-
-
-#define IOAT_CHAN_DMACOUNT_OFFSET      0x06    /* 16-bit DMA Count register */
-
-#define IOAT_DCACTRL_OFFSET         0x30   /* 32 bit Direct Cache Access Control Register */
-#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000
-#define IOAT_DCACTRL_TARGET_CPU_MASK   0xFFFF /* APIC ID */
-
-/* CB DCA Memory Space Registers */
-#define IOAT_DCAOFFSET_OFFSET       0x14
-/* CB_BAR + IOAT_DCAOFFSET value */
-#define IOAT_DCA_VER_OFFSET         0x00
-#define IOAT_DCA_VER_MAJOR_MASK     0xF0
-#define IOAT_DCA_VER_MINOR_MASK     0x0F
-
-#define IOAT_DCA_COMP_OFFSET        0x02
-#define IOAT_DCA_COMP_V1            0x1
-
-#define IOAT_FSB_CAPABILITY_OFFSET  0x04
-#define IOAT_FSB_CAPABILITY_PREFETCH    0x1
-
-#define IOAT_PCI_CAPABILITY_OFFSET  0x06
-#define IOAT_PCI_CAPABILITY_MEMWR   0x1
-
-#define IOAT_FSB_CAP_ENABLE_OFFSET  0x08
-#define IOAT_FSB_CAP_ENABLE_PREFETCH    0x1
-
-#define IOAT_PCI_CAP_ENABLE_OFFSET  0x0A
-#define IOAT_PCI_CAP_ENABLE_MEMWR   0x1
-
-#define IOAT_APICID_TAG_MAP_OFFSET  0x0C
-#define IOAT_APICID_TAG_MAP_TAG0    0x0000000F
-#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0
-#define IOAT_APICID_TAG_MAP_TAG1    0x000000F0
-#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4
-#define IOAT_APICID_TAG_MAP_TAG2    0x00000F00
-#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8
-#define IOAT_APICID_TAG_MAP_TAG3    0x0000F000
-#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12
-#define IOAT_APICID_TAG_MAP_TAG4    0x000F0000
-#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16
-#define IOAT_APICID_TAG_CB2_VALID   0x8080808080
-
-#define IOAT_DCA_GREQID_OFFSET      0x10
-#define IOAT_DCA_GREQID_SIZE        0x04
-#define IOAT_DCA_GREQID_MASK        0xFFFF
-#define IOAT_DCA_GREQID_IGNOREFUN   0x10000000
-#define IOAT_DCA_GREQID_VALID       0x20000000
-#define IOAT_DCA_GREQID_LASTID      0x80000000
-
-#define IOAT3_CSI_CAPABILITY_OFFSET 0x08
-#define IOAT3_CSI_CAPABILITY_PREFETCH    0x1
-
-#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A
-#define IOAT3_PCI_CAPABILITY_MEMWR  0x1
-
-#define IOAT3_CSI_CONTROL_OFFSET    0x0C
-#define IOAT3_CSI_CONTROL_PREFETCH  0x1
-
-#define IOAT3_PCI_CONTROL_OFFSET    0x0E
-#define IOAT3_PCI_CONTROL_MEMWR     0x1
-
-#define IOAT3_APICID_TAG_MAP_OFFSET 0x10
-#define IOAT3_APICID_TAG_MAP_OFFSET_LOW  0x10
-#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14
-
-#define IOAT3_DCA_GREQID_OFFSET     0x02
-
-#define IOAT1_CHAINADDR_OFFSET         0x0C    /* 64-bit Descriptor Chain Address Register */
-#define IOAT2_CHAINADDR_OFFSET         0x10    /* 64-bit Descriptor Chain Address Register */
-#define IOAT_CHAINADDR_OFFSET(ver)             ((ver) < IOAT_VER_2_0 \
-                                               ? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET)
-#define IOAT1_CHAINADDR_OFFSET_LOW     0x0C
-#define IOAT2_CHAINADDR_OFFSET_LOW     0x10
-#define IOAT_CHAINADDR_OFFSET_LOW(ver)         ((ver) < IOAT_VER_2_0 \
-                                               ? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW)
-#define IOAT1_CHAINADDR_OFFSET_HIGH    0x10
-#define IOAT2_CHAINADDR_OFFSET_HIGH    0x14
-#define IOAT_CHAINADDR_OFFSET_HIGH(ver)                ((ver) < IOAT_VER_2_0 \
-                                               ? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH)
-
-#define IOAT1_CHANCMD_OFFSET           0x14    /*  8-bit DMA Channel Command Register */
-#define IOAT2_CHANCMD_OFFSET           0x04    /*  8-bit DMA Channel Command Register */
-#define IOAT_CHANCMD_OFFSET(ver)               ((ver) < IOAT_VER_2_0 \
-                                               ? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET)
-#define IOAT_CHANCMD_RESET                     0x20
-#define IOAT_CHANCMD_RESUME                    0x10
-#define IOAT_CHANCMD_ABORT                     0x08
-#define IOAT_CHANCMD_SUSPEND                   0x04
-#define IOAT_CHANCMD_APPEND                    0x02
-#define IOAT_CHANCMD_START                     0x01
-
-#define IOAT_CHANCMP_OFFSET                    0x18    /* 64-bit Channel Completion Address Register */
-#define IOAT_CHANCMP_OFFSET_LOW                        0x18
-#define IOAT_CHANCMP_OFFSET_HIGH               0x1C
-
-#define IOAT_CDAR_OFFSET                       0x20    /* 64-bit Current Descriptor Address Register */
-#define IOAT_CDAR_OFFSET_LOW                   0x20
-#define IOAT_CDAR_OFFSET_HIGH                  0x24
-
-#define IOAT_CHANERR_OFFSET                    0x28    /* 32-bit Channel Error Register */
-#define IOAT_CHANERR_DMA_TRANSFER_SRC_ADDR_ERR 0x0001
-#define IOAT_CHANERR_DMA_TRANSFER_DEST_ADDR_ERR        0x0002
-#define IOAT_CHANERR_NEXT_DESCRIPTOR_ADDR_ERR  0x0004
-#define IOAT_CHANERR_NEXT_DESCRIPTOR_ALIGNMENT_ERR     0x0008
-#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR      0x0010
-#define IOAT_CHANERR_CHANCMD_ERR               0x0020
-#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR  0x0040
-#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR      0x0080
-#define IOAT_CHANERR_READ_DATA_ERR             0x0100
-#define IOAT_CHANERR_WRITE_DATA_ERR            0x0200
-#define IOAT_CHANERR_DESCRIPTOR_CONTROL_ERR    0x0400
-#define IOAT_CHANERR_DESCRIPTOR_LENGTH_ERR     0x0800
-#define IOAT_CHANERR_COMPLETION_ADDR_ERR       0x1000
-#define IOAT_CHANERR_INT_CONFIGURATION_ERR     0x2000
-#define IOAT_CHANERR_SOFT_ERR                  0x4000
-#define IOAT_CHANERR_UNAFFILIATED_ERR          0x8000
-
-#define IOAT_CHANERR_MASK_OFFSET               0x2C    /* 32-bit Channel Error Register */
-
-#endif /* _IOAT_REGISTERS_H_ */
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index 2f05226..645ca8d 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/platform_device.h>
 #include <linux/memory.h>
 #include <linux/ioport.h>
+#include <linux/raid/pq.h>
 
 #include <mach/adma.h>
 
@@ -57,65 +58,110 @@ static void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
        }
 }
 
+static void
+iop_desc_unmap(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
+{
+       struct dma_async_tx_descriptor *tx = &desc->async_tx;
+       struct iop_adma_desc_slot *unmap = desc->group_head;
+       struct device *dev = &iop_chan->device->pdev->dev;
+       u32 len = unmap->unmap_len;
+       enum dma_ctrl_flags flags = tx->flags;
+       u32 src_cnt;
+       dma_addr_t addr;
+       dma_addr_t dest;
+
+       src_cnt = unmap->unmap_src_cnt;
+       dest = iop_desc_get_dest_addr(unmap, iop_chan);
+       if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+               enum dma_data_direction dir;
+
+               if (src_cnt > 1) /* is xor? */
+                       dir = DMA_BIDIRECTIONAL;
+               else
+                       dir = DMA_FROM_DEVICE;
+
+               dma_unmap_page(dev, dest, len, dir);
+       }
+
+       if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+               while (src_cnt--) {
+                       addr = iop_desc_get_src_addr(unmap, iop_chan, src_cnt);
+                       if (addr == dest)
+                               continue;
+                       dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
+               }
+       }
+       desc->group_head = NULL;
+}
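
A note on the direction choices above: for a multi-source (XOR) operation the destination may double as one of the sources, hence DMA_BIDIRECTIONAL for the destination page and the addr == dest skip in the source loop, which avoids unmapping the same page twice.
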
+
+static void
+iop_desc_unmap_pq(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
+{
+       struct dma_async_tx_descriptor *tx = &desc->async_tx;
+       struct iop_adma_desc_slot *unmap = desc->group_head;
+       struct device *dev = &iop_chan->device->pdev->dev;
+       u32 len = unmap->unmap_len;
+       enum dma_ctrl_flags flags = tx->flags;
+       u32 src_cnt = unmap->unmap_src_cnt;
+       dma_addr_t pdest = iop_desc_get_dest_addr(unmap, iop_chan);
+       dma_addr_t qdest = iop_desc_get_qdest_addr(unmap, iop_chan);
+       int i;
+
+       if (tx->flags & DMA_PREP_CONTINUE)
+               src_cnt -= 3;
+
+       if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP) && !desc->pq_check_result) {
+               dma_unmap_page(dev, pdest, len, DMA_BIDIRECTIONAL);
+               dma_unmap_page(dev, qdest, len, DMA_BIDIRECTIONAL);
+       }
+
+       if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+               dma_addr_t addr;
+
+               for (i = 0; i < src_cnt; i++) {
+                       addr = iop_desc_get_src_addr(unmap, iop_chan, i);
+                       dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
+               }
+               if (desc->pq_check_result) {
+                       dma_unmap_page(dev, pdest, len, DMA_TO_DEVICE);
+                       dma_unmap_page(dev, qdest, len, DMA_TO_DEVICE);
+               }
+       }
+
+       desc->group_head = NULL;
+}
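
The src_cnt -= 3 under DMA_PREP_CONTINUE mirrors the prep path below, where a continued P+Q operation counts three extra sources; those trailing entries are presumably re-injected P/Q state rather than client mappings, so they are excluded from the unmap loop.
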
+
+
 static dma_cookie_t
 iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
        struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
 {
-       BUG_ON(desc->async_tx.cookie < 0);
-       if (desc->async_tx.cookie > 0) {
-               cookie = desc->async_tx.cookie;
-               desc->async_tx.cookie = 0;
+       struct dma_async_tx_descriptor *tx = &desc->async_tx;
+
+       BUG_ON(tx->cookie < 0);
+       if (tx->cookie > 0) {
+               cookie = tx->cookie;
+               tx->cookie = 0;
 
                /* call the callback (must not sleep or submit new
                 * operations to this channel)
                 */
-               if (desc->async_tx.callback)
-                       desc->async_tx.callback(
-                               desc->async_tx.callback_param);
+               if (tx->callback)
+                       tx->callback(tx->callback_param);
 
                /* unmap dma addresses
                 * (unmap_single vs unmap_page?)
                 */
                if (desc->group_head && desc->unmap_len) {
-                       struct iop_adma_desc_slot *unmap = desc->group_head;
-                       struct device *dev =
-                               &iop_chan->device->pdev->dev;
-                       u32 len = unmap->unmap_len;
-                       enum dma_ctrl_flags flags = desc->async_tx.flags;
-                       u32 src_cnt;
-                       dma_addr_t addr;
-                       dma_addr_t dest;
-
-                       src_cnt = unmap->unmap_src_cnt;
-                       dest = iop_desc_get_dest_addr(unmap, iop_chan);
-                       if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-                               enum dma_data_direction dir;
-
-                               if (src_cnt > 1) /* is xor? */
-                                       dir = DMA_BIDIRECTIONAL;
-                               else
-                                       dir = DMA_FROM_DEVICE;
-
-                               dma_unmap_page(dev, dest, len, dir);
-                       }
-
-                       if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-                               while (src_cnt--) {
-                                       addr = iop_desc_get_src_addr(unmap,
-                                                                    iop_chan,
-                                                                    src_cnt);
-                                       if (addr == dest)
-                                               continue;
-                                       dma_unmap_page(dev, addr, len,
-                                                      DMA_TO_DEVICE);
-                               }
-                       }
-                       desc->group_head = NULL;
+                       if (iop_desc_is_pq(desc))
+                               iop_desc_unmap_pq(iop_chan, desc);
+                       else
+                               iop_desc_unmap(iop_chan, desc);
                }
        }
 
        /* run dependent operations */
-       dma_run_dependencies(&desc->async_tx);
+       dma_run_dependencies(tx);
 
        return cookie;
 }
@@ -287,7 +333,12 @@ static void iop_adma_tasklet(unsigned long data)
 {
        struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data;
 
-       spin_lock(&iop_chan->lock);
+       /* lockdep will flag dependency submissions as potentially
+        * recursive locking; this is not the case, as a dependency
+        * submission will never recurse into a channel's submit
+        * routine.  There are checks in async_tx.c to prevent this.
+        */
+       spin_lock_nested(&iop_chan->lock, SINGLE_DEPTH_NESTING);
        __iop_adma_slot_cleanup(iop_chan);
        spin_unlock(&iop_chan->lock);
 }
@@ -370,7 +421,7 @@ retry:
                        }
                        alloc_tail->group_head = alloc_start;
                        alloc_tail->async_tx.cookie = -EBUSY;
-                       list_splice(&chain, &alloc_tail->async_tx.tx_list);
+                       list_splice(&chain, &alloc_tail->tx_list);
                        iop_chan->last_used = last_used;
                        iop_desc_clear_next_desc(alloc_start);
                        iop_desc_clear_next_desc(alloc_tail);
@@ -429,7 +480,7 @@ iop_adma_tx_submit(struct dma_async_tx_descriptor *tx)
 
        old_chain_tail = list_entry(iop_chan->chain.prev,
                struct iop_adma_desc_slot, chain_node);
-       list_splice_init(&sw_desc->async_tx.tx_list,
+       list_splice_init(&sw_desc->tx_list,
                         &old_chain_tail->chain_node);
 
        /* fix up the hardware chain */
@@ -496,6 +547,7 @@ static int iop_adma_alloc_chan_resources(struct dma_chan *chan)
 
                dma_async_tx_descriptor_init(&slot->async_tx, chan);
                slot->async_tx.tx_submit = iop_adma_tx_submit;
+               INIT_LIST_HEAD(&slot->tx_list);
                INIT_LIST_HEAD(&slot->chain_node);
                INIT_LIST_HEAD(&slot->slot_node);
                hw_desc = (char *) iop_chan->device->dma_desc_pool;
@@ -660,9 +712,9 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
 }
 
 static struct dma_async_tx_descriptor *
-iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src,
-                          unsigned int src_cnt, size_t len, u32 *result,
-                          unsigned long flags)
+iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src,
+                         unsigned int src_cnt, size_t len, u32 *result,
+                         unsigned long flags)
 {
        struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
        struct iop_adma_desc_slot *sw_desc, *grp_start;
@@ -696,6 +748,118 @@ iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src,
        return sw_desc ? &sw_desc->async_tx : NULL;
 }
 
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+                    unsigned int src_cnt, const unsigned char *scf, size_t len,
+                    unsigned long flags)
+{
+       struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+       struct iop_adma_desc_slot *sw_desc, *g;
+       int slot_cnt, slots_per_op;
+       int continue_srcs;
+
+       if (unlikely(!len))
+               return NULL;
+       BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
+
+       dev_dbg(iop_chan->device->common.dev,
+               "%s src_cnt: %d len: %zu flags: %lx\n",
+               __func__, src_cnt, len, flags);
+
+       if (dmaf_p_disabled_continue(flags))
+               continue_srcs = 1+src_cnt;
+       else if (dmaf_continue(flags))
+               continue_srcs = 3+src_cnt;
+       else
+               continue_srcs = 0+src_cnt;
+
+       spin_lock_bh(&iop_chan->lock);
+       slot_cnt = iop_chan_pq_slot_count(len, continue_srcs, &slots_per_op);
+       sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+       if (sw_desc) {
+               int i;
+
+               g = sw_desc->group_head;
+               iop_desc_set_byte_count(g, iop_chan, len);
+
+               /* even if P is disabled its destination address (bits
+                * [3:0]) must match Q.  It is OK if P points to an
+                * invalid address; it won't be written.
+                */
+               if (flags & DMA_PREP_PQ_DISABLE_P)
+                       dst[0] = dst[1] & 0x7;
+
+               iop_desc_set_pq_addr(g, dst);
+               sw_desc->unmap_src_cnt = src_cnt;
+               sw_desc->unmap_len = len;
+               sw_desc->async_tx.flags = flags;
+               for (i = 0; i < src_cnt; i++)
+                       iop_desc_set_pq_src_addr(g, i, src[i], scf[i]);
+
+               /* if we are continuing a previous operation factor in
+                * the old p and q values, see the comment for dma_maxpq
+                * in include/linux/dmaengine.h
+                */
+               if (dmaf_p_disabled_continue(flags))
+                       iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
+               else if (dmaf_continue(flags)) {
+                       iop_desc_set_pq_src_addr(g, i++, dst[0], 0);
+                       iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
+                       iop_desc_set_pq_src_addr(g, i++, dst[1], 0);
+               }
+               iop_desc_init_pq(g, i, flags);
+       }
+       spin_unlock_bh(&iop_chan->lock);
+
+       return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+                        unsigned int src_cnt, const unsigned char *scf,
+                        size_t len, enum sum_check_flags *pqres,
+                        unsigned long flags)
+{
+       struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+       struct iop_adma_desc_slot *sw_desc, *g;
+       int slot_cnt, slots_per_op;
+
+       if (unlikely(!len))
+               return NULL;
+       BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
+
+       dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %zu\n",
+               __func__, src_cnt, len);
+
+       spin_lock_bh(&iop_chan->lock);
+       slot_cnt = iop_chan_pq_zero_sum_slot_count(len, src_cnt + 2, &slots_per_op);
+       sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+       if (sw_desc) {
+               /* for validate operations p and q are tagged onto the
+                * end of the source list
+                */
+               int pq_idx = src_cnt;
+
+               g = sw_desc->group_head;
+               iop_desc_init_pq_zero_sum(g, src_cnt+2, flags);
+               iop_desc_set_pq_zero_sum_byte_count(g, len);
+               g->pq_check_result = pqres;
+               pr_debug("\t%s: g->pq_check_result: %p\n",
+                       __func__, g->pq_check_result);
+               sw_desc->unmap_src_cnt = src_cnt+2;
+               sw_desc->unmap_len = len;
+               sw_desc->async_tx.flags = flags;
+               while (src_cnt--)
+                       iop_desc_set_pq_zero_sum_src_addr(g, src_cnt,
+                                                         src[src_cnt],
+                                                         scf[src_cnt]);
+               iop_desc_set_pq_zero_sum_addr(g, pq_idx, src);
+       }
+       spin_unlock_bh(&iop_chan->lock);
+
+       return sw_desc ? &sw_desc->async_tx : NULL;
+}
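For context, a hedged sketch of how a dmaengine client could exercise the two operations above. Buffers are assumed to be already dma-mapped, completion waiting is elided, and example_pq_roundtrip is an illustrative name; the prep signatures match the functions above, and raid6_gfexp is the GF(2^8) exponent table from linux/raid/pq.h.

    #include <linux/dmaengine.h>
    #include <linux/raid/pq.h>

    /* illustrative only: generate P/Q over src_cnt blocks, then validate */
    static int example_pq_roundtrip(struct dma_chan *chan, dma_addr_t *dst,
                                    dma_addr_t *src, unsigned int src_cnt,
                                    size_t len)
    {
            struct dma_async_tx_descriptor *tx;
            enum sum_check_flags res = 0;

            tx = chan->device->device_prep_dma_pq(chan, dst, src, src_cnt,
                                                  (u8 *)raid6_gfexp, len,
                                                  DMA_PREP_INTERRUPT |
                                                  DMA_CTRL_ACK);
            if (!tx)
                    return -ENOMEM;
            tx->tx_submit(tx);
            dma_async_issue_pending(chan);
            /* a real client waits for completion (tx->callback) here */

            tx = chan->device->device_prep_dma_pq_val(chan, dst, src, src_cnt,
                                                      (u8 *)raid6_gfexp, len,
                                                      &res,
                                                      DMA_PREP_INTERRUPT |
                                                      DMA_CTRL_ACK);
            if (!tx)
                    return -ENOMEM;
            tx->tx_submit(tx);
            dma_async_issue_pending(chan);
            /* ...and again here, before inspecting the result */

            return res ? -EIO : 0;
    }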
+
 static void iop_adma_free_chan_resources(struct dma_chan *chan)
 {
        struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
@@ -906,7 +1070,7 @@ out:
 
 #define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */
 static int __devinit
-iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
+iop_adma_xor_val_self_test(struct iop_adma_device *device)
 {
        int i, src_idx;
        struct page *dest;
@@ -1002,7 +1166,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
                PAGE_SIZE, DMA_TO_DEVICE);
 
        /* skip zero sum if the capability is not present */
-       if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask))
+       if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
                goto free_resources;
 
        /* zero sum the sources with the destination page */
@@ -1016,10 +1180,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
                dma_srcs[i] = dma_map_page(dma_chan->device->dev,
                                           zero_sum_srcs[i], 0, PAGE_SIZE,
                                           DMA_TO_DEVICE);
-       tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs,
-                                       IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
-                                       &zero_sum_result,
-                                       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+       tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
+                                      IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+                                      &zero_sum_result,
+                                      DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 
        cookie = iop_adma_tx_submit(tx);
        iop_adma_issue_pending(dma_chan);
@@ -1072,10 +1236,10 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
                dma_srcs[i] = dma_map_page(dma_chan->device->dev,
                                           zero_sum_srcs[i], 0, PAGE_SIZE,
                                           DMA_TO_DEVICE);
-       tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs,
-                                       IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
-                                       &zero_sum_result,
-                                       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+       tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
+                                      IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+                                      &zero_sum_result,
+                                      DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 
        cookie = iop_adma_tx_submit(tx);
        iop_adma_issue_pending(dma_chan);
@@ -1105,6 +1269,170 @@ out:
        return err;
 }
 
+#ifdef CONFIG_MD_RAID6_PQ
+static int __devinit
+iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
+{
+       /* combined sources, software pq results, and extra hw pq results */
+       struct page *pq[IOP_ADMA_NUM_SRC_TEST+2+2];
+       /* ptr to the extra hw pq buffers defined above */
+       struct page **pq_hw = &pq[IOP_ADMA_NUM_SRC_TEST+2];
+       /* address conversion buffers (dma_map / page_address) */
+       void *pq_sw[IOP_ADMA_NUM_SRC_TEST+2];
+       dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST];
+       dma_addr_t pq_dest[2];
+
+       int i;
+       struct dma_async_tx_descriptor *tx;
+       struct dma_chan *dma_chan;
+       dma_cookie_t cookie;
+       u32 zero_sum_result;
+       int err = 0;
+       struct device *dev;
+
+       dev_dbg(device->common.dev, "%s\n", __func__);
+
+       for (i = 0; i < ARRAY_SIZE(pq); i++) {
+               pq[i] = alloc_page(GFP_KERNEL);
+               if (!pq[i]) {
+                       while (i--)
+                               __free_page(pq[i]);
+                       return -ENOMEM;
+               }
+       }
+
+       /* Fill in src buffers */
+       for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) {
+               pq_sw[i] = page_address(pq[i]);
+               memset(pq_sw[i], 0x11111111 * (1<<i), PAGE_SIZE);
+       }
+       pq_sw[i] = page_address(pq[i]);
+       pq_sw[i+1] = page_address(pq[i+1]);
+
+       dma_chan = container_of(device->common.channels.next,
+                               struct dma_chan,
+                               device_node);
+       if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
+               err = -ENODEV;
+               goto out;
+       }
+
+       dev = dma_chan->device->dev;
+
+       /* initialize the dests */
+       memset(page_address(pq_hw[0]), 0, PAGE_SIZE);
+       memset(page_address(pq_hw[1]), 0, PAGE_SIZE);
+
+       /* test pq */
+       pq_dest[0] = dma_map_page(dev, pq_hw[0], 0, PAGE_SIZE, DMA_FROM_DEVICE);
+       pq_dest[1] = dma_map_page(dev, pq_hw[1], 0, PAGE_SIZE, DMA_FROM_DEVICE);
+       for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
+               pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+                                        DMA_TO_DEVICE);
+
+       tx = iop_adma_prep_dma_pq(dma_chan, pq_dest, pq_src,
+                                 IOP_ADMA_NUM_SRC_TEST, (u8 *)raid6_gfexp,
+                                 PAGE_SIZE,
+                                 DMA_PREP_INTERRUPT |
+                                 DMA_CTRL_ACK);
+
+       cookie = iop_adma_tx_submit(tx);
+       iop_adma_issue_pending(dma_chan);
+       msleep(8);
+
+       if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+               DMA_SUCCESS) {
+               dev_err(dev, "Self-test pq timed out, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       raid6_call.gen_syndrome(IOP_ADMA_NUM_SRC_TEST+2, PAGE_SIZE, pq_sw);
+
+       if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST],
+                  page_address(pq_hw[0]), PAGE_SIZE) != 0) {
+               dev_err(dev, "Self-test p failed compare, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+       if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST+1],
+                  page_address(pq_hw[1]), PAGE_SIZE) != 0) {
+               dev_err(dev, "Self-test q failed compare, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       /* test correct zero sum using the software generated pq values */
+       for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
+               pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+                                        DMA_TO_DEVICE);
+
+       zero_sum_result = ~0;
+       tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
+                                     pq_src, IOP_ADMA_NUM_SRC_TEST,
+                                     raid6_gfexp, PAGE_SIZE, &zero_sum_result,
+                                     DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
+
+       cookie = iop_adma_tx_submit(tx);
+       iop_adma_issue_pending(dma_chan);
+       msleep(8);
+
+       if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+               DMA_SUCCESS) {
+               dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       if (zero_sum_result != 0) {
+               dev_err(dev, "Self-test pq-zero-sum failed to validate: %x\n",
+                       zero_sum_result);
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       /* test incorrect zero sum */
+       i = IOP_ADMA_NUM_SRC_TEST;
+       memset(pq_sw[i] + 100, 0, 100);
+       memset(pq_sw[i+1] + 200, 0, 200);
+       for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
+               pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+                                        DMA_TO_DEVICE);
+
+       zero_sum_result = 0;
+       tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
+                                     pq_src, IOP_ADMA_NUM_SRC_TEST,
+                                     raid6_gfexp, PAGE_SIZE, &zero_sum_result,
+                                     DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
+
+       cookie = iop_adma_tx_submit(tx);
+       iop_adma_issue_pending(dma_chan);
+       msleep(8);
+
+       if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+               DMA_SUCCESS) {
+               dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n");
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+       if (zero_sum_result != (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT)) {
+               dev_err(dev, "Self-test !pq-zero-sum failed to validate: %x\n",
+                       zero_sum_result);
+               err = -ENODEV;
+               goto free_resources;
+       }
+
+free_resources:
+       iop_adma_free_chan_resources(dma_chan);
+out:
+       i = ARRAY_SIZE(pq);
+       while (i--)
+               __free_page(pq[i]);
+       return err;
+}
+#endif
+
 static int __devexit iop_adma_remove(struct platform_device *dev)
 {
        struct iop_adma_device *device = platform_get_drvdata(dev);
@@ -1192,9 +1520,16 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
                dma_dev->max_xor = iop_adma_get_max_xor();
                dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor;
        }
-       if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask))
-               dma_dev->device_prep_dma_zero_sum =
-                       iop_adma_prep_dma_zero_sum;
+       if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask))
+               dma_dev->device_prep_dma_xor_val =
+                       iop_adma_prep_dma_xor_val;
+       if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+               dma_set_maxpq(dma_dev, iop_adma_get_max_pq(), 0);
+               dma_dev->device_prep_dma_pq = iop_adma_prep_dma_pq;
+       }
+       if (dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask))
+               dma_dev->device_prep_dma_pq_val =
+                       iop_adma_prep_dma_pq_val;
        if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
                dma_dev->device_prep_dma_interrupt =
                        iop_adma_prep_dma_interrupt;
@@ -1248,23 +1583,35 @@ static int __devinit iop_adma_probe(struct platform_device *pdev)
        }
 
        if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
-               dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
-               ret = iop_adma_xor_zero_sum_self_test(adev);
+           dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
+               ret = iop_adma_xor_val_self_test(adev);
                dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
                if (ret)
                        goto err_free_iop_chan;
        }
 
+       if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) &&
+           dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) {
+               #ifdef CONFIG_MD_RAID6_PQ
+               ret = iop_adma_pq_zero_sum_self_test(adev);
+               dev_dbg(&pdev->dev, "pq self test returned %d\n", ret);
+               #else
+               /* cannot test raid6, so do not publish the capabilities */
+               dma_cap_clear(DMA_PQ, dma_dev->cap_mask);
+               dma_cap_clear(DMA_PQ_VAL, dma_dev->cap_mask);
+               ret = 0;
+               #endif
+               if (ret)
+                       goto err_free_iop_chan;
+       }
+
        dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: "
-         "( %s%s%s%s%s%s%s%s%s%s)\n",
-         dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "",
-         dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "",
-         dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "",
+         "( %s%s%s%s%s%s%s)\n",
+         dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "",
+         dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "",
          dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
-         dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "",
-         dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "",
+         dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "",
          dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)  ? "fill " : "",
-         dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "",
          dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
          dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
 
@@ -1296,7 +1643,7 @@ static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan)
        if (sw_desc) {
                grp_start = sw_desc->group_head;
 
-               list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain);
+               list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
                async_tx_ack(&sw_desc->async_tx);
                iop_desc_init_memcpy(grp_start, 0);
                iop_desc_set_byte_count(grp_start, iop_chan, 0);
@@ -1352,7 +1699,7 @@ static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan)
        sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
        if (sw_desc) {
                grp_start = sw_desc->group_head;
-               list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain);
+               list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
                async_tx_ack(&sw_desc->async_tx);
                iop_desc_init_null_xor(grp_start, 2, 0);
                iop_desc_set_byte_count(grp_start, iop_chan, 0);
index 9f6fe46..c0a272c 100644 (file)
@@ -183,6 +183,11 @@ dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov,
                                        iov_byte_offset,
                                        kdata,
                                        copy);
+                       /* poll for a descriptor slot */
+                       if (unlikely(dma_cookie < 0)) {
+                               dma_async_issue_pending(chan);
+                               continue;
+                       }
 
                        len -= copy;
                        iov[iovec_idx].iov_len -= copy;
@@ -248,6 +253,11 @@ dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov,
                                        page,
                                        offset,
                                        copy);
+                       /* poll for a descriptor slot */
+                       if (unlikely(dma_cookie < 0)) {
+                               dma_async_issue_pending(chan);
+                               continue;
+                       }
 
                        len -= copy;
                        iov[iovec_idx].iov_len -= copy;
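Both hunks above add the same back-off: a negative cookie from the copy helper now means no descriptor slot was free, so the caller nudges the engine and retries instead of failing the copy. A hedged stand-alone sketch of the idiom (copy_with_backoff is an illustrative name, not part of this patch):

    #include <linux/dmaengine.h>

    static dma_cookie_t copy_with_backoff(struct dma_chan *chan,
                                          void *dst, void *src, size_t len)
    {
            dma_cookie_t cookie;

            do {
                    cookie = dma_async_memcpy_buf_to_buf(chan, dst, src, len);
                    if (cookie < 0)
                            /* no slot free: let pending work retire */
                            dma_async_issue_pending(chan);
            } while (cookie < 0);

            return cookie;
    }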
index 3f23eab..466ab10 100644 (file)
@@ -517,7 +517,7 @@ retry:
                        }
                        alloc_tail->group_head = alloc_start;
                        alloc_tail->async_tx.cookie = -EBUSY;
-                       list_splice(&chain, &alloc_tail->async_tx.tx_list);
+                       list_splice(&chain, &alloc_tail->tx_list);
                        mv_chan->last_used = last_used;
                        mv_desc_clear_next_desc(alloc_start);
                        mv_desc_clear_next_desc(alloc_tail);
@@ -565,14 +565,14 @@ mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
        cookie = mv_desc_assign_cookie(mv_chan, sw_desc);
 
        if (list_empty(&mv_chan->chain))
-               list_splice_init(&sw_desc->async_tx.tx_list, &mv_chan->chain);
+               list_splice_init(&sw_desc->tx_list, &mv_chan->chain);
        else {
                new_hw_chain = 0;
 
                old_chain_tail = list_entry(mv_chan->chain.prev,
                                            struct mv_xor_desc_slot,
                                            chain_node);
-               list_splice_init(&grp_start->async_tx.tx_list,
+               list_splice_init(&grp_start->tx_list,
                                 &old_chain_tail->chain_node);
 
                if (!mv_can_chain(grp_start))
@@ -632,6 +632,7 @@ static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
                slot->async_tx.tx_submit = mv_xor_tx_submit;
                INIT_LIST_HEAD(&slot->chain_node);
                INIT_LIST_HEAD(&slot->slot_node);
+               INIT_LIST_HEAD(&slot->tx_list);
                hw_desc = (char *) mv_chan->device->dma_desc_pool;
                slot->async_tx.phys =
                        (dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE];
index 06cafe1..977b592 100644 (file)
@@ -126,9 +126,8 @@ struct mv_xor_chan {
  * @idx: pool index
  * @unmap_src_cnt: number of xor sources
  * @unmap_len: transaction bytecount
+ * @tx_list: list of slots that make up a multi-descriptor transaction
  * @async_tx: support for the async_tx api
- * @group_list: list of slots that make up a multi-descriptor transaction
- *     for example transfer lengths larger than the supported hw max
  * @xor_check_result: result of zero sum
  * @crc32_result: result crc calculation
  */
@@ -145,6 +144,7 @@ struct mv_xor_desc_slot {
        u16                     unmap_src_cnt;
        u32                     value;
        size_t                  unmap_len;
+       struct list_head        tx_list;
        struct dma_async_tx_descriptor  async_tx;
        union {
                u32             *xor_check_result;
diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
new file mode 100644 (file)
index 0000000..b3b065c
--- /dev/null
@@ -0,0 +1,786 @@
+/*
+ * Renesas SuperH DMA Engine support
+ *
+ * based on drivers/dma/fsldma.c
+ *
+ * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * - The SuperH DMAC does not have a hardware DMA chain mode.
+ * - The maximum DMA transfer size is 16MB.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/platform_device.h>
+#include <cpu/dma.h>
+#include <asm/dma-sh.h>
+#include "shdma.h"
+
+/* DMA descriptor control */
+#define DESC_LAST      (-1)
+#define DESC_COMP      (1)
+#define DESC_NCOMP     (0)
+
+#define NR_DESCS_PER_CHANNEL 32
+/*
+ * Define the default configuration for dual-address memory-memory transfer.
+ * The 0x400 value represents auto-request, external->external.
+ *
+ * This driver uses 4-byte (32-bit) burst mode by default.  To change the
+ * mode, change the value of RS_DEFAULT, e.g. to select 1-byte burst mode
+ * use (RS_DUAL & ~TS_32).
+ */
+#define RS_DEFAULT  (RS_DUAL)
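    /*
     * Illustrative alternative, not part of this patch: per the note
     * above, 1-byte burst mode would be selected by masking off the
     * 32-bit transfer-size bit:
     *
     *   #define RS_DEFAULT  (RS_DUAL & ~TS_32)
     */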
+
+#define SH_DMAC_CHAN_BASE(id) (dma_base_addr[id])
+static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg)
+{
+       ctrl_outl(data, (SH_DMAC_CHAN_BASE(sh_dc->id) + reg));
+}
+
+static u32 sh_dmae_readl(struct sh_dmae_chan *sh_dc, u32 reg)
+{
+       return ctrl_inl((SH_DMAC_CHAN_BASE(sh_dc->id) + reg));
+}
+
+static void dmae_init(struct sh_dmae_chan *sh_chan)
+{
+       u32 chcr = RS_DEFAULT; /* default is DUAL mode */
+       sh_dmae_writel(sh_chan, chcr, CHCR);
+}
+
+/*
+ * Reset DMA controller
+ *
+ * SH7780 has two DMAOR register
+ */
+static void sh_dmae_ctl_stop(int id)
+{
+       unsigned short dmaor = dmaor_read_reg(id);
+
+       dmaor &= ~(DMAOR_NMIF | DMAOR_AE);
+       dmaor_write_reg(id, dmaor);
+}
+
+static int sh_dmae_rst(int id)
+{
+       unsigned short dmaor;
+
+       sh_dmae_ctl_stop(id);
+       dmaor = (dmaor_read_reg(id)|DMAOR_INIT);
+
+       dmaor_write_reg(id, dmaor);
+       if ((dmaor_read_reg(id) & (DMAOR_AE | DMAOR_NMIF))) {
+               pr_warning("dma-sh: Can't initialize DMAOR.\n");
+               return -EINVAL;
+       }
+       return 0;
+}
+
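+/* returns -EBUSY while a transfer is in flight, 0 when the channel is idle */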
+static int dmae_is_idle(struct sh_dmae_chan *sh_chan)
+{
+       u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+       if (chcr & CHCR_DE) {
+               if (!(chcr & CHCR_TE))
+                       return -EBUSY; /* working */
+       }
+       return 0; /* waiting */
+}
+
+static inline unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan)
+{
+       u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+       return ts_shift[(chcr & CHCR_TS_MASK) >> CHCR_TS_SHIFT];
+}
+
+static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs hw)
+{
+       sh_dmae_writel(sh_chan, hw.sar, SAR);
+       sh_dmae_writel(sh_chan, hw.dar, DAR);
+       sh_dmae_writel(sh_chan,
+               (hw.tcr >> calc_xmit_shift(sh_chan)), TCR);
+}
+
+static void dmae_start(struct sh_dmae_chan *sh_chan)
+{
+       u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+
+       chcr |= (CHCR_DE|CHCR_IE);
+       sh_dmae_writel(sh_chan, chcr, CHCR);
+}
+
+static void dmae_halt(struct sh_dmae_chan *sh_chan)
+{
+       u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+
+       chcr &= ~(CHCR_DE | CHCR_TE | CHCR_IE);
+       sh_dmae_writel(sh_chan, chcr, CHCR);
+}
+
+static int dmae_set_chcr(struct sh_dmae_chan *sh_chan, u32 val)
+{
+       int ret = dmae_is_idle(sh_chan);
+       /* CHCR cannot be written while the DMA channel is active */
+       if (ret)
+               return ret;
+
+       sh_dmae_writel(sh_chan, val, CHCR);
+       return 0;
+}
+
+#define DMARS1_ADDR    0x04
+#define DMARS2_ADDR    0x08
+#define DMARS_SHIFT 8
+#define DMARS_CHAN_MSK 0x01
+static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val)
+{
+       u32 addr;
+       int shift = 0;
+       int ret = dmae_is_idle(sh_chan);
+       if (ret)
+               return ret;
+
+       if (sh_chan->id & DMARS_CHAN_MSK)
+               shift = DMARS_SHIFT;
+
+       switch (sh_chan->id) {
+       /* DMARS0 */
+       case 0:
+       case 1:
+               addr = SH_DMARS_BASE;
+               break;
+       /* DMARS1 */
+       case 2:
+       case 3:
+               addr = (SH_DMARS_BASE + DMARS1_ADDR);
+               break;
+       /* DMARS2 */
+       case 4:
+       case 5:
+               addr = (SH_DMARS_BASE + DMARS2_ADDR);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       /* merge val into its byte lane, preserving the other channel's byte */
+       ctrl_outw((val << shift) |
+               (ctrl_inw(addr) & (shift ? 0x00FF : 0xFF00)),
+               addr);
+
+       return 0;
+}
+
+static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+       struct sh_desc *desc = tx_to_sh_desc(tx);
+       struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan);
+       dma_cookie_t cookie;
+
+       spin_lock_bh(&sh_chan->desc_lock);
+
+       cookie = sh_chan->common.cookie;
+       cookie++;
+       if (cookie < 0)
+               cookie = 1;
+
+       /* assign a new cookie unless this descriptor is marked
+        * -EBUSY (a single-descriptor transaction keeps that marker) */
+       if (desc->async_tx.cookie != -EBUSY)
+               desc->async_tx.cookie = cookie;
+       sh_chan->common.cookie = desc->async_tx.cookie;
+
+       list_splice_init(&desc->tx_list, sh_chan->ld_queue.prev);
+
+       spin_unlock_bh(&sh_chan->desc_lock);
+
+       return cookie;
+}
+
+static struct sh_desc *sh_dmae_get_desc(struct sh_dmae_chan *sh_chan)
+{
+       struct sh_desc *desc, *_desc, *ret = NULL;
+
+       spin_lock_bh(&sh_chan->desc_lock);
+       list_for_each_entry_safe(desc, _desc, &sh_chan->ld_free, node) {
+               if (async_tx_test_ack(&desc->async_tx)) {
+                       list_del(&desc->node);
+                       ret = desc;
+                       break;
+               }
+       }
+       spin_unlock_bh(&sh_chan->desc_lock);
+
+       return ret;
+}
+
+static void sh_dmae_put_desc(struct sh_dmae_chan *sh_chan, struct sh_desc *desc)
+{
+       if (desc) {
+               spin_lock_bh(&sh_chan->desc_lock);
+
+               list_splice_init(&desc->tx_list, &sh_chan->ld_free);
+               list_add(&desc->node, &sh_chan->ld_free);
+
+               spin_unlock_bh(&sh_chan->desc_lock);
+       }
+}
+
+static int sh_dmae_alloc_chan_resources(struct dma_chan *chan)
+{
+       struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+       struct sh_desc *desc;
+
+       spin_lock_bh(&sh_chan->desc_lock);
+       while (sh_chan->descs_allocated < NR_DESCS_PER_CHANNEL) {
+               spin_unlock_bh(&sh_chan->desc_lock);
+               desc = kzalloc(sizeof(struct sh_desc), GFP_KERNEL);
+               if (!desc) {
+                       spin_lock_bh(&sh_chan->desc_lock);
+                       break;
+               }
+               dma_async_tx_descriptor_init(&desc->async_tx,
+                                       &sh_chan->common);
+               desc->async_tx.tx_submit = sh_dmae_tx_submit;
+               desc->async_tx.flags = DMA_CTRL_ACK;
+               INIT_LIST_HEAD(&desc->tx_list);
+               sh_dmae_put_desc(sh_chan, desc);
+
+               spin_lock_bh(&sh_chan->desc_lock);
+               sh_chan->descs_allocated++;
+       }
+       spin_unlock_bh(&sh_chan->desc_lock);
+
+       return sh_chan->descs_allocated;
+}
+
+/*
+ * sh_dmae_free_chan_resources - Free all resources of the channel.
+ */
+static void sh_dmae_free_chan_resources(struct dma_chan *chan)
+{
+       struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+       struct sh_desc *desc, *_desc;
+       LIST_HEAD(list);
+
+       BUG_ON(!list_empty(&sh_chan->ld_queue));
+       spin_lock_bh(&sh_chan->desc_lock);
+
+       list_splice_init(&sh_chan->ld_free, &list);
+       sh_chan->descs_allocated = 0;
+
+       spin_unlock_bh(&sh_chan->desc_lock);
+
+       list_for_each_entry_safe(desc, _desc, &list, node)
+               kfree(desc);
+}
+
+static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy(
+       struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src,
+       size_t len, unsigned long flags)
+{
+       struct sh_dmae_chan *sh_chan;
+       struct sh_desc *first = NULL, *prev = NULL, *new;
+       size_t copy_size;
+
+       if (!chan)
+               return NULL;
+
+       if (!len)
+               return NULL;
+
+       sh_chan = to_sh_chan(chan);
+
+       do {
+               /* Allocate the link descriptor from DMA pool */
+               new = sh_dmae_get_desc(sh_chan);
+               if (!new) {
+                       dev_err(sh_chan->dev,
+                                       "No free memory for link descriptor\n");
+                       goto err_get_desc;
+               }
+
+               copy_size = min(len, (size_t)SH_DMA_TCR_MAX);
+
+               new->hw.sar = dma_src;
+               new->hw.dar = dma_dest;
+               new->hw.tcr = copy_size;
+               if (!first)
+                       first = new;
+
+               new->mark = DESC_NCOMP;
+               async_tx_ack(&new->async_tx);
+
+               prev = new;
+               len -= copy_size;
+               dma_src += copy_size;
+               dma_dest += copy_size;
+               /* Insert the link descriptor to the LD ring */
+               list_add_tail(&new->node, &first->tx_list);
+       } while (len);
+
+       new->async_tx.flags = flags; /* client is in control of this ack */
+       new->async_tx.cookie = -EBUSY; /* Last desc */
+
+       return &first->async_tx;
+
+err_get_desc:
+       sh_dmae_put_desc(sh_chan, first);
+       return NULL;
+
+}
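As a hedged aside (the helper below is illustrative, not in the driver): since each link descriptor covers at most SH_DMA_TCR_MAX bytes, the loop above consumes a predictable number of descriptors from the free list per copy:

    #include <linux/kernel.h>       /* DIV_ROUND_UP */

    /* link descriptors needed for a memcpy of len bytes */
    static inline size_t sh_example_desc_count(size_t len)
    {
            return DIV_ROUND_UP(len, (size_t)SH_DMA_TCR_MAX);
    }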
+
+/*
+ * sh_dmae_chan_ld_cleanup - Clean up link descriptors
+ *
+ * This function cleans up the ld_queue of the DMA channel.
+ */
+static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan)
+{
+       struct sh_desc *desc, *_desc;
+
+       spin_lock_bh(&sh_chan->desc_lock);
+       list_for_each_entry_safe(desc, _desc, &sh_chan->ld_queue, node) {
+               dma_async_tx_callback callback;
+               void *callback_param;
+
+               /* descriptor not yet transferred: stop here */
+               if (desc->mark == DESC_NCOMP)
+                       break;
+
+               /* transferred descriptor */
+               callback = desc->async_tx.callback;
+               callback_param = desc->async_tx.callback_param;
+
+               /* Remove from ld_queue list */
+               list_splice_init(&desc->tx_list, &sh_chan->ld_free);
+
+               dev_dbg(sh_chan->dev, "link descriptor %p will be recycled\n",
+                               desc);
+
+               list_move(&desc->node, &sh_chan->ld_free);
+               /* Run the link descriptor callback function */
+               if (callback) {
+                       spin_unlock_bh(&sh_chan->desc_lock);
+                       dev_dbg(sh_chan->dev, "link descriptor %p callback\n",
+                                       desc);
+                       callback(callback_param);
+                       spin_lock_bh(&sh_chan->desc_lock);
+               }
+       }
+       spin_unlock_bh(&sh_chan->desc_lock);
+}
+
+static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan)
+{
+       struct list_head *ld_node;
+       struct sh_dmae_regs hw;
+
+       /* DMA work check */
+       if (dmae_is_idle(sh_chan))
+               return;
+
+       /* Find the first untransferred descriptor */
+       for (ld_node = sh_chan->ld_queue.next;
+               (ld_node != &sh_chan->ld_queue)
+                       && (to_sh_desc(ld_node)->mark == DESC_COMP);
+               ld_node = ld_node->next)
+               cpu_relax();
+
+       if (ld_node != &sh_chan->ld_queue) {
+               /* Get the ld start address from ld_queue */
+               hw = to_sh_desc(ld_node)->hw;
+               dmae_set_reg(sh_chan, hw);
+               dmae_start(sh_chan);
+       }
+}
+
+static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan)
+{
+       struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+       sh_chan_xfer_ld_queue(sh_chan);
+}
+
+static enum dma_status sh_dmae_is_complete(struct dma_chan *chan,
+                                       dma_cookie_t cookie,
+                                       dma_cookie_t *done,
+                                       dma_cookie_t *used)
+{
+       struct sh_dmae_chan *sh_chan = to_sh_chan(chan);
+       dma_cookie_t last_used;
+       dma_cookie_t last_complete;
+
+       sh_dmae_chan_ld_cleanup(sh_chan);
+
+       last_used = chan->cookie;
+       last_complete = sh_chan->completed_cookie;
+       if (last_complete == -EBUSY)
+               last_complete = last_used;
+
+       if (done)
+               *done = last_complete;
+
+       if (used)
+               *used = last_used;
+
+       return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static irqreturn_t sh_dmae_interrupt(int irq, void *data)
+{
+       irqreturn_t ret = IRQ_NONE;
+       struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data;
+       u32 chcr = sh_dmae_readl(sh_chan, CHCR);
+
+       if (chcr & CHCR_TE) {
+               /* DMA stop */
+               dmae_halt(sh_chan);
+
+               ret = IRQ_HANDLED;
+               tasklet_schedule(&sh_chan->tasklet);
+       }
+
+       return ret;
+}
+
+#if defined(CONFIG_CPU_SH4)
+static irqreturn_t sh_dmae_err(int irq, void *data)
+{
+       int err = 0;
+       struct sh_dmae_device *shdev = (struct sh_dmae_device *)data;
+
+       /* mixed (shared) IRQ mode */
+       if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
+               int cnt = 0;
+               switch (irq) {
+#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ)
+               case DMTE6_IRQ:
+                       cnt++;
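+                       /* fall through */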
+#endif
+               case DMTE0_IRQ:
+                       if (dmaor_read_reg(cnt) & (DMAOR_NMIF | DMAOR_AE)) {
+                               disable_irq(irq);
+                               return IRQ_HANDLED;
+                       }
+               default:
+                       return IRQ_NONE;
+               }
+       } else {
+               /* reset dma controller */
+               err = sh_dmae_rst(0);
+               if (err)
+                       return IRQ_NONE; /* must be an irqreturn_t, not an errno */
+               if (shdev->pdata.mode & SHDMA_DMAOR1) {
+                       err = sh_dmae_rst(1);
+                       if (err)
+                               return IRQ_NONE;
+               }
+               disable_irq(irq);
+               return IRQ_HANDLED;
+       }
+}
+#endif
+
+static void dmae_do_tasklet(unsigned long data)
+{
+       struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data;
+       struct sh_desc *desc, *_desc, *cur_desc = NULL;
+       u32 sar_buf = sh_dmae_readl(sh_chan, SAR);
+       list_for_each_entry_safe(desc, _desc,
+                                       &sh_chan->ld_queue, node) {
+               if ((desc->hw.sar + desc->hw.tcr) == sar_buf) {
+                       cur_desc = desc;
+                       break;
+               }
+       }
+
+       if (cur_desc) {
+               switch (cur_desc->async_tx.cookie) {
+               case 0: /* intermediate desc of a chain */
+                       break;
+               case -EBUSY: /* last desc of a chain */
+                       sh_chan->completed_cookie =
+                               cur_desc->async_tx.cookie;
+                       break;
+               default: /* first desc (cookie > 0) */
+                       sh_chan->completed_cookie =
+                               cur_desc->async_tx.cookie - 1;
+                       break;
+               }
+               cur_desc->mark = DESC_COMP;
+       }
+       /* Next desc */
+       sh_chan_xfer_ld_queue(sh_chan);
+       sh_dmae_chan_ld_cleanup(sh_chan);
+}
+
+static unsigned int get_dmae_irq(unsigned int id)
+{
+       unsigned int irq = 0;
+       if (id < ARRAY_SIZE(dmte_irq_map))
+               irq = dmte_irq_map[id];
+       return irq;
+}
+
+static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id)
+{
+       int err;
+       unsigned int irq = get_dmae_irq(id);
+       unsigned long irqflags = IRQF_DISABLED;
+       struct sh_dmae_chan *new_sh_chan;
+
+       /* alloc channel */
+       new_sh_chan = kzalloc(sizeof(struct sh_dmae_chan), GFP_KERNEL);
+       if (!new_sh_chan) {
+               dev_err(shdev->common.dev, "No free memory for allocating "
+                               "dma channels!\n");
+               return -ENOMEM;
+       }
+
+       new_sh_chan->dev = shdev->common.dev;
+       new_sh_chan->id = id;
+
+       /* Init DMA tasklet */
+       tasklet_init(&new_sh_chan->tasklet, dmae_do_tasklet,
+                       (unsigned long)new_sh_chan);
+
+       /* Init the channel */
+       dmae_init(new_sh_chan);
+
+       spin_lock_init(&new_sh_chan->desc_lock);
+
+       /* Init descriptor management lists */
+       INIT_LIST_HEAD(&new_sh_chan->ld_queue);
+       INIT_LIST_HEAD(&new_sh_chan->ld_free);
+
+       /* copy struct dma_device */
+       new_sh_chan->common.device = &shdev->common;
+
+       /* Add the channel to DMA device channel list */
+       list_add_tail(&new_sh_chan->common.device_node,
+                       &shdev->common.channels);
+       shdev->common.chancnt++;
+
+       if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
+               irqflags = IRQF_SHARED;
+#if defined(DMTE6_IRQ)
+               if (irq >= DMTE6_IRQ)
+                       irq = DMTE6_IRQ;
+               else
+#endif
+                       irq = DMTE0_IRQ;
+       }
+
+       snprintf(new_sh_chan->dev_id, sizeof(new_sh_chan->dev_id),
+                       "sh-dmae%d", new_sh_chan->id);
+
+       /* set up channel irq */
+       err = request_irq(irq, &sh_dmae_interrupt,
+               irqflags, new_sh_chan->dev_id, new_sh_chan);
+       if (err) {
+               dev_err(shdev->common.dev, "DMA channel %d request_irq error "
+                       "with return %d\n", id, err);
+               goto err_no_irq;
+       }
+
+       /* CHCR register control function */
+       new_sh_chan->set_chcr = dmae_set_chcr;
+       /* DMARS register control function */
+       new_sh_chan->set_dmars = dmae_set_dmars;
+
+       shdev->chan[id] = new_sh_chan;
+       return 0;
+
+err_no_irq:
+       /* remove from dmaengine device node */
+       list_del(&new_sh_chan->common.device_node);
+       kfree(new_sh_chan);
+       return err;
+}
+
+static void sh_dmae_chan_remove(struct sh_dmae_device *shdev)
+{
+       int i;
+
+       for (i = shdev->common.chancnt - 1 ; i >= 0 ; i--) {
+               if (shdev->chan[i]) {
+                       struct sh_dmae_chan *shchan = shdev->chan[i];
+                       if (!(shdev->pdata.mode & SHDMA_MIX_IRQ))
+                               free_irq(dmte_irq_map[i], shchan);
+
+                       list_del(&shchan->common.device_node);
+                       kfree(shchan);
+                       shdev->chan[i] = NULL;
+               }
+       }
+       shdev->common.chancnt = 0;
+}
+
+static int __init sh_dmae_probe(struct platform_device *pdev)
+{
+       int err = 0, cnt, ecnt;
+       unsigned long irqflags = IRQF_DISABLED;
+#if defined(CONFIG_CPU_SH4)
+       int eirq[] = { DMAE0_IRQ,
+#if defined(DMAE1_IRQ)
+                       DMAE1_IRQ
+#endif
+               };
+#endif
+       struct sh_dmae_device *shdev;
+
+       shdev = kzalloc(sizeof(struct sh_dmae_device), GFP_KERNEL);
+       if (!shdev) {
+               dev_err(&pdev->dev, "Not enough memory\n");
+               err = -ENOMEM;
+               goto shdev_err;
+       }
+
+       /* get platform data */
+       if (!pdev->dev.platform_data) {
+               err = -ENODEV;
+               goto rst_err;   /* frees shdev */
+       }
+
+       /* platform data */
+       memcpy(&shdev->pdata, pdev->dev.platform_data,
+                       sizeof(struct sh_dmae_pdata));
+
+       /* reset dma controller */
+       err = sh_dmae_rst(0);
+       if (err)
+               goto rst_err;
+
+       /* SH7780/85/23 has DMAOR1 */
+       if (shdev->pdata.mode & SHDMA_DMAOR1) {
+               err = sh_dmae_rst(1);
+               if (err)
+                       goto rst_err;
+       }
+
+       INIT_LIST_HEAD(&shdev->common.channels);
+
+       dma_cap_set(DMA_MEMCPY, shdev->common.cap_mask);
+       shdev->common.device_alloc_chan_resources
+               = sh_dmae_alloc_chan_resources;
+       shdev->common.device_free_chan_resources = sh_dmae_free_chan_resources;
+       shdev->common.device_prep_dma_memcpy = sh_dmae_prep_memcpy;
+       shdev->common.device_is_tx_complete = sh_dmae_is_complete;
+       shdev->common.device_issue_pending = sh_dmae_memcpy_issue_pending;
+       shdev->common.dev = &pdev->dev;
+
+#if defined(CONFIG_CPU_SH4)
+       /* Mixed IRQ mode (SH7722/SH7730 etc.): error IRQs are shared */
+       if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
+               irqflags = IRQF_SHARED;
+               eirq[0] = DMTE0_IRQ;
+#if defined(DMTE6_IRQ) && defined(DMAE1_IRQ)
+               eirq[1] = DMTE6_IRQ;
+#endif
+       }
+
+       for (ecnt = 0 ; ecnt < ARRAY_SIZE(eirq); ecnt++) {
+               err = request_irq(eirq[ecnt], sh_dmae_err,
+                       irqflags, "DMAC Address Error", shdev);
+               if (err) {
+                       dev_err(&pdev->dev, "DMA device request_irq "
+                               "error (irq %d) with return %d\n",
+                               eirq[ecnt], err);
+                       goto eirq_err;
+               }
+       }
+#endif /* CONFIG_CPU_SH4 */
+
+       /* Create DMA Channel */
+       for (cnt = 0 ; cnt < MAX_DMA_CHANNELS ; cnt++) {
+               err = sh_dmae_chan_probe(shdev, cnt);
+               if (err)
+                       goto chan_probe_err;
+       }
+
+       platform_set_drvdata(pdev, shdev);
+       dma_async_device_register(&shdev->common);
+
+       return err;
+
+chan_probe_err:
+       sh_dmae_chan_remove(shdev);
+
+eirq_err:
+       for (ecnt-- ; ecnt >= 0; ecnt--)
+               free_irq(eirq[ecnt], shdev);
+
+rst_err:
+       kfree(shdev);
+
+shdev_err:
+       return err;
+}
+
+static int __exit sh_dmae_remove(struct platform_device *pdev)
+{
+       struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
+
+       dma_async_device_unregister(&shdev->common);
+
+       if (shdev->pdata.mode & SHDMA_MIX_IRQ) {
+               free_irq(DMTE0_IRQ, shdev);
+#if defined(DMTE6_IRQ)
+               free_irq(DMTE6_IRQ, shdev);
+#endif
+       }
+
+       /* channel data remove */
+       sh_dmae_chan_remove(shdev);
+
+       if (!(shdev->pdata.mode & SHDMA_MIX_IRQ)) {
+               free_irq(DMAE0_IRQ, shdev);
+#if defined(DMAE1_IRQ)
+               free_irq(DMAE1_IRQ, shdev);
+#endif
+       }
+       kfree(shdev);
+
+       return 0;
+}
+
+static void sh_dmae_shutdown(struct platform_device *pdev)
+{
+       struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
+       sh_dmae_ctl_stop(0);
+       if (shdev->pdata.mode & SHDMA_DMAOR1)
+               sh_dmae_ctl_stop(1);
+}
+
+static struct platform_driver sh_dmae_driver = {
+       .remove         = __exit_p(sh_dmae_remove),
+       .shutdown       = sh_dmae_shutdown,
+       .driver = {
+               .name   = "sh-dma-engine",
+       },
+};
+
+static int __init sh_dmae_init(void)
+{
+       return platform_driver_probe(&sh_dmae_driver, sh_dmae_probe);
+}
+module_init(sh_dmae_init);
+
+static void __exit sh_dmae_exit(void)
+{
+       platform_driver_unregister(&sh_dmae_driver);
+}
+module_exit(sh_dmae_exit);
+
+MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>");
+MODULE_DESCRIPTION("Renesas SH DMA Engine driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h
new file mode 100644 (file)
index 0000000..2b4bc15
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * Renesas SuperH DMA Engine support
+ *
+ * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+#ifndef __DMA_SHDMA_H
+#define __DMA_SHDMA_H
+
+#include <linux/device.h>
+#include <linux/dmapool.h>
+#include <linux/dmaengine.h>
+
+#define SH_DMA_TCR_MAX 0x00FFFFFF      /* 16MB */
+
+struct sh_dmae_regs {
+       u32 sar; /* SAR / source address */
+       u32 dar; /* DAR / destination address */
+       u32 tcr; /* TCR / transfer count */
+};
+
+struct sh_desc {
+       struct list_head tx_list;
+       struct sh_dmae_regs hw;
+       struct list_head node;
+       struct dma_async_tx_descriptor async_tx;
+       int mark;
+};
+
+struct sh_dmae_chan {
+       dma_cookie_t completed_cookie;  /* The maximum cookie completed */
+       spinlock_t desc_lock;                   /* Descriptor operation lock */
+       struct list_head ld_queue;              /* Link descriptors queue */
+       struct list_head ld_free;               /* Link descriptors free */
+       struct dma_chan common;                 /* DMA common channel */
+       struct device *dev;                             /* Channel device */
+       struct tasklet_struct tasklet;  /* Tasklet */
+       int descs_allocated;                    /* desc count */
+       int id;                         /* Raw id of this channel */
+       char dev_id[16];        /* unique name per DMAC of channel */
+
+       /* Set chcr */
+       int (*set_chcr)(struct sh_dmae_chan *sh_chan, u32 regs);
+       /* Set DMA resource */
+       int (*set_dmars)(struct sh_dmae_chan *sh_chan, u16 res);
+};
+
+struct sh_dmae_device {
+       struct dma_device common;
+       struct sh_dmae_chan *chan[MAX_DMA_CHANNELS];
+       struct sh_dmae_pdata pdata;
+};
+
+#define to_sh_chan(chan) container_of(chan, struct sh_dmae_chan, common)
+#define to_sh_desc(lh) container_of(lh, struct sh_desc, node)
+#define tx_to_sh_desc(tx) container_of(tx, struct sh_desc, async_tx)
+
+#endif /* __DMA_SHDMA_H */
index 7837930..fb6bb64 100644 (file)
@@ -180,9 +180,8 @@ static struct txx9dmac_desc *txx9dmac_first_queued(struct txx9dmac_chan *dc)
 
 static struct txx9dmac_desc *txx9dmac_last_child(struct txx9dmac_desc *desc)
 {
-       if (!list_empty(&desc->txd.tx_list))
-               desc = list_entry(desc->txd.tx_list.prev,
-                                 struct txx9dmac_desc, desc_node);
+       if (!list_empty(&desc->tx_list))
+               desc = list_entry(desc->tx_list.prev, typeof(*desc), desc_node);
        return desc;
 }
 
@@ -197,6 +196,7 @@ static struct txx9dmac_desc *txx9dmac_desc_alloc(struct txx9dmac_chan *dc,
        desc = kzalloc(sizeof(*desc), flags);
        if (!desc)
                return NULL;
+       INIT_LIST_HEAD(&desc->tx_list);
        dma_async_tx_descriptor_init(&desc->txd, &dc->chan);
        desc->txd.tx_submit = txx9dmac_tx_submit;
        /* txd.flags will be overwritten in prep funcs */
@@ -245,7 +245,7 @@ static void txx9dmac_sync_desc_for_cpu(struct txx9dmac_chan *dc,
        struct txx9dmac_dev *ddev = dc->ddev;
        struct txx9dmac_desc *child;
 
-       list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+       list_for_each_entry(child, &desc->tx_list, desc_node)
                dma_sync_single_for_cpu(chan2parent(&dc->chan),
                                child->txd.phys, ddev->descsize,
                                DMA_TO_DEVICE);
@@ -267,11 +267,11 @@ static void txx9dmac_desc_put(struct txx9dmac_chan *dc,
                txx9dmac_sync_desc_for_cpu(dc, desc);
 
                spin_lock_bh(&dc->lock);
-               list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+               list_for_each_entry(child, &desc->tx_list, desc_node)
                        dev_vdbg(chan2dev(&dc->chan),
                                 "moving child desc %p to freelist\n",
                                 child);
-               list_splice_init(&desc->txd.tx_list, &dc->free_list);
+               list_splice_init(&desc->tx_list, &dc->free_list);
                dev_vdbg(chan2dev(&dc->chan), "moving desc %p to freelist\n",
                         desc);
                list_add(&desc->desc_node, &dc->free_list);
@@ -429,7 +429,7 @@ txx9dmac_descriptor_complete(struct txx9dmac_chan *dc,
        param = txd->callback_param;
 
        txx9dmac_sync_desc_for_cpu(dc, desc);
-       list_splice_init(&txd->tx_list, &dc->free_list);
+       list_splice_init(&desc->tx_list, &dc->free_list);
        list_move(&desc->desc_node, &dc->free_list);
 
        if (!ds) {
@@ -571,7 +571,7 @@ static void txx9dmac_handle_error(struct txx9dmac_chan *dc, u32 csr)
                 "Bad descriptor submitted for DMA! (cookie: %d)\n",
                 bad_desc->txd.cookie);
        txx9dmac_dump_desc(dc, &bad_desc->hwdesc);
-       list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node)
+       list_for_each_entry(child, &bad_desc->tx_list, desc_node)
                txx9dmac_dump_desc(dc, &child->hwdesc);
        /* Pretend the descriptor completed successfully */
        txx9dmac_descriptor_complete(dc, bad_desc);
@@ -613,7 +613,7 @@ static void txx9dmac_scan_descriptors(struct txx9dmac_chan *dc)
                        return;
                }
 
-               list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+               list_for_each_entry(child, &desc->tx_list, desc_node)
                        if (desc_read_CHAR(dc, child) == chain) {
                                /* Currently in progress */
                                if (csr & TXX9_DMA_CSR_ABCHC)
@@ -823,8 +823,7 @@ txx9dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
                        dma_sync_single_for_device(chan2parent(&dc->chan),
                                        prev->txd.phys, ddev->descsize,
                                        DMA_TO_DEVICE);
-                       list_add_tail(&desc->desc_node,
-                                       &first->txd.tx_list);
+                       list_add_tail(&desc->desc_node, &first->tx_list);
                }
                prev = desc;
        }
@@ -919,8 +918,7 @@ txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
                                        prev->txd.phys,
                                        ddev->descsize,
                                        DMA_TO_DEVICE);
-                       list_add_tail(&desc->desc_node,
-                                       &first->txd.tx_list);
+                       list_add_tail(&desc->desc_node, &first->tx_list);
                }
                prev = desc;
        }
index c907ff0..365d423 100644 (file)
@@ -231,6 +231,7 @@ struct txx9dmac_desc {
 
        /* THEN values for driver housekeeping */
        struct list_head                desc_node ____cacheline_aligned;
+       struct list_head                tx_list;
        struct dma_async_tx_descriptor  txd;
        size_t                          len;
 };
index a3ca18e..02127e5 100644 (file)
@@ -133,6 +133,13 @@ config EDAC_I3000
          Support for error detection and correction on the Intel
          3000 and 3010 server chipsets.
 
+config EDAC_I3200
+       tristate "Intel 3200"
+       depends on EDAC_MM_EDAC && PCI && X86 && EXPERIMENTAL
+       help
+         Support for error detection and correction on the Intel
+         3200 and 3210 server chipsets.
+
 config EDAC_X38
        tristate "Intel X38"
        depends on EDAC_MM_EDAC && PCI && X86
@@ -176,11 +183,11 @@ config EDAC_I5100
          San Clemente MCH.
 
 config EDAC_MPC85XX
-       tristate "Freescale MPC85xx"
-       depends on EDAC_MM_EDAC && FSL_SOC && MPC85xx
+       tristate "Freescale MPC83xx / MPC85xx"
+       depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || MPC85xx)
        help
          Support for error detection and correction on the Freescale
-         MPC8560, MPC8540, MPC8548
+         MPC8349, MPC8560, MPC8540, MPC8548
 
 config EDAC_MV64X60
        tristate "Marvell MV64x60"
index cfa033c..7a473bb 100644 (file)
@@ -32,6 +32,7 @@ obj-$(CONFIG_EDAC_I82443BXGX)         += i82443bxgx_edac.o
 obj-$(CONFIG_EDAC_I82875P)             += i82875p_edac.o
 obj-$(CONFIG_EDAC_I82975X)             += i82975x_edac.o
 obj-$(CONFIG_EDAC_I3000)               += i3000_edac.o
+obj-$(CONFIG_EDAC_I3200)               += i3200_edac.o
 obj-$(CONFIG_EDAC_X38)                 += x38_edac.o
 obj-$(CONFIG_EDAC_I82860)              += i82860_edac.o
 obj-$(CONFIG_EDAC_R82600)              += r82600_edac.o
@@ -49,3 +50,4 @@ obj-$(CONFIG_EDAC_CELL)                       += cell_edac.o
 obj-$(CONFIG_EDAC_PPC4XX)              += ppc4xx_edac.o
 obj-$(CONFIG_EDAC_AMD8111)             += amd8111_edac.o
 obj-$(CONFIG_EDAC_AMD8131)             += amd8131_edac.o
+
index 8c54196..3d50274 100644
@@ -885,14 +885,14 @@ static int __devinit cpc925_probe(struct platform_device *pdev)
 
        if (!devm_request_mem_region(&pdev->dev,
                                     r->start,
-                                    r->end - r->start + 1,
+                                    resource_size(r),
                                     pdev->name)) {
                cpc925_printk(KERN_ERR, "Unable to request mem region\n");
                res = -EBUSY;
                goto err1;
        }
 
-       vbase = devm_ioremap(&pdev->dev, r->start, r->end - r->start + 1);
+       vbase = devm_ioremap(&pdev->dev, r->start, resource_size(r));
        if (!vbase) {
                cpc925_printk(KERN_ERR, "Unable to ioremap device\n");
                res = -ENOMEM;
@@ -953,7 +953,7 @@ err3:
        cpc925_mc_exit(mci);
        edac_mc_free(mci);
 err2:
-       devm_release_mem_region(&pdev->dev, r->start, r->end-r->start+1);
+       devm_release_mem_region(&pdev->dev, r->start, resource_size(r));
 err1:
        devres_release_group(&pdev->dev, cpc925_probe);
 out:
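
Throughout this probe path (and again in the mv64x60 driver later in this series), open-coded `r->end - r->start + 1` becomes resource_size(). For reference, the helper from <linux/ioport.h> is exactly that inclusive-range arithmetic:

/* From <linux/ioport.h>: */
static inline resource_size_t resource_size(const struct resource *res)
{
	return res->end - res->start + 1;	/* both bounds are inclusive */
}
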
index b02a6a6..d5e13c9 100644
@@ -356,7 +356,6 @@ static void complete_edac_device_list_del(struct rcu_head *head)
 
        edac_dev = container_of(head, struct edac_device_ctl_info, rcu);
        INIT_LIST_HEAD(&edac_dev->link);
-       complete(&edac_dev->removal_complete);
 }
 
 /*
@@ -369,10 +368,8 @@ static void del_edac_device_from_global_list(struct edac_device_ctl_info
                                                *edac_device)
 {
        list_del_rcu(&edac_device->link);
-
-       init_completion(&edac_device->removal_complete);
        call_rcu(&edac_device->rcu, complete_edac_device_list_del);
-       wait_for_completion(&edac_device->removal_complete);
+       rcu_barrier();
 }
 
 /*
index 335b7eb..b629c41 100644
@@ -418,16 +418,14 @@ static void complete_mc_list_del(struct rcu_head *head)
 
        mci = container_of(head, struct mem_ctl_info, rcu);
        INIT_LIST_HEAD(&mci->link);
-       complete(&mci->complete);
 }
 
 static void del_mc_from_global_list(struct mem_ctl_info *mci)
 {
        atomic_dec(&edac_handlers);
        list_del_rcu(&mci->link);
-       init_completion(&mci->complete);
        call_rcu(&mci->rcu, complete_mc_list_del);
-       wait_for_completion(&mci->complete);
+       rcu_barrier();
 }
 
 /**
index 30b585b..efb5d56 100644
@@ -174,7 +174,6 @@ static void complete_edac_pci_list_del(struct rcu_head *head)
 
        pci = container_of(head, struct edac_pci_ctl_info, rcu);
        INIT_LIST_HEAD(&pci->link);
-       complete(&pci->complete);
 }
 
 /*
@@ -185,9 +184,8 @@ static void complete_edac_pci_list_del(struct rcu_head *head)
 static void del_edac_pci_from_global_list(struct edac_pci_ctl_info *pci)
 {
        list_del_rcu(&pci->link);
-       init_completion(&pci->complete);
        call_rcu(&pci->rcu, complete_edac_pci_list_del);
-       wait_for_completion(&pci->complete);
+       rcu_barrier();
 }
 
 #if 0
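
The three EDAC teardown paths above all drop the same idiom: a per-object completion that the RCU callback had to signal so the deleter could wait for it. rcu_barrier() waits for every outstanding RCU callback instead, which is both simpler and what a module must do before unloading anyway, since a still-queued callback must not run after the module text is freed. A sketch of the resulting pattern, names hypothetical:

#include <linux/list.h>
#include <linux/rcupdate.h>

struct my_ctl {
	struct list_head link;
	struct rcu_head rcu;
};

static void my_list_del_cb(struct rcu_head *head)
{
	struct my_ctl *c = container_of(head, struct my_ctl, rcu);

	INIT_LIST_HEAD(&c->link);	/* no completion to signal any more */
}

static void my_del_from_global_list(struct my_ctl *c)
{
	list_del_rcu(&c->link);
	call_rcu(&c->rcu, my_list_del_cb);
	rcu_barrier();			/* wait for *all* in-flight callbacks */
}
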
diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c
new file mode 100644
index 0000000..fde4db9
--- /dev/null
@@ -0,0 +1,527 @@
+/*
+ * Intel 3200/3210 Memory Controller kernel module
+ * Copyright (C) 2008-2009 Akamai Technologies, Inc.
+ * Portions by Hitoshi Mitake <h.mitake@gmail.com>.
+ *
+ * This file may be distributed under the terms of the
+ * GNU General Public License.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/slab.h>
+#include <linux/edac.h>
+#include <linux/io.h>
+#include "edac_core.h"
+
+#define I3200_REVISION        "1.1"
+
+#define EDAC_MOD_STR        "i3200_edac"
+
+#define PCI_DEVICE_ID_INTEL_3200_HB    0x29f0
+
+#define I3200_RANKS            8
+#define I3200_RANKS_PER_CHANNEL        4
+#define I3200_CHANNELS         2
+
+/* Intel 3200 register addresses - device 0 function 0 - DRAM Controller */
+
+#define I3200_MCHBAR_LOW       0x48    /* MCH Memory Mapped Register BAR */
+#define I3200_MCHBAR_HIGH      0x4c
+#define I3200_MCHBAR_MASK      0xfffffc000ULL  /* bits 35:14 */
+#define I3200_MMR_WINDOW_SIZE  16384
+
+#define I3200_TOM              0xa0    /* Top of Memory (16b)
+                *
+                * 15:10 reserved
+                *  9:0  total populated physical memory
+                */
+#define I3200_TOM_MASK         0x3ff   /* bits 9:0 */
+#define I3200_TOM_SHIFT                26      /* 64MiB grain */
+
+#define I3200_ERRSTS           0xc8    /* Error Status Register (16b)
+                *
+                * 15    reserved
+                * 14    Isochronous TBWRR Run Behind FIFO Full
+                *       (ITCV)
+                * 13    Isochronous TBWRR Run Behind FIFO Put
+                *       (ITSTV)
+                * 12    reserved
+                * 11    MCH Thermal Sensor Event
+                *       for SMI/SCI/SERR (GTSE)
+                * 10    reserved
+                *  9    LOCK to non-DRAM Memory Flag (LCKF)
+                *  8    reserved
+                *  7    DRAM Throttle Flag (DTF)
+                *  6:2  reserved
+                *  1    Multi-bit DRAM ECC Error Flag (DMERR)
+                *  0    Single-bit DRAM ECC Error Flag (DSERR)
+                */
+#define I3200_ERRSTS_UE                0x0002
+#define I3200_ERRSTS_CE                0x0001
+#define I3200_ERRSTS_BITS      (I3200_ERRSTS_UE | I3200_ERRSTS_CE)
+
+
+/* Intel MMIO register space - device 0 function 0 - MMR space */
+
+#define I3200_C0DRB    0x200   /* Channel 0 DRAM Rank Boundary (16b x 4)
+                *
+                * 15:10 reserved
+                *  9:0  Channel 0 DRAM Rank Boundary Address
+                */
+#define I3200_C1DRB    0x600   /* Channel 1 DRAM Rank Boundary (16b x 4) */
+#define I3200_DRB_MASK 0x3ff   /* bits 9:0 */
+#define I3200_DRB_SHIFT        26      /* 64MiB grain */
+
+#define I3200_C0ECCERRLOG      0x280   /* Channel 0 ECC Error Log (64b)
+                *
+                * 63:48 Error Column Address (ERRCOL)
+                * 47:32 Error Row Address (ERRROW)
+                * 31:29 Error Bank Address (ERRBANK)
+                * 28:27 Error Rank Address (ERRRANK)
+                * 26:24 reserved
+                * 23:16 Error Syndrome (ERRSYND)
+                * 15: 2 reserved
+                *    1  Multiple Bit Error Status (MERRSTS)
+                *    0  Correctable Error Status (CERRSTS)
+                */
+#define I3200_C1ECCERRLOG              0x680   /* Chan 1 ECC Error Log (64b) */
+#define I3200_ECCERRLOG_CE             0x1
+#define I3200_ECCERRLOG_UE             0x2
+#define I3200_ECCERRLOG_RANK_BITS      0x18000000
+#define I3200_ECCERRLOG_RANK_SHIFT     27
+#define I3200_ECCERRLOG_SYNDROME_BITS  0xff0000
+#define I3200_ECCERRLOG_SYNDROME_SHIFT 16
+#define I3200_CAPID0                   0xe0    /* P.95 of spec for details */
+
+struct i3200_priv {
+       void __iomem *window;
+};
+
+static int nr_channels;
+
+static int how_many_channels(struct pci_dev *pdev)
+{
+       unsigned char capid0_8b; /* 8th byte of CAPID0 */
+
+       pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b);
+       if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */
+               debugf0("In single channel mode.\n");
+               return 1;
+       } else {
+               debugf0("In dual channel mode.\n");
+               return 2;
+       }
+}
+
+static unsigned long eccerrlog_syndrome(u64 log)
+{
+       return (log & I3200_ECCERRLOG_SYNDROME_BITS) >>
+               I3200_ECCERRLOG_SYNDROME_SHIFT;
+}
+
+static int eccerrlog_row(int channel, u64 log)
+{
+       u64 rank = ((log & I3200_ECCERRLOG_RANK_BITS) >>
+               I3200_ECCERRLOG_RANK_SHIFT);
+       return rank | (channel * I3200_RANKS_PER_CHANNEL);
+}
+
+enum i3200_chips {
+       I3200 = 0,
+};
+
+struct i3200_dev_info {
+       const char *ctl_name;
+};
+
+struct i3200_error_info {
+       u16 errsts;
+       u16 errsts2;
+       u64 eccerrlog[I3200_CHANNELS];
+};
+
+static const struct i3200_dev_info i3200_devs[] = {
+       [I3200] = {
+               .ctl_name = "i3200"
+       },
+};
+
+static struct pci_dev *mci_pdev;
+static int i3200_registered = 1;
+
+
+static void i3200_clear_error_info(struct mem_ctl_info *mci)
+{
+       struct pci_dev *pdev;
+
+       pdev = to_pci_dev(mci->dev);
+
+       /*
+        * Clear any error bits.
+        * (Yes, we really clear bits by writing 1 to them.)
+        */
+       pci_write_bits16(pdev, I3200_ERRSTS, I3200_ERRSTS_BITS,
+               I3200_ERRSTS_BITS);
+}
+
+static void i3200_get_and_clear_error_info(struct mem_ctl_info *mci,
+               struct i3200_error_info *info)
+{
+       struct pci_dev *pdev;
+       struct i3200_priv *priv = mci->pvt_info;
+       void __iomem *window = priv->window;
+
+       pdev = to_pci_dev(mci->dev);
+
+       /*
+        * This is a mess because there is no atomic way to read all the
+        * registers at once, and the state can change under us: a CE can
+        * be overwritten by a UE between the reads.
+        */
+       pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts);
+       if (!(info->errsts & I3200_ERRSTS_BITS))
+               return;
+
+       info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
+       if (nr_channels == 2)
+               info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);
+
+       pci_read_config_word(pdev, I3200_ERRSTS, &info->errsts2);
+
+       /*
+        * If the error bits are the same for both reads then the first
+        * set of reads is valid.  If they differ, a CE arrived with no
+        * log info and the second set of reads is the valid one; it
+        * should describe the UE.
+        */
+       if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
+               info->eccerrlog[0] = readq(window + I3200_C0ECCERRLOG);
+               if (nr_channels == 2)
+                       info->eccerrlog[1] = readq(window + I3200_C1ECCERRLOG);
+       }
+
+       i3200_clear_error_info(mci);
+}
+
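
The double read above is the usual answer to a racy status/log register pair: read status, read the logs, read status again; if the two status values differ, an error landed mid-sequence and the logs are re-read so they describe the later (UE) event. Reduced to a skeleton with hypothetical accessors:

/* Sketch of the double-read idiom (all names hypothetical). */
#define ERR_BITS	0x3

static u16 read_status(void);	/* e.g. a PCI config read */
static u64 read_log(void);	/* e.g. an MMIO readq     */

static void snapshot_errors(u16 *errsts, u16 *errsts2, u64 *log)
{
	*errsts = read_status();
	if (!(*errsts & ERR_BITS))
		return;			/* nothing pending */

	*log = read_log();
	*errsts2 = read_status();

	/* A new error landed between the reads: take the log again so
	 * it matches the second (UE) status. */
	if ((*errsts ^ *errsts2) & ERR_BITS)
		*log = read_log();
}
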
+static void i3200_process_error_info(struct mem_ctl_info *mci,
+               struct i3200_error_info *info)
+{
+       int channel;
+       u64 log;
+
+       if (!(info->errsts & I3200_ERRSTS_BITS))
+               return;
+
+       if ((info->errsts ^ info->errsts2) & I3200_ERRSTS_BITS) {
+               edac_mc_handle_ce_no_info(mci, "UE overwrote CE");
+               info->errsts = info->errsts2;
+       }
+
+       for (channel = 0; channel < nr_channels; channel++) {
+               log = info->eccerrlog[channel];
+               if (log & I3200_ECCERRLOG_UE) {
+                       edac_mc_handle_ue(mci, 0, 0,
+                               eccerrlog_row(channel, log),
+                               "i3200 UE");
+               } else if (log & I3200_ECCERRLOG_CE) {
+                       edac_mc_handle_ce(mci, 0, 0,
+                               eccerrlog_syndrome(log),
+                               eccerrlog_row(channel, log), 0,
+                               "i3200 CE");
+               }
+       }
+}
+
+static void i3200_check(struct mem_ctl_info *mci)
+{
+       struct i3200_error_info info;
+
+       debugf1("MC%d: %s()\n", mci->mc_idx, __func__);
+       i3200_get_and_clear_error_info(mci, &info);
+       i3200_process_error_info(mci, &info);
+}
+
+
+void __iomem *i3200_map_mchbar(struct pci_dev *pdev)
+{
+       union {
+               u64 mchbar;
+               struct {
+                       u32 mchbar_low;
+                       u32 mchbar_high;
+               };
+       } u;
+       void __iomem *window;
+
+       pci_read_config_dword(pdev, I3200_MCHBAR_LOW, &u.mchbar_low);
+       pci_read_config_dword(pdev, I3200_MCHBAR_HIGH, &u.mchbar_high);
+       u.mchbar &= I3200_MCHBAR_MASK;
+
+       if (u.mchbar != (resource_size_t)u.mchbar) {
+               printk(KERN_ERR
+                       "i3200: mmio space beyond accessible range (0x%llx)\n",
+                       (unsigned long long)u.mchbar);
+               return NULL;
+       }
+
+       window = ioremap_nocache(u.mchbar, I3200_MMR_WINDOW_SIZE);
+       if (!window)
+               printk(KERN_ERR "i3200: cannot map mmio space at 0x%llx\n",
+                       (unsigned long long)u.mchbar);
+
+       return window;
+}
+
+
+static void i3200_get_drbs(void __iomem *window,
+       u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
+{
+       int i;
+
+       for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) {
+               drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK;
+               drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK;
+       }
+}
+
+static bool i3200_is_stacked(struct pci_dev *pdev,
+       u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL])
+{
+       u16 tom;
+
+       pci_read_config_word(pdev, I3200_TOM, &tom);
+       tom &= I3200_TOM_MASK;
+
+       return drbs[I3200_CHANNELS - 1][I3200_RANKS_PER_CHANNEL - 1] == tom;
+}
+
+static unsigned long drb_to_nr_pages(
+       u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL], bool stacked,
+       int channel, int rank)
+{
+       int n;
+
+       n = drbs[channel][rank];
+       if (rank > 0)
+               n -= drbs[channel][rank - 1];
+       if (stacked && (channel == 1) &&
+           drbs[channel][rank] == drbs[channel][I3200_RANKS_PER_CHANNEL - 1])
+               n -= drbs[0][I3200_RANKS_PER_CHANNEL - 1];
+
+       n <<= (I3200_DRB_SHIFT - PAGE_SHIFT);
+       return n;
+}
+
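
Because each DRB holds the cumulative top of memory through that rank in 64 MiB units, a rank's size is its DRB minus its predecessor's (with the stacked case additionally subtracting channel 0's total from channel 1's top rank). A small standalone example of the arithmetic, using made-up DRB values:

#include <stdio.h>

#define DRB_SHIFT	26	/* 64 MiB granularity, as in the driver */
#define PAGE_SHIFT	12

int main(void)
{
	/* cumulative boundaries: rank sizes are 256M, 256M, 512M, 0 */
	unsigned int drbs[4] = { 4, 8, 16, 16 };
	int rank;

	for (rank = 0; rank < 4; rank++) {
		unsigned long n = drbs[rank] - (rank ? drbs[rank - 1] : 0);

		printf("rank %d: %lu pages (%lu MiB)\n", rank,
		       n << (DRB_SHIFT - PAGE_SHIFT), n << (DRB_SHIFT - 20));
	}
	return 0;
}
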
+static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
+{
+       int rc;
+       int i;
+       struct mem_ctl_info *mci = NULL;
+       unsigned long last_page;
+       u16 drbs[I3200_CHANNELS][I3200_RANKS_PER_CHANNEL];
+       bool stacked;
+       void __iomem *window;
+       struct i3200_priv *priv;
+
+       debugf0("MC: %s()\n", __func__);
+
+       window = i3200_map_mchbar(pdev);
+       if (!window)
+               return -ENODEV;
+
+       i3200_get_drbs(window, drbs);
+       nr_channels = how_many_channels(pdev);
+
+       mci = edac_mc_alloc(sizeof(struct i3200_priv), I3200_RANKS,
+               nr_channels, 0);
+       if (!mci)
+               return -ENOMEM;
+
+       debugf3("MC: %s(): init mci\n", __func__);
+
+       mci->dev = &pdev->dev;
+       mci->mtype_cap = MEM_FLAG_DDR2;
+
+       mci->edac_ctl_cap = EDAC_FLAG_SECDED;
+       mci->edac_cap = EDAC_FLAG_SECDED;
+
+       mci->mod_name = EDAC_MOD_STR;
+       mci->mod_ver = I3200_REVISION;
+       mci->ctl_name = i3200_devs[dev_idx].ctl_name;
+       mci->dev_name = pci_name(pdev);
+       mci->edac_check = i3200_check;
+       mci->ctl_page_to_phys = NULL;
+       priv = mci->pvt_info;
+       priv->window = window;
+
+       stacked = i3200_is_stacked(pdev, drbs);
+
+       /*
+        * The dram rank boundary (DRB) reg values are boundary addresses
+        * for each DRAM rank with a granularity of 64MB.  DRB regs are
+        * cumulative; the last one will contain the total memory
+        * contained in all ranks.
+        */
+       last_page = -1UL;
+       for (i = 0; i < mci->nr_csrows; i++) {
+               unsigned long nr_pages;
+               struct csrow_info *csrow = &mci->csrows[i];
+
+               nr_pages = drb_to_nr_pages(drbs, stacked,
+                       i / I3200_RANKS_PER_CHANNEL,
+                       i % I3200_RANKS_PER_CHANNEL);
+
+               if (nr_pages == 0) {
+                       csrow->mtype = MEM_EMPTY;
+                       continue;
+               }
+
+               csrow->first_page = last_page + 1;
+               last_page += nr_pages;
+               csrow->last_page = last_page;
+               csrow->nr_pages = nr_pages;
+
+               csrow->grain = nr_pages << PAGE_SHIFT;
+               csrow->mtype = MEM_DDR2;
+               csrow->dtype = DEV_UNKNOWN;
+               csrow->edac_mode = EDAC_UNKNOWN;
+       }
+
+       i3200_clear_error_info(mci);
+
+       rc = -ENODEV;
+       if (edac_mc_add_mc(mci)) {
+               debugf3("MC: %s(): failed edac_mc_add_mc()\n", __func__);
+               goto fail;
+       }
+
+       /* get this far and it's successful */
+       debugf3("MC: %s(): success\n", __func__);
+       return 0;
+
+fail:
+       iounmap(window);
+       if (mci)
+               edac_mc_free(mci);
+
+       return rc;
+}
+
+static int __devinit i3200_init_one(struct pci_dev *pdev,
+               const struct pci_device_id *ent)
+{
+       int rc;
+
+       debugf0("MC: %s()\n", __func__);
+
+       if (pci_enable_device(pdev) < 0)
+               return -EIO;
+
+       rc = i3200_probe1(pdev, ent->driver_data);
+       if (!mci_pdev)
+               mci_pdev = pci_dev_get(pdev);
+
+       return rc;
+}
+
+static void __devexit i3200_remove_one(struct pci_dev *pdev)
+{
+       struct mem_ctl_info *mci;
+       struct i3200_priv *priv;
+
+       debugf0("%s()\n", __func__);
+
+       mci = edac_mc_del_mc(&pdev->dev);
+       if (!mci)
+               return;
+
+       priv = mci->pvt_info;
+       iounmap(priv->window);
+
+       edac_mc_free(mci);
+}
+
+static const struct pci_device_id i3200_pci_tbl[] __devinitdata = {
+       {
+               PCI_VEND_DEV(INTEL, 3200_HB), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+               I3200},
+       {
+               0,
+       }            /* 0 terminated list. */
+};
+
+MODULE_DEVICE_TABLE(pci, i3200_pci_tbl);
+
+static struct pci_driver i3200_driver = {
+       .name = EDAC_MOD_STR,
+       .probe = i3200_init_one,
+       .remove = __devexit_p(i3200_remove_one),
+       .id_table = i3200_pci_tbl,
+};
+
+static int __init i3200_init(void)
+{
+       int pci_rc;
+
+       debugf3("MC: %s()\n", __func__);
+
+       /* Ensure that the OPSTATE is set correctly for POLL or NMI */
+       opstate_init();
+
+       pci_rc = pci_register_driver(&i3200_driver);
+       if (pci_rc < 0)
+               goto fail0;
+
+       if (!mci_pdev) {
+               i3200_registered = 0;
+               mci_pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+                               PCI_DEVICE_ID_INTEL_3200_HB, NULL);
+               if (!mci_pdev) {
+                       debugf0("i3200 pci_get_device fail\n");
+                       pci_rc = -ENODEV;
+                       goto fail1;
+               }
+
+               pci_rc = i3200_init_one(mci_pdev, i3200_pci_tbl);
+               if (pci_rc < 0) {
+                       debugf0("i3200 init fail\n");
+                       pci_rc = -ENODEV;
+                       goto fail1;
+               }
+       }
+
+       return 0;
+
+fail1:
+       pci_unregister_driver(&i3200_driver);
+
+fail0:
+       if (mci_pdev)
+               pci_dev_put(mci_pdev);
+
+       return pci_rc;
+}
+
+static void __exit i3200_exit(void)
+{
+       debugf3("MC: %s()\n", __func__);
+
+       pci_unregister_driver(&i3200_driver);
+       if (!i3200_registered) {
+               i3200_remove_one(mci_pdev);
+               pci_dev_put(mci_pdev);
+       }
+}
+
+module_init(i3200_init);
+module_exit(i3200_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Akamai Technologies, Inc.");
+MODULE_DESCRIPTION("MC support for Intel 3200 memory hub controllers");
+
+module_param(edac_op_state, int, 0444);
+MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
index 3f2ccfc..157f650 100644
@@ -41,7 +41,9 @@ static u32 orig_pci_err_en;
 #endif
 
 static u32 orig_l2_err_disable;
+#ifdef CONFIG_MPC85xx
 static u32 orig_hid1[2];
+#endif
 
 /************************ MC SYSFS parts ***********************************/
 
@@ -646,6 +648,7 @@ static struct of_device_id mpc85xx_l2_err_of_match[] = {
        { .compatible = "fsl,mpc8560-l2-cache-controller", },
        { .compatible = "fsl,mpc8568-l2-cache-controller", },
        { .compatible = "fsl,mpc8572-l2-cache-controller", },
+       { .compatible = "fsl,p2020-l2-cache-controller", },
        {},
 };
 
@@ -788,19 +791,20 @@ static void __devinit mpc85xx_init_csrows(struct mem_ctl_info *mci)
                csrow = &mci->csrows[index];
                cs_bnds = in_be32(pdata->mc_vbase + MPC85XX_MC_CS_BNDS_0 +
                                  (index * MPC85XX_MC_CS_BNDS_OFS));
-               start = (cs_bnds & 0xfff0000) << 4;
-               end = ((cs_bnds & 0xfff) << 20);
-               if (start)
-                       start |= 0xfffff;
-               if (end)
-                       end |= 0xfffff;
+
+               start = (cs_bnds & 0xffff0000) >> 16;
+               end   = (cs_bnds & 0x0000ffff);
 
                if (start == end)
                        continue;       /* not populated */
 
+               start <<= (24 - PAGE_SHIFT);
+               end   <<= (24 - PAGE_SHIFT);
+               end    |= (1 << (24 - PAGE_SHIFT)) - 1;
+
                csrow->first_page = start >> PAGE_SHIFT;
                csrow->last_page = end >> PAGE_SHIFT;
-               csrow->nr_pages = csrow->last_page + 1 - csrow->first_page;
+               csrow->nr_pages = end + 1 - start;
                csrow->grain = 8;
                csrow->mtype = mtype;
                csrow->dtype = DEV_UNKNOWN;
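
The old decode shifted the CS_BNDS fields into 32-bit byte addresses, which both mishandled the field widths and could overflow; the rewrite keeps the 16-bit start/end fields (each counting 16 MiB units, hence the shift by 24 - PAGE_SHIFT) and works in pages throughout. A standalone check of the arithmetic for a made-up register value:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t cs_bnds = 0x0000000f;			/* sample register value */
	uint32_t start = (cs_bnds & 0xffff0000) >> 16;	/* 0: first 16 MiB unit  */
	uint32_t end   = (cs_bnds & 0x0000ffff);	/* 15: last 16 MiB unit  */
	int shift = 24 - 12;				/* 16 MiB units -> 4 KiB pages */

	start <<= shift;
	end   <<= shift;
	end    |= (1u << shift) - 1;

	/* prints: pages 0..65535, 65536 pages = 256 MiB */
	printf("pages %u..%u, %u pages = %u MiB\n",
	       start, end, end + 1 - start, (end + 1 - start) >> 8);
	return 0;
}
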
@@ -984,6 +988,8 @@ static struct of_device_id mpc85xx_mc_err_of_match[] = {
        { .compatible = "fsl,mpc8560-memory-controller", },
        { .compatible = "fsl,mpc8568-memory-controller", },
        { .compatible = "fsl,mpc8572-memory-controller", },
+       { .compatible = "fsl,mpc8349-memory-controller", },
+       { .compatible = "fsl,p2020-memory-controller", },
        {},
 };
 
@@ -999,13 +1005,13 @@ static struct of_platform_driver mpc85xx_mc_err_driver = {
                   },
 };
 
-
+#ifdef CONFIG_MPC85xx
 static void __init mpc85xx_mc_clear_rfxe(void *data)
 {
        orig_hid1[smp_processor_id()] = mfspr(SPRN_HID1);
        mtspr(SPRN_HID1, (orig_hid1[smp_processor_id()] & ~0x20000));
 }
-
+#endif
 
 static int __init mpc85xx_mc_init(void)
 {
@@ -1038,26 +1044,32 @@ static int __init mpc85xx_mc_init(void)
                printk(KERN_WARNING EDAC_MOD_STR "PCI fails to register\n");
 #endif
 
+#ifdef CONFIG_MPC85xx
        /*
         * need to clear HID1[RFXE] to disable machine check int
         * so we can catch it
         */
        if (edac_op_state == EDAC_OPSTATE_INT)
                on_each_cpu(mpc85xx_mc_clear_rfxe, NULL, 0);
+#endif
 
        return 0;
 }
 
 module_init(mpc85xx_mc_init);
 
+#ifdef CONFIG_MPC85xx
 static void __exit mpc85xx_mc_restore_hid1(void *data)
 {
        mtspr(SPRN_HID1, orig_hid1[smp_processor_id()]);
 }
+#endif
 
 static void __exit mpc85xx_mc_exit(void)
 {
+#ifdef CONFIG_MPC85xx
        on_each_cpu(mpc85xx_mc_restore_hid1, NULL, 0);
+#endif
 #ifdef CONFIG_PCI
        of_unregister_platform_driver(&mpc85xx_pci_err_driver);
 #endif
index 5131aaa..a6b9fec 100644
@@ -90,7 +90,7 @@ static int __init mv64x60_pci_fixup(struct platform_device *pdev)
                return -ENOENT;
        }
 
-       pci_serr = ioremap(r->start, r->end - r->start + 1);
+       pci_serr = ioremap(r->start, resource_size(r));
        if (!pci_serr)
                return -ENOMEM;
 
@@ -140,7 +140,7 @@ static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev)
 
        if (!devm_request_mem_region(&pdev->dev,
                                     r->start,
-                                    r->end - r->start + 1,
+                                    resource_size(r),
                                     pdata->name)) {
                printk(KERN_ERR "%s: Error while requesting mem region\n",
                       __func__);
@@ -150,7 +150,7 @@ static int __devinit mv64x60_pci_err_probe(struct platform_device *pdev)
 
        pdata->pci_vbase = devm_ioremap(&pdev->dev,
                                        r->start,
-                                       r->end - r->start + 1);
+                                       resource_size(r));
        if (!pdata->pci_vbase) {
                printk(KERN_ERR "%s: Unable to setup PCI err regs\n", __func__);
                res = -ENOMEM;
@@ -306,7 +306,7 @@ static int __devinit mv64x60_sram_err_probe(struct platform_device *pdev)
 
        if (!devm_request_mem_region(&pdev->dev,
                                     r->start,
-                                    r->end - r->start + 1,
+                                    resource_size(r),
                                     pdata->name)) {
                printk(KERN_ERR "%s: Error while request mem region\n",
                       __func__);
@@ -316,7 +316,7 @@ static int __devinit mv64x60_sram_err_probe(struct platform_device *pdev)
 
        pdata->sram_vbase = devm_ioremap(&pdev->dev,
                                         r->start,
-                                        r->end - r->start + 1);
+                                        resource_size(r));
        if (!pdata->sram_vbase) {
                printk(KERN_ERR "%s: Unable to setup SRAM err regs\n",
                       __func__);
@@ -474,7 +474,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev)
 
        if (!devm_request_mem_region(&pdev->dev,
                                     r->start,
-                                    r->end - r->start + 1,
+                                    resource_size(r),
                                     pdata->name)) {
                printk(KERN_ERR "%s: Error while requesting mem region\n",
                       __func__);
@@ -484,7 +484,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev)
 
        pdata->cpu_vbase[0] = devm_ioremap(&pdev->dev,
                                           r->start,
-                                          r->end - r->start + 1);
+                                          resource_size(r));
        if (!pdata->cpu_vbase[0]) {
                printk(KERN_ERR "%s: Unable to setup CPU err regs\n", __func__);
                res = -ENOMEM;
@@ -501,7 +501,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev)
 
        if (!devm_request_mem_region(&pdev->dev,
                                     r->start,
-                                    r->end - r->start + 1,
+                                    resource_size(r),
                                     pdata->name)) {
                printk(KERN_ERR "%s: Error while requesting mem region\n",
                       __func__);
@@ -511,7 +511,7 @@ static int __devinit mv64x60_cpu_err_probe(struct platform_device *pdev)
 
        pdata->cpu_vbase[1] = devm_ioremap(&pdev->dev,
                                           r->start,
-                                          r->end - r->start + 1);
+                                          resource_size(r));
        if (!pdata->cpu_vbase[1]) {
                printk(KERN_ERR "%s: Unable to setup CPU err regs\n", __func__);
                res = -ENOMEM;
@@ -726,7 +726,7 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev)
 
        if (!devm_request_mem_region(&pdev->dev,
                                     r->start,
-                                    r->end - r->start + 1,
+                                    resource_size(r),
                                     pdata->name)) {
                printk(KERN_ERR "%s: Error while requesting mem region\n",
                       __func__);
@@ -736,7 +736,7 @@ static int __devinit mv64x60_mc_err_probe(struct platform_device *pdev)
 
        pdata->mc_vbase = devm_ioremap(&pdev->dev,
                                       r->start,
-                                      r->end - r->start + 1);
+                                      resource_size(r));
        if (!pdata->mc_vbase) {
                printk(KERN_ERR "%s: Unable to setup MC err regs\n", __func__);
                res = -ENOMEM;
index e4d971c..f831ea1 100644
@@ -102,6 +102,7 @@ config DRM_I915
        select BACKLIGHT_CLASS_DEVICE if ACPI
        select INPUT if ACPI
        select ACPI_VIDEO if ACPI
+       select ACPI_BUTTON if ACPI
        help
          Choose this option if you have a system that has Intel 830M, 845G,
          852GM, 855GM 865G or 915G integrated graphics.  If M is selected, the
index 230c9ff..8039199 100644
@@ -142,6 +142,19 @@ drm_gem_object_alloc(struct drm_device *dev, size_t size)
        if (IS_ERR(obj->filp))
                goto free;
 
+       /* Basically we want to disable the OOM killer and handle ENOMEM
+        * ourselves by sacrificing pages from cached buffers.
+        * XXX shmem_file_[gs]et_gfp_mask()
+        */
+       mapping_set_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping,
+                            GFP_HIGHUSER |
+                            __GFP_COLD |
+                            __GFP_FS |
+                            __GFP_RECLAIMABLE |
+                            __GFP_NORETRY |
+                            __GFP_NOWARN |
+                            __GFP_NOMEMALLOC);
+
        kref_init(&obj->refcount);
        kref_init(&obj->handlecount);
        obj->size = size;
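
GEM objects are backed by shmem, so allocation behaviour for their pages is governed by the gfp mask on the backing file's address_space; setting __GFP_NORETRY | __GFP_NOWARN makes those allocations fail fast with -ENOMEM instead of triggering the OOM killer, and GEM then makes room by evicting its own buffers (see i915_gem_object_get_pages_or_evict() later in this merge). The knob in isolation, as a minimal sketch:

#include <linux/pagemap.h>

/* Make page allocations for this mapping fail fast rather than OOM. */
static void make_mapping_fail_fast(struct address_space *mapping)
{
	gfp_t gfp = mapping_gfp_mask(mapping);

	mapping_set_gfp_mask(mapping, gfp | __GFP_NORETRY | __GFP_NOWARN);
}
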
index 5269dfa..fa7b9be 100644
@@ -9,6 +9,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o \
          i915_gem.o \
          i915_gem_debug.o \
          i915_gem_tiling.o \
+         i915_trace_points.o \
          intel_display.o \
          intel_crt.o \
          intel_lvds.o \
index 1e3bdce..f8ce9a3 100644
@@ -96,11 +96,13 @@ static int i915_gem_object_list_info(struct seq_file *m, void *data)
        {
                struct drm_gem_object *obj = obj_priv->obj;
 
-               seq_printf(m, "    %p: %s %08x %08x %d",
+               seq_printf(m, "    %p: %s %8zd %08x %08x %d %s",
                           obj,
                           get_pin_flag(obj_priv),
+                          obj->size,
                           obj->read_domains, obj->write_domain,
-                          obj_priv->last_rendering_seqno);
+                          obj_priv->last_rendering_seqno,
+                          obj_priv->dirty ? "dirty" : "");
 
                if (obj->name)
                        seq_printf(m, " (name: %d)", obj->name);
index 5a49a18..45d507e 100644
@@ -33,6 +33,7 @@
 #include "intel_drv.h"
 #include "i915_drm.h"
 #include "i915_drv.h"
+#include "i915_trace.h"
 #include <linux/vgaarb.h>
 
 /* Really want an OS-independent resettable timer.  Would like to have
@@ -50,14 +51,18 @@ int i915_wait_ring(struct drm_device * dev, int n, const char *caller)
        u32 last_head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
        int i;
 
+       trace_i915_ring_wait_begin(dev);
+
        for (i = 0; i < 100000; i++) {
                ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
                acthd = I915_READ(acthd_reg);
                ring->space = ring->head - (ring->tail + 8);
                if (ring->space < 0)
                        ring->space += ring->Size;
-               if (ring->space >= n)
+               if (ring->space >= n) {
+                       trace_i915_ring_wait_end(dev);
                        return 0;
+               }
 
                if (dev->primary->master) {
                        struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
@@ -77,6 +82,7 @@ int i915_wait_ring(struct drm_device * dev, int n, const char *caller)
 
        }
 
+       trace_i915_ring_wait_end(dev);
        return -EBUSY;
 }
 
@@ -922,7 +928,8 @@ static int i915_get_bridge_dev(struct drm_device *dev)
  * how much was set aside so we can use it for our own purposes.
  */
 static int i915_probe_agp(struct drm_device *dev, uint32_t *aperture_size,
-                         uint32_t *preallocated_size)
+                         uint32_t *preallocated_size,
+                         uint32_t *start)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        u16 tmp = 0;
@@ -1009,10 +1016,159 @@ static int i915_probe_agp(struct drm_device *dev, uint32_t *aperture_size,
                return -1;
        }
        *preallocated_size = stolen - overhead;
+       *start = overhead;
 
        return 0;
 }
 
+#define PTE_ADDRESS_MASK               0xfffff000
+#define PTE_ADDRESS_MASK_HIGH          0x000000f0 /* i915+ */
+#define PTE_MAPPING_TYPE_UNCACHED      (0 << 1)
+#define PTE_MAPPING_TYPE_DCACHE                (1 << 1) /* i830 only */
+#define PTE_MAPPING_TYPE_CACHED                (3 << 1)
+#define PTE_MAPPING_TYPE_MASK          (3 << 1)
+#define PTE_VALID                      (1 << 0)
+
+/**
+ * i915_gtt_to_phys - take a GTT address and turn it into a physical one
+ * @dev: drm device
+ * @gtt_addr: address to translate
+ *
+ * Some chip functions require allocations from stolen space but need the
+ * physical address of the memory in question.  We use this routine
+ * to get a physical address suitable for register programming from a given
+ * GTT address.
+ */
+static unsigned long i915_gtt_to_phys(struct drm_device *dev,
+                                     unsigned long gtt_addr)
+{
+       unsigned long *gtt;
+       unsigned long entry, phys;
+       int gtt_bar = IS_I9XX(dev) ? 0 : 1;
+       int gtt_offset, gtt_size;
+
+       if (IS_I965G(dev)) {
+               if (IS_G4X(dev) || IS_IGDNG(dev)) {
+                       gtt_offset = 2*1024*1024;
+                       gtt_size = 2*1024*1024;
+               } else {
+                       gtt_offset = 512*1024;
+                       gtt_size = 512*1024;
+               }
+       } else {
+               gtt_bar = 3;
+               gtt_offset = 0;
+               gtt_size = pci_resource_len(dev->pdev, gtt_bar);
+       }
+
+       gtt = ioremap_wc(pci_resource_start(dev->pdev, gtt_bar) + gtt_offset,
+                        gtt_size);
+       if (!gtt) {
+               DRM_ERROR("ioremap of GTT failed\n");
+               return 0;
+       }
+
+       entry = *(volatile u32 *)(gtt + (gtt_addr / 1024));
+
+       DRM_DEBUG("GTT addr: 0x%08lx, PTE: 0x%08lx\n", gtt_addr, entry);
+
+       /* Mask out these reserved bits on this hardware. */
+       if (!IS_I9XX(dev) || IS_I915G(dev) || IS_I915GM(dev) ||
+           IS_I945G(dev) || IS_I945GM(dev)) {
+               entry &= ~PTE_ADDRESS_MASK_HIGH;
+       }
+
+       /* If it's not a mapping type we know, then bail. */
+       if ((entry & PTE_MAPPING_TYPE_MASK) != PTE_MAPPING_TYPE_UNCACHED &&
+           (entry & PTE_MAPPING_TYPE_MASK) != PTE_MAPPING_TYPE_CACHED) {
+               iounmap(gtt);
+               return 0;
+       }
+
+       if (!(entry & PTE_VALID)) {
+               DRM_ERROR("bad GTT entry in stolen space\n");
+               iounmap(gtt);
+               return 0;
+       }
+
+       iounmap(gtt);
+
+       phys = (entry & PTE_ADDRESS_MASK) |
+               ((uint64_t)(entry & PTE_ADDRESS_MASK_HIGH) << (32 - 4));
+
+       DRM_DEBUG("GTT addr: 0x%08lx, phys addr: 0x%08lx\n", gtt_addr, phys);
+
+       return phys;
+}
+
+static void i915_warn_stolen(struct drm_device *dev)
+{
+       DRM_ERROR("not enough stolen space for compressed buffer, disabling\n");
+       DRM_ERROR("hint: you may be able to increase stolen memory size in the BIOS to avoid this\n");
+}
+
+static void i915_setup_compression(struct drm_device *dev, int size)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct drm_mm_node *compressed_fb, *compressed_llb;
+       unsigned long cfb_base, ll_base;
+
+       /* Leave 1M for line length buffer & misc. */
+       compressed_fb = drm_mm_search_free(&dev_priv->vram, size, 4096, 0);
+       if (!compressed_fb) {
+               i915_warn_stolen(dev);
+               return;
+       }
+
+       compressed_fb = drm_mm_get_block(compressed_fb, size, 4096);
+       if (!compressed_fb) {
+               i915_warn_stolen(dev);
+               return;
+       }
+
+       cfb_base = i915_gtt_to_phys(dev, compressed_fb->start);
+       if (!cfb_base) {
+               DRM_ERROR("failed to get stolen phys addr, disabling FBC\n");
+               drm_mm_put_block(compressed_fb);
+               return;
+       }
+
+       if (!IS_GM45(dev)) {
+               compressed_llb = drm_mm_search_free(&dev_priv->vram, 4096,
+                                                   4096, 0);
+               if (!compressed_llb) {
+                       i915_warn_stolen(dev);
+                       return;
+               }
+
+               compressed_llb = drm_mm_get_block(compressed_llb, 4096, 4096);
+               if (!compressed_llb) {
+                       i915_warn_stolen(dev);
+                       return;
+               }
+
+               ll_base = i915_gtt_to_phys(dev, compressed_llb->start);
+               if (!ll_base) {
+                       DRM_ERROR("failed to get stolen phys addr, disabling FBC\n");
+                       drm_mm_put_block(compressed_fb);
+                       drm_mm_put_block(compressed_llb);
+                       return;
+               }
+       }
+
+       dev_priv->cfb_size = size;
+
+       if (IS_GM45(dev)) {
+               g4x_disable_fbc(dev);
+               I915_WRITE(DPFC_CB_BASE, compressed_fb->start);
+       } else {
+               i8xx_disable_fbc(dev);
+               I915_WRITE(FBC_CFB_BASE, cfb_base);
+               I915_WRITE(FBC_LL_BASE, ll_base);
+       }
+
+       DRM_DEBUG("FBC base 0x%08lx, ll base 0x%08lx, size %dM\n", cfb_base,
+                 ll_base, size >> 20);
+}
+
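
i915_setup_compression() carves the compressed framebuffer (plus, off GM45, a line-length buffer) out of stolen memory with the drm_mm range allocator's two-step idiom of this era: search for a suitable hole, then claim a block from it. A sketch of that idiom, hedged to the pre-2013 drm_mm API used here:

/* Two-step drm_mm allocation as used above: find a hole, then claim it. */
static struct drm_mm_node *stolen_alloc(struct drm_mm *mm, unsigned long size)
{
	struct drm_mm_node *hole;

	hole = drm_mm_search_free(mm, size, 4096 /* align */, 0 /* best_match */);
	if (!hole)
		return NULL;			/* nothing big enough */

	return drm_mm_get_block(hole, size, 4096);
}
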
 /* true = enable decode, false = disable decoder */
 static unsigned int i915_vga_set_decode(void *cookie, bool state)
 {
@@ -1027,6 +1183,7 @@ static unsigned int i915_vga_set_decode(void *cookie, bool state)
 }
 
 static int i915_load_modeset_init(struct drm_device *dev,
+                                 unsigned long prealloc_start,
                                  unsigned long prealloc_size,
                                  unsigned long agp_size)
 {
@@ -1047,6 +1204,10 @@ static int i915_load_modeset_init(struct drm_device *dev,
 
        /* Basic memrange allocator for stolen space (aka vram) */
        drm_mm_init(&dev_priv->vram, 0, prealloc_size);
+       DRM_INFO("set up %ldM of stolen space\n", prealloc_size / (1024*1024));
+
+       /* We're off and running w/KMS */
+       dev_priv->mm.suspended = 0;
 
        /* Let GEM Manage from end of prealloc space to end of aperture.
         *
@@ -1059,10 +1220,25 @@ static int i915_load_modeset_init(struct drm_device *dev,
         */
        i915_gem_do_init(dev, prealloc_size, agp_size - 4096);
 
+       mutex_lock(&dev->struct_mutex);
        ret = i915_gem_init_ringbuffer(dev);
+       mutex_unlock(&dev->struct_mutex);
        if (ret)
                goto out;
 
+       /* Try to set up FBC with a reasonable compressed buffer size */
+       if (IS_MOBILE(dev) && (IS_I9XX(dev) || IS_I965G(dev) || IS_GM45(dev)) &&
+           i915_powersave) {
+               int cfb_size;
+
+               /* Try to get an 8M buffer... */
+               if (prealloc_size > (9*1024*1024))
+                       cfb_size = 8*1024*1024;
+               else /* fall back to 7/8 of the stolen space */
+                       cfb_size = prealloc_size * 7 / 8;
+               i915_setup_compression(dev, cfb_size);
+       }
+
        /* Allow hardware batchbuffers unless told otherwise.
         */
        dev_priv->allow_batchbuffer = 1;
@@ -1180,7 +1356,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
        struct drm_i915_private *dev_priv = dev->dev_private;
        resource_size_t base, size;
        int ret = 0, mmio_bar = IS_I9XX(dev) ? 0 : 1;
-       uint32_t agp_size, prealloc_size;
+       uint32_t agp_size, prealloc_size, prealloc_start;
 
        /* i915 has 4 more counters */
        dev->counters += 4;
@@ -1234,7 +1410,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
                         "performance may suffer.\n");
        }
 
-       ret = i915_probe_agp(dev, &agp_size, &prealloc_size);
+       ret = i915_probe_agp(dev, &agp_size, &prealloc_size, &prealloc_start);
        if (ret)
                goto out_iomapfree;
 
@@ -1300,8 +1476,12 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
                return ret;
        }
 
+       /* Start out suspended */
+       dev_priv->mm.suspended = 1;
+
        if (drm_core_check_feature(dev, DRIVER_MODESET)) {
-               ret = i915_load_modeset_init(dev, prealloc_size, agp_size);
+               ret = i915_load_modeset_init(dev, prealloc_start,
+                                            prealloc_size, agp_size);
                if (ret < 0) {
                        DRM_ERROR("failed to init modeset\n");
                        goto out_workqueue_free;
@@ -1313,6 +1493,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
        if (!IS_IGDNG(dev))
                intel_opregion_init(dev, 0);
 
+       setup_timer(&dev_priv->hangcheck_timer, i915_hangcheck_elapsed,
+                   (unsigned long) dev);
        return 0;
 
 out_workqueue_free:
@@ -1333,6 +1515,7 @@ int i915_driver_unload(struct drm_device *dev)
        struct drm_i915_private *dev_priv = dev->dev_private;
 
        destroy_workqueue(dev_priv->wq);
+       del_timer_sync(&dev_priv->hangcheck_timer);
 
        io_mapping_free(dev_priv->mm.gtt_mapping);
        if (dev_priv->mm.gtt_mtrr >= 0) {
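
The hangcheck timer added here follows the classic timer_list lifecycle: setup_timer() at load binds the callback and its context word, and del_timer_sync() at unload guarantees the handler is no longer running on any CPU before the driver memory goes away. The bare pattern (pre-4.15 timer API), names hypothetical:

#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list my_timer;

static void my_timer_fn(unsigned long data)
{
	/* ... check for progress, then re-arm if still needed ... */
	mod_timer(&my_timer, jiffies + msecs_to_jiffies(750));
}

static void my_load(void)
{
	setup_timer(&my_timer, my_timer_fn, 0);
	mod_timer(&my_timer, jiffies + msecs_to_jiffies(750));
}

static void my_unload(void)
{
	del_timer_sync(&my_timer);	/* handler finished before we return */
}
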
@@ -1472,6 +1655,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
        DRM_IOCTL_DEF(DRM_I915_GEM_GET_TILING, i915_gem_get_tiling, 0),
        DRM_IOCTL_DEF(DRM_I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, 0),
        DRM_IOCTL_DEF(DRM_I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id, 0),
+       DRM_IOCTL_DEF(DRM_I915_GEM_MADVISE, i915_gem_madvise_ioctl, 0),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
index dbe568c..b93814c 100644
@@ -89,6 +89,8 @@ static int i915_suspend(struct drm_device *dev, pm_message_t state)
                pci_set_power_state(dev->pdev, PCI_D3hot);
        }
 
+       dev_priv->suspended = 1;
+
        return 0;
 }
 
@@ -97,8 +99,6 @@ static int i915_resume(struct drm_device *dev)
        struct drm_i915_private *dev_priv = dev->dev_private;
        int ret = 0;
 
-       pci_set_power_state(dev->pdev, PCI_D0);
-       pci_restore_state(dev->pdev);
        if (pci_enable_device(dev->pdev))
                return -1;
        pci_set_master(dev->pdev);
@@ -124,9 +124,135 @@ static int i915_resume(struct drm_device *dev)
                drm_helper_resume_force_mode(dev);
        }
 
+       dev_priv->suspended = 0;
+
        return ret;
 }
 
+/**
+ * i965_reset - reset chip after a hang
+ * @dev: drm device to reset
+ * @flags: reset domains
+ *
+ * Reset the chip.  Useful if a hang is detected. Returns zero on successful
+ * reset or otherwise an error code.
+ *
+ * Procedure is fairly simple:
+ *   - reset the chip using the reset reg
+ *   - re-init context state
+ *   - re-init hardware status page
+ *   - re-init ring buffer
+ *   - re-init interrupt state
+ *   - re-init display
+ */
+int i965_reset(struct drm_device *dev, u8 flags)
+{
+       drm_i915_private_t *dev_priv = dev->dev_private;
+       unsigned long timeout;
+       u8 gdrst;
+       /*
+        * We really should only reset the display subsystem if we actually
+        * need to
+        */
+       bool need_display = true;
+
+       mutex_lock(&dev->struct_mutex);
+
+       /*
+        * Clear request list
+        */
+       i915_gem_retire_requests(dev);
+
+       if (need_display)
+               i915_save_display(dev);
+
+       if (IS_I965G(dev) || IS_G4X(dev)) {
+               /*
+                * Set the domains we want to reset, then the reset bit (bit 0).
+                * Clear the reset bit after a while and wait for hardware status
+                * bit (bit 1) to be set
+                */
+               pci_read_config_byte(dev->pdev, GDRST, &gdrst);
+               pci_write_config_byte(dev->pdev, GDRST, gdrst | flags | ((flags == GDRST_FULL) ? 0x1 : 0x0));
+               udelay(50);
+               pci_write_config_byte(dev->pdev, GDRST, gdrst & 0xfe);
+
+               /* ...we don't want to loop forever though, 500ms should be plenty */
+               timeout = jiffies + msecs_to_jiffies(500);
+               do {
+                       udelay(100);
+                       pci_read_config_byte(dev->pdev, GDRST, &gdrst);
+               } while ((gdrst & 0x1) && time_after(timeout, jiffies));
+
+               if (gdrst & 0x1) {
+                       WARN(true, "i915: Failed to reset chip\n");
+                       mutex_unlock(&dev->struct_mutex);
+                       return -EIO;
+               }
+       } else {
+               DRM_ERROR("Error occurred. Don't know how to reset this chip.\n");
+               mutex_unlock(&dev->struct_mutex);
+               return -ENODEV;
+       }
+
+       /* Ok, now get things going again... */
+
+       /*
+        * Everything depends on having the GTT running, so we need to start
+        * there.  Fortunately we don't need to do this unless we reset the
+        * chip at a PCI level.
+        *
+        * Next we need to restore the context, but we don't use those
+        * yet either...
+        *
+        * Ring buffer needs to be re-initialized in the KMS case, or if X
+        * was running at the time of the reset (i.e. we weren't VT
+        * switched away).
+        */
+       if (drm_core_check_feature(dev, DRIVER_MODESET) ||
+           !dev_priv->mm.suspended) {
+               drm_i915_ring_buffer_t *ring = &dev_priv->ring;
+               struct drm_gem_object *obj = ring->ring_obj;
+               struct drm_i915_gem_object *obj_priv = obj->driver_private;
+               dev_priv->mm.suspended = 0;
+
+               /* Stop the ring if it's running. */
+               I915_WRITE(PRB0_CTL, 0);
+               I915_WRITE(PRB0_TAIL, 0);
+               I915_WRITE(PRB0_HEAD, 0);
+
+               /* Initialize the ring. */
+               I915_WRITE(PRB0_START, obj_priv->gtt_offset);
+               I915_WRITE(PRB0_CTL,
+                          ((obj->size - 4096) & RING_NR_PAGES) |
+                          RING_NO_REPORT |
+                          RING_VALID);
+               if (!drm_core_check_feature(dev, DRIVER_MODESET))
+                       i915_kernel_lost_context(dev);
+               else {
+                       ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
+                       ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR;
+                       ring->space = ring->head - (ring->tail + 8);
+                       if (ring->space < 0)
+                               ring->space += ring->Size;
+               }
+
+               mutex_unlock(&dev->struct_mutex);
+               drm_irq_uninstall(dev);
+               drm_irq_install(dev);
+               mutex_lock(&dev->struct_mutex);
+       }
+
+       /*
+        * Display needs restore too...
+        */
+       if (need_display)
+               i915_restore_display(dev);
+
+       mutex_unlock(&dev->struct_mutex);
+       return 0;
+}
+
+
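
The reset wait in i965_reset() is the standard jiffies poll: compute the deadline once, spin with a short delay, and compare with time_after() so the test stays correct across jiffies wrap-around. The bare idiom, assuming a hypothetical done() predicate:

#include <linux/types.h>
#include <linux/jiffies.h>
#include <linux/delay.h>
#include <linux/errno.h>

static bool done(void);	/* hypothetical hardware-status predicate */

static int wait_for_done(void)
{
	unsigned long timeout = jiffies + msecs_to_jiffies(500);

	do {
		udelay(100);
	} while (!done() && time_after(timeout, jiffies));

	return done() ? 0 : -EIO;	/* -EIO if the deadline expired */
}
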
 static int __devinit
 i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
@@ -234,6 +360,8 @@ static int __init i915_init(void)
 {
        driver.num_ioctls = i915_max_ioctl;
 
+       i915_gem_shrinker_init();
+
        /*
         * If CONFIG_DRM_I915_KMS is set, default to KMS unless
         * explicitly disabled with the module parameter.
@@ -260,6 +388,7 @@ static int __init i915_init(void)
 
 static void __exit i915_exit(void)
 {
+       i915_gem_shrinker_exit();
        drm_exit(&driver);
 }
 
index a0632f8..b24b2d1 100644
@@ -48,6 +48,11 @@ enum pipe {
        PIPE_B,
 };
 
+enum plane {
+       PLANE_A = 0,
+       PLANE_B,
+};
+
 #define I915_NUM_PIPE  2
 
 /* Interface history:
@@ -148,6 +153,23 @@ struct drm_i915_error_state {
        struct timeval time;
 };
 
+struct drm_i915_display_funcs {
+       void (*dpms)(struct drm_crtc *crtc, int mode);
+       bool (*fbc_enabled)(struct drm_crtc *crtc);
+       void (*enable_fbc)(struct drm_crtc *crtc, unsigned long interval);
+       void (*disable_fbc)(struct drm_device *dev);
+       int (*get_display_clock_speed)(struct drm_device *dev);
+       int (*get_fifo_size)(struct drm_device *dev, int plane);
+       void (*update_wm)(struct drm_device *dev, int planea_clock,
+                         int planeb_clock, int sr_hdisplay, int pixel_size);
+       /* clock updates for mode set */
+       /* cursor updates */
+       /* render clock increase/decrease */
+       /* display clock increase/decrease */
+       /* pll clock increase/decrease */
+       /* clock gating init */
+};
+
 typedef struct drm_i915_private {
        struct drm_device *dev;
 
@@ -198,10 +220,21 @@ typedef struct drm_i915_private {
        unsigned int sr01, adpa, ppcr, dvob, dvoc, lvds;
        int vblank_pipe;
 
+       /* For hangcheck timer */
+#define DRM_I915_HANGCHECK_PERIOD 75 /* in jiffies */
+       struct timer_list hangcheck_timer;
+       int hangcheck_count;
+       uint32_t last_acthd;
+
        bool cursor_needs_physical;
 
        struct drm_mm vram;
 
+       unsigned long cfb_size;
+       unsigned long cfb_pitch;
+       int cfb_fence;
+       int cfb_plane;
+
        int irq_enabled;
 
        struct intel_opregion opregion;
@@ -222,6 +255,8 @@ typedef struct drm_i915_private {
        unsigned int edp_support:1;
        int lvds_ssc_freq;
 
+       struct notifier_block lid_notifier;
+
        int crt_ddc_bus; /* -1 = unknown, else GPIO to use for CRT DDC */
        struct drm_i915_fence_reg fence_regs[16]; /* assume 965 */
        int fence_reg_start; /* 4 if userland hasn't ioctl'd us yet */
@@ -234,7 +269,11 @@ typedef struct drm_i915_private {
        struct work_struct error_work;
        struct workqueue_struct *wq;
 
+       /* Display functions */
+       struct drm_i915_display_funcs display;
+
        /* Register state */
+       bool suspended;
        u8 saveLBB;
        u32 saveDSPACNTR;
        u32 saveDSPBCNTR;
@@ -349,6 +388,15 @@ typedef struct drm_i915_private {
                struct io_mapping *gtt_mapping;
                int gtt_mtrr;
 
+               /**
+                * Membership on list of all loaded devices, used to evict
+                * inactive buffers under memory pressure.
+                *
+                * Modifications should only be done whilst holding the
+                * shrink_list_lock spinlock.
+                */
+               struct list_head shrink_list;
+
                /**
                 * List of objects currently involved in rendering from the
                 * ringbuffer.
@@ -432,7 +480,7 @@ typedef struct drm_i915_private {
                 * It prevents command submission from occurring and makes
                 * every pending request fail
                 */
-               int wedged;
+               atomic_t wedged;
 
                /** Bit 6 swizzling required for X tiling */
                uint32_t bit_6_swizzle_x;
@@ -491,10 +539,7 @@ struct drm_i915_gem_object {
         * This is the same as gtt_space->start
         */
        uint32_t gtt_offset;
-       /**
-        * Required alignment for the object
-        */
-       uint32_t gtt_alignment;
+
        /**
         * Fake offset for use by mmap(2)
         */
@@ -541,6 +586,11 @@ struct drm_i915_gem_object {
         * in an execbuffer object list.
         */
        int in_execbuffer;
+
+       /**
+        * Advice: are the backing pages purgeable?
+        */
+       int madv;
 };
 
 /**
@@ -585,6 +635,8 @@ extern int i915_max_ioctl;
 extern unsigned int i915_fbpercrtc;
 extern unsigned int i915_powersave;
 
+extern void i915_save_display(struct drm_device *dev);
+extern void i915_restore_display(struct drm_device *dev);
 extern int i915_master_create(struct drm_device *dev, struct drm_master *master);
 extern void i915_master_destroy(struct drm_device *dev, struct drm_master *master);
 
@@ -604,8 +656,10 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd,
 extern int i915_emit_box(struct drm_device *dev,
                         struct drm_clip_rect *boxes,
                         int i, int DR1, int DR4);
+extern int i965_reset(struct drm_device *dev, u8 flags);
 
 /* i915_irq.c */
+void i915_hangcheck_elapsed(unsigned long data);
 extern int i915_irq_emit(struct drm_device *dev, void *data,
                         struct drm_file *file_priv);
 extern int i915_irq_wait(struct drm_device *dev, void *data,
@@ -676,6 +730,8 @@ int i915_gem_busy_ioctl(struct drm_device *dev, void *data,
                        struct drm_file *file_priv);
 int i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
                            struct drm_file *file_priv);
+int i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
+                          struct drm_file *file_priv);
 int i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
                           struct drm_file *file_priv);
 int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
@@ -695,6 +751,7 @@ int i915_gem_object_unbind(struct drm_gem_object *obj);
 void i915_gem_release_mmap(struct drm_gem_object *obj);
 void i915_gem_lastclose(struct drm_device *dev);
 uint32_t i915_get_gem_seqno(struct drm_device *dev);
+bool i915_seqno_passed(uint32_t seq1, uint32_t seq2);
 int i915_gem_object_get_fence_reg(struct drm_gem_object *obj);
 int i915_gem_object_put_fence_reg(struct drm_gem_object *obj);
 void i915_gem_retire_requests(struct drm_device *dev);
@@ -720,6 +777,9 @@ int i915_gem_object_get_pages(struct drm_gem_object *obj);
 void i915_gem_object_put_pages(struct drm_gem_object *obj);
 void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv);
 
+void i915_gem_shrinker_init(void);
+void i915_gem_shrinker_exit(void);
+
 /* i915_gem_tiling.c */
 void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
 void i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj);
@@ -767,6 +827,8 @@ static inline void opregion_enable_asle(struct drm_device *dev) { return; }
 extern void intel_modeset_init(struct drm_device *dev);
 extern void intel_modeset_cleanup(struct drm_device *dev);
 extern int intel_modeset_vga_set_state(struct drm_device *dev, bool state);
+extern void i8xx_disable_fbc(struct drm_device *dev);
+extern void g4x_disable_fbc(struct drm_device *dev);
 
 /**
  * Lock test for when it's just for synchronization of ring access.
@@ -864,6 +926,7 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller);
                       (dev)->pci_device == 0x2E12 || \
                       (dev)->pci_device == 0x2E22 || \
                       (dev)->pci_device == 0x2E32 || \
+                      (dev)->pci_device == 0x2E42 || \
                       (dev)->pci_device == 0x0042 || \
                       (dev)->pci_device == 0x0046)
 
@@ -876,6 +939,7 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller);
                     (dev)->pci_device == 0x2E12 || \
                     (dev)->pci_device == 0x2E22 || \
                     (dev)->pci_device == 0x2E32 || \
+                    (dev)->pci_device == 0x2E42 || \
                     IS_GM45(dev))
 
 #define IS_IGDG(dev) ((dev)->pci_device == 0xa001)
@@ -909,12 +973,13 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller);
 #define SUPPORTS_INTEGRATED_HDMI(dev)  (IS_G4X(dev) || IS_IGDNG(dev))
 #define SUPPORTS_INTEGRATED_DP(dev)    (IS_G4X(dev) || IS_IGDNG(dev))
 #define SUPPORTS_EDP(dev)              (IS_IGDNG_M(dev))
-#define I915_HAS_HOTPLUG(dev) (IS_I945G(dev) || IS_I945GM(dev) || IS_I965G(dev))
+#define I915_HAS_HOTPLUG(dev) (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev) || IS_I965G(dev))
 /* dsparb controlled by hw only */
 #define DSPARB_HWCONTROL(dev) (IS_G4X(dev) || IS_IGDNG(dev))
 
 #define HAS_FW_BLC(dev) (IS_I9XX(dev) || IS_G4X(dev) || IS_IGDNG(dev))
 #define HAS_PIPE_CXSR(dev) (IS_G4X(dev) || IS_IGDNG(dev))
+#define I915_HAS_FBC(dev) (IS_MOBILE(dev) && (IS_I9XX(dev) || IS_I965G(dev)))
 
 #define PRIMARY_RINGBUFFER_SIZE         (128*1024)
 
index c673171..40727d4 100644
@@ -29,6 +29,7 @@
 #include "drm.h"
 #include "i915_drm.h"
 #include "i915_drv.h"
+#include "i915_trace.h"
 #include "intel_drv.h"
 #include <linux/swap.h>
 #include <linux/pci.h>
@@ -48,11 +49,15 @@ static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
 static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
                                           unsigned alignment);
 static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
-static int i915_gem_evict_something(struct drm_device *dev);
+static int i915_gem_evict_something(struct drm_device *dev, int min_size);
+static int i915_gem_evict_from_inactive_list(struct drm_device *dev);
 static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
                                struct drm_i915_gem_pwrite *args,
                                struct drm_file *file_priv);
 
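+/* All device instances are kept on this list, under shrink_list_lock,
+ * so the memory shrinker can walk them. */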
+static LIST_HEAD(shrink_list);
+static DEFINE_SPINLOCK(shrink_list_lock);
+
 int i915_gem_do_init(struct drm_device *dev, unsigned long start,
                     unsigned long end)
 {
@@ -316,6 +321,45 @@ fail_unlock:
        return ret;
 }
 
+static inline gfp_t
+i915_gem_object_get_page_gfp_mask(struct drm_gem_object *obj)
+{
+       return mapping_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping);
+}
+
+static inline void
+i915_gem_object_set_page_gfp_mask(struct drm_gem_object *obj, gfp_t gfp)
+{
+       mapping_set_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping, gfp);
+}
+
+static int
+i915_gem_object_get_pages_or_evict(struct drm_gem_object *obj)
+{
+       int ret;
+
+       ret = i915_gem_object_get_pages(obj);
+
+       /* If we have insufficient memory to map in the pages, attempt
+        * to make some space by throwing out some old buffers.
+        */
+       if (ret == -ENOMEM) {
+               struct drm_device *dev = obj->dev;
+               gfp_t gfp;
+
+               ret = i915_gem_evict_something(dev, obj->size);
+               if (ret)
+                       return ret;
+
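+               /* Retry without __GFP_NORETRY so the allocator may reclaim
+                * and block for memory. */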
+               gfp = i915_gem_object_get_page_gfp_mask(obj);
+               i915_gem_object_set_page_gfp_mask(obj, gfp & ~__GFP_NORETRY);
+               ret = i915_gem_object_get_pages(obj);
+       i915_gem_object_set_page_gfp_mask(obj, gfp);
+       }
+
+       return ret;
+}
+
 /**
  * This is the fallback shmem pread path, which allocates temporary storage
  * in kernel space to copy_to_user into outside of the struct_mutex, so we
@@ -367,8 +411,8 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
 
        mutex_lock(&dev->struct_mutex);
 
-       ret = i915_gem_object_get_pages(obj);
-       if (ret != 0)
+       ret = i915_gem_object_get_pages_or_evict(obj);
+       if (ret)
                goto fail_unlock;
 
        ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
@@ -842,8 +886,8 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
 
        mutex_lock(&dev->struct_mutex);
 
-       ret = i915_gem_object_get_pages(obj);
-       if (ret != 0)
+       ret = i915_gem_object_get_pages_or_evict(obj);
+       if (ret)
                goto fail_unlock;
 
        ret = i915_gem_object_set_to_cpu_domain(obj, 1);
@@ -1155,28 +1199,22 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        /* Now bind it into the GTT if needed */
        mutex_lock(&dev->struct_mutex);
        if (!obj_priv->gtt_space) {
-               ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
-               if (ret) {
-                       mutex_unlock(&dev->struct_mutex);
-                       return VM_FAULT_SIGBUS;
-               }
-
-               ret = i915_gem_object_set_to_gtt_domain(obj, write);
-               if (ret) {
-                       mutex_unlock(&dev->struct_mutex);
-                       return VM_FAULT_SIGBUS;
-               }
+               ret = i915_gem_object_bind_to_gtt(obj, 0);
+               if (ret)
+                       goto unlock;
 
                list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
+
+               ret = i915_gem_object_set_to_gtt_domain(obj, write);
+               if (ret)
+                       goto unlock;
        }
 
        /* Need a new fence register? */
        if (obj_priv->tiling_mode != I915_TILING_NONE) {
                ret = i915_gem_object_get_fence_reg(obj);
-               if (ret) {
-                       mutex_unlock(&dev->struct_mutex);
-                       return VM_FAULT_SIGBUS;
-               }
+               if (ret)
+                       goto unlock;
        }
 
        pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
@@ -1184,18 +1222,18 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
        /* Finally, remap it using the new GTT offset */
        ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
-
+unlock:
        mutex_unlock(&dev->struct_mutex);
 
        switch (ret) {
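+       /* A signal (-ERESTARTSYS) just means the fault will be retried. */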
+       case 0:
+       case -ERESTARTSYS:
+               return VM_FAULT_NOPAGE;
        case -ENOMEM:
        case -EAGAIN:
                return VM_FAULT_OOM;
-       case -EFAULT:
-       case -EINVAL:
-               return VM_FAULT_SIGBUS;
        default:
-               return VM_FAULT_NOPAGE;
+               return VM_FAULT_SIGBUS;
        }
 }
 
@@ -1388,6 +1426,14 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
 
        obj_priv = obj->driver_private;
 
+       if (obj_priv->madv != I915_MADV_WILLNEED) {
+               DRM_ERROR("Attempting to mmap a purgeable buffer\n");
+               drm_gem_object_unreference(obj);
+               mutex_unlock(&dev->struct_mutex);
+               return -EINVAL;
+       }
+
        if (!obj_priv->mmap_offset) {
                ret = i915_gem_create_mmap_offset(obj);
                if (ret) {
@@ -1399,22 +1445,12 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
 
        args->offset = obj_priv->mmap_offset;
 
-       obj_priv->gtt_alignment = i915_gem_get_gtt_alignment(obj);
-
-       /* Make sure the alignment is correct for fence regs etc */
-       if (obj_priv->agp_mem &&
-           (obj_priv->gtt_offset & (obj_priv->gtt_alignment - 1))) {
-               drm_gem_object_unreference(obj);
-               mutex_unlock(&dev->struct_mutex);
-               return -EINVAL;
-       }
-
        /*
         * Pull it into the GTT so that we have a page list (makes the
         * initial fault faster and any subsequent flushing possible).
         */
        if (!obj_priv->agp_mem) {
-               ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment);
+               ret = i915_gem_object_bind_to_gtt(obj, 0);
                if (ret) {
                        drm_gem_object_unreference(obj);
                        mutex_unlock(&dev->struct_mutex);
@@ -1437,6 +1473,7 @@ i915_gem_object_put_pages(struct drm_gem_object *obj)
        int i;
 
        BUG_ON(obj_priv->pages_refcount == 0);
+       BUG_ON(obj_priv->madv == __I915_MADV_PURGED);
 
        if (--obj_priv->pages_refcount != 0)
                return;
@@ -1444,13 +1481,21 @@ i915_gem_object_put_pages(struct drm_gem_object *obj)
        if (obj_priv->tiling_mode != I915_TILING_NONE)
                i915_gem_object_save_bit_17_swizzle(obj);
 
-       for (i = 0; i < page_count; i++)
-               if (obj_priv->pages[i] != NULL) {
-                       if (obj_priv->dirty)
-                               set_page_dirty(obj_priv->pages[i]);
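+       /* Purgeable objects are never written back, so drop any dirty state. */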
+       if (obj_priv->madv == I915_MADV_DONTNEED)
+               obj_priv->dirty = 0;
+
+       for (i = 0; i < page_count; i++) {
+               if (obj_priv->pages[i] == NULL)
+                       break;
+
+               if (obj_priv->dirty)
+                       set_page_dirty(obj_priv->pages[i]);
+
+               if (obj_priv->madv == I915_MADV_WILLNEED)
                        mark_page_accessed(obj_priv->pages[i]);
-                       page_cache_release(obj_priv->pages[i]);
-               }
+
+               page_cache_release(obj_priv->pages[i]);
+       }
        obj_priv->dirty = 0;
 
        drm_free_large(obj_priv->pages);
@@ -1489,6 +1534,26 @@ i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
        obj_priv->last_rendering_seqno = 0;
 }
 
+/* Immediately discard the backing storage */
+static void
+i915_gem_object_truncate(struct drm_gem_object *obj)
+{
+       struct drm_i915_gem_object *obj_priv = obj->driver_private;
+       struct inode *inode;
+
+       inode = obj->filp->f_path.dentry->d_inode;
+       if (inode->i_op->truncate)
+               inode->i_op->truncate(inode);
+
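+       /* Remember that the backing pages are gone so they are never reused. */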
+       obj_priv->madv = __I915_MADV_PURGED;
+}
+
+static inline int
+i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv)
+{
+       return obj_priv->madv == I915_MADV_DONTNEED;
+}
+
 static void
 i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
 {
@@ -1577,15 +1642,24 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
 
                        if ((obj->write_domain & flush_domains) ==
                            obj->write_domain) {
+                               uint32_t old_write_domain = obj->write_domain;
+
                                obj->write_domain = 0;
                                i915_gem_object_move_to_active(obj, seqno);
+
+                               trace_i915_gem_object_change_domain(obj,
+                                                                   obj->read_domains,
+                                                                   old_write_domain);
                        }
                }
 
        }
 
-       if (was_empty && !dev_priv->mm.suspended)
-               queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
+       if (!dev_priv->mm.suspended) {
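+               /* Re-arm the hangcheck timer whenever new work is queued to the ring. */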
+               mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
+               if (was_empty)
+                       queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
+       }
        return seqno;
 }
 
@@ -1623,6 +1697,8 @@ i915_gem_retire_request(struct drm_device *dev,
 {
        drm_i915_private_t *dev_priv = dev->dev_private;
 
+       trace_i915_gem_request_retire(dev, request->seqno);
+
        /* Move any buffers on the active list that are no longer referenced
         * by the ringbuffer to the flushing/inactive lists as appropriate.
         */
@@ -1671,7 +1747,7 @@ out:
 /**
  * Returns true if seq1 is later than seq2.
  */
-static int
+bool
 i915_seqno_passed(uint32_t seq1, uint32_t seq2)
 {
        return (int32_t)(seq1 - seq2) >= 0;
@@ -1709,7 +1785,7 @@ i915_gem_retire_requests(struct drm_device *dev)
                retiring_seqno = request->seqno;
 
                if (i915_seqno_passed(seqno, retiring_seqno) ||
-                   dev_priv->mm.wedged) {
+                   atomic_read(&dev_priv->mm.wedged)) {
                        i915_gem_retire_request(dev, request);
 
                        list_del(&request->list);
@@ -1751,6 +1827,9 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
 
        BUG_ON(seqno == 0);
 
+       if (atomic_read(&dev_priv->mm.wedged))
+               return -EIO;
+
        if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
                if (IS_IGDNG(dev))
                        ier = I915_READ(DEIER) | I915_READ(GTIER);
@@ -1763,16 +1842,20 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
                        i915_driver_irq_postinstall(dev);
                }
 
+               trace_i915_gem_request_wait_begin(dev, seqno);
+
                dev_priv->mm.waiting_gem_seqno = seqno;
                i915_user_irq_get(dev);
                ret = wait_event_interruptible(dev_priv->irq_queue,
                                               i915_seqno_passed(i915_get_gem_seqno(dev),
                                                                 seqno) ||
-                                              dev_priv->mm.wedged);
+                                              atomic_read(&dev_priv->mm.wedged));
                i915_user_irq_put(dev);
                dev_priv->mm.waiting_gem_seqno = 0;
+
+               trace_i915_gem_request_wait_end(dev, seqno);
        }
-       if (dev_priv->mm.wedged)
+       if (atomic_read(&dev_priv->mm.wedged))
                ret = -EIO;
 
        if (ret && ret != -ERESTARTSYS)
@@ -1803,6 +1886,8 @@ i915_gem_flush(struct drm_device *dev,
        DRM_INFO("%s: invalidate %08x flush %08x\n", __func__,
                  invalidate_domains, flush_domains);
 #endif
+       trace_i915_gem_request_flush(dev, dev_priv->mm.next_gem_seqno,
+                                    invalidate_domains, flush_domains);
 
        if (flush_domains & I915_GEM_DOMAIN_CPU)
                drm_agp_chipset_flush(dev);
@@ -1915,6 +2000,12 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
                return -EINVAL;
        }
 
+       /* blow away mappings if mapped through GTT */
+       i915_gem_release_mmap(obj);
+
+       if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
+               i915_gem_clear_fence_reg(obj);
+
        /* Move the object to the CPU domain to ensure that
         * any possible CPU writes while it's not in the GTT
         * are flushed when we go to remap it. This will
@@ -1928,21 +2019,16 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
                return ret;
        }
 
+       BUG_ON(obj_priv->active);
+
        if (obj_priv->agp_mem != NULL) {
                drm_unbind_agp(obj_priv->agp_mem);
                drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
                obj_priv->agp_mem = NULL;
        }
 
-       BUG_ON(obj_priv->active);
-
-       /* blow away mappings if mapped through GTT */
-       i915_gem_release_mmap(obj);
-
-       if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
-               i915_gem_clear_fence_reg(obj);
-
        i915_gem_object_put_pages(obj);
+       BUG_ON(obj_priv->pages_refcount);
 
        if (obj_priv->gtt_space) {
                atomic_dec(&dev->gtt_count);
@@ -1956,40 +2042,113 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
        if (!list_empty(&obj_priv->list))
                list_del_init(&obj_priv->list);
 
+       if (i915_gem_object_is_purgeable(obj_priv))
+               i915_gem_object_truncate(obj);
+
+       trace_i915_gem_object_unbind(obj);
+
        return 0;
 }
 
+static struct drm_gem_object *
+i915_gem_find_inactive_object(struct drm_device *dev, int min_size)
+{
+       drm_i915_private_t *dev_priv = dev->dev_private;
+       struct drm_i915_gem_object *obj_priv;
+       struct drm_gem_object *best = NULL;
+       struct drm_gem_object *first = NULL;
+
+       /* Try to find the smallest clean object */
+       list_for_each_entry(obj_priv, &dev_priv->mm.inactive_list, list) {
+               struct drm_gem_object *obj = obj_priv->obj;
+               if (obj->size >= min_size) {
+                       if ((!obj_priv->dirty ||
+                            i915_gem_object_is_purgeable(obj_priv)) &&
+                           (!best || obj->size < best->size)) {
+                               best = obj;
+                               if (best->size == min_size)
+                                       return best;
+                       }
+                       if (!first)
+                               first = obj;
+               }
+       }
+
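+       /* No clean object fits; fall back to the first (possibly dirty) one that does. */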
+       return best ? best : first;
+}
+
 static int
-i915_gem_evict_something(struct drm_device *dev)
+i915_gem_evict_everything(struct drm_device *dev)
+{
+       drm_i915_private_t *dev_priv = dev->dev_private;
+       uint32_t seqno;
+       int ret;
+       bool lists_empty;
+
+       spin_lock(&dev_priv->mm.active_list_lock);
+       lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
+                      list_empty(&dev_priv->mm.flushing_list) &&
+                      list_empty(&dev_priv->mm.active_list));
+       spin_unlock(&dev_priv->mm.active_list_lock);
+
+       if (lists_empty)
+               return -ENOSPC;
+
+       /* Flush everything (on to the inactive lists) and evict */
+       i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+       seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS);
+       if (seqno == 0)
+               return -ENOMEM;
+
+       ret = i915_wait_request(dev, seqno);
+       if (ret)
+               return ret;
+
+       ret = i915_gem_evict_from_inactive_list(dev);
+       if (ret)
+               return ret;
+
+       spin_lock(&dev_priv->mm.active_list_lock);
+       lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
+                      list_empty(&dev_priv->mm.flushing_list) &&
+                      list_empty(&dev_priv->mm.active_list));
+       spin_unlock(&dev_priv->mm.active_list_lock);
+       BUG_ON(!lists_empty);
+
+       return 0;
+}
+
+static int
+i915_gem_evict_something(struct drm_device *dev, int min_size)
 {
        drm_i915_private_t *dev_priv = dev->dev_private;
        struct drm_gem_object *obj;
-       struct drm_i915_gem_object *obj_priv;
-       int ret = 0;
+       int ret;
 
        for (;;) {
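+               /* Retire finished requests first so idle buffers reach the inactive list. */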
+               i915_gem_retire_requests(dev);
+
                /* If there's an inactive buffer available now, grab it
                 * and be done.
                 */
-               if (!list_empty(&dev_priv->mm.inactive_list)) {
-                       obj_priv = list_first_entry(&dev_priv->mm.inactive_list,
-                                                   struct drm_i915_gem_object,
-                                                   list);
-                       obj = obj_priv->obj;
-                       BUG_ON(obj_priv->pin_count != 0);
+               obj = i915_gem_find_inactive_object(dev, min_size);
+               if (obj) {
+                       struct drm_i915_gem_object *obj_priv;
+
 #if WATCH_LRU
                        DRM_INFO("%s: evicting %p\n", __func__, obj);
 #endif
+                       obj_priv = obj->driver_private;
+                       BUG_ON(obj_priv->pin_count != 0);
                        BUG_ON(obj_priv->active);
 
                        /* Wait on the rendering and unbind the buffer. */
-                       ret = i915_gem_object_unbind(obj);
-                       break;
+                       return i915_gem_object_unbind(obj);
                }
 
                /* If we didn't get anything, but the ring is still processing
-                * things, wait for one of those things to finish and hopefully
-                * leave us a buffer to evict.
+                * things, wait for the next to finish and hopefully leave us
+                * a buffer to evict.
                 */
                if (!list_empty(&dev_priv->mm.request_list)) {
                        struct drm_i915_gem_request *request;
@@ -2000,16 +2159,9 @@ i915_gem_evict_something(struct drm_device *dev)
 
                        ret = i915_wait_request(dev, request->seqno);
                        if (ret)
-                               break;
+                               return ret;
 
-                       /* if waiting caused an object to become inactive,
-                        * then loop around and wait for it. Otherwise, we
-                        * assume that waiting freed and unbound something,
-                        * so there should now be some space in the GTT
-                        */
-                       if (!list_empty(&dev_priv->mm.inactive_list))
-                               continue;
-                       break;
+                       continue;
                }
 
                /* If we didn't have anything on the request list but there
@@ -2018,46 +2170,44 @@ i915_gem_evict_something(struct drm_device *dev)
                 * will get moved to inactive.
                 */
                if (!list_empty(&dev_priv->mm.flushing_list)) {
-                       obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
-                                                   struct drm_i915_gem_object,
-                                                   list);
-                       obj = obj_priv->obj;
+                       struct drm_i915_gem_object *obj_priv;
 
-                       i915_gem_flush(dev,
-                                      obj->write_domain,
-                                      obj->write_domain);
-                       i915_add_request(dev, NULL, obj->write_domain);
+                       /* Find an object that we can immediately reuse */
+                       list_for_each_entry(obj_priv, &dev_priv->mm.flushing_list, list) {
+                               obj = obj_priv->obj;
+                               if (obj->size >= min_size)
+                                       break;
 
-                       obj = NULL;
-                       continue;
-               }
+                               obj = NULL;
+                       }
 
-               DRM_ERROR("inactive empty %d request empty %d "
-                         "flushing empty %d\n",
-                         list_empty(&dev_priv->mm.inactive_list),
-                         list_empty(&dev_priv->mm.request_list),
-                         list_empty(&dev_priv->mm.flushing_list));
-               /* If we didn't do any of the above, there's nothing to be done
-                * and we just can't fit it in.
-                */
-               return -ENOSPC;
-       }
-       return ret;
-}
+                       if (obj != NULL) {
+                               uint32_t seqno;
 
-static int
-i915_gem_evict_everything(struct drm_device *dev)
-{
-       int ret;
+                               i915_gem_flush(dev,
+                                              obj->write_domain,
+                                              obj->write_domain);
+                               seqno = i915_add_request(dev, NULL, obj->write_domain);
+                               if (seqno == 0)
+                                       return -ENOMEM;
 
-       for (;;) {
-               ret = i915_gem_evict_something(dev);
-               if (ret != 0)
-                       break;
+                               ret = i915_wait_request(dev, seqno);
+                               if (ret)
+                                       return ret;
+
+                               continue;
+                       }
+               }
+
+               /* If we didn't do any of the above, there's no single buffer
+                * large enough to swap out for the new one, so just evict
+                * everything and start again. (This should be rare.)
+                */
+               if (!list_empty(&dev_priv->mm.inactive_list))
+                       return i915_gem_evict_from_inactive_list(dev);
+               else
+                       return i915_gem_evict_everything(dev);
        }
-       if (ret == -ENOSPC)
-               return 0;
-       return ret;
 }
 
 int
@@ -2080,7 +2230,6 @@ i915_gem_object_get_pages(struct drm_gem_object *obj)
        BUG_ON(obj_priv->pages != NULL);
        obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *));
        if (obj_priv->pages == NULL) {
-               DRM_ERROR("Faled to allocate page list\n");
                obj_priv->pages_refcount--;
                return -ENOMEM;
        }
@@ -2091,7 +2240,6 @@ i915_gem_object_get_pages(struct drm_gem_object *obj)
                page = read_mapping_page(mapping, i, NULL);
                if (IS_ERR(page)) {
                        ret = PTR_ERR(page);
-                       DRM_ERROR("read_mapping_page failed: %d\n", ret);
                        i915_gem_object_put_pages(obj);
                        return ret;
                }
@@ -2328,6 +2476,8 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
        else
                i830_write_fence_reg(reg);
 
+       trace_i915_gem_object_get_fence(obj, i, obj_priv->tiling_mode);
+
        return 0;
 }
 
@@ -2410,10 +2560,17 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
        drm_i915_private_t *dev_priv = dev->dev_private;
        struct drm_i915_gem_object *obj_priv = obj->driver_private;
        struct drm_mm_node *free_space;
-       int page_count, ret;
+       bool retry_alloc = false;
+       int ret;
 
        if (dev_priv->mm.suspended)
                return -EBUSY;
+
+       if (obj_priv->madv != I915_MADV_WILLNEED) {
+               DRM_ERROR("Attempting to bind a purgeable object\n");
+               return -EINVAL;
+       }
+
        if (alignment == 0)
                alignment = i915_gem_get_gtt_alignment(obj);
        if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) {
@@ -2433,30 +2590,16 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
                }
        }
        if (obj_priv->gtt_space == NULL) {
-               bool lists_empty;
-
                /* If the gtt is empty and we're still having trouble
                 * fitting our object in, we're out of memory.
                 */
 #if WATCH_LRU
                DRM_INFO("%s: GTT full, evicting something\n", __func__);
 #endif
-               spin_lock(&dev_priv->mm.active_list_lock);
-               lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
-                              list_empty(&dev_priv->mm.flushing_list) &&
-                              list_empty(&dev_priv->mm.active_list));
-               spin_unlock(&dev_priv->mm.active_list_lock);
-               if (lists_empty) {
-                       DRM_ERROR("GTT full, but LRU list empty\n");
-                       return -ENOSPC;
-               }
-
-               ret = i915_gem_evict_something(dev);
-               if (ret != 0) {
-                       if (ret != -ERESTARTSYS)
-                               DRM_ERROR("Failed to evict a buffer %d\n", ret);
+               ret = i915_gem_evict_something(dev, obj->size);
+               if (ret)
                        return ret;
-               }
+
                goto search_free;
        }
 
@@ -2464,27 +2607,56 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
        DRM_INFO("Binding object of size %zd at 0x%08x\n",
                 obj->size, obj_priv->gtt_offset);
 #endif
+       if (retry_alloc) {
+               i915_gem_object_set_page_gfp_mask(obj,
+                                                 i915_gem_object_get_page_gfp_mask(obj) & ~__GFP_NORETRY);
+       }
        ret = i915_gem_object_get_pages(obj);
+       if (retry_alloc) {
+               i915_gem_object_set_page_gfp_mask(obj,
+                                                 i915_gem_object_get_page_gfp_mask(obj) | __GFP_NORETRY);
+       }
        if (ret) {
                drm_mm_put_block(obj_priv->gtt_space);
                obj_priv->gtt_space = NULL;
+
+               if (ret == -ENOMEM) {
+                       /* first try to clear up some space from the GTT */
+                       ret = i915_gem_evict_something(dev, obj->size);
+                       if (ret) {
+                               /* now try to shrink everyone else */
+                               if (!retry_alloc) {
+                                       retry_alloc = true;
+                                       goto search_free;
+                               }
+
+                               return ret;
+                       }
+
+                       goto search_free;
+               }
+
                return ret;
        }
 
-       page_count = obj->size / PAGE_SIZE;
        /* Create an AGP memory structure pointing at our pages, and bind it
         * into the GTT.
         */
        obj_priv->agp_mem = drm_agp_bind_pages(dev,
                                               obj_priv->pages,
-                                              page_count,
+                                              obj->size >> PAGE_SHIFT,
                                               obj_priv->gtt_offset,
                                               obj_priv->agp_type);
        if (obj_priv->agp_mem == NULL) {
                i915_gem_object_put_pages(obj);
                drm_mm_put_block(obj_priv->gtt_space);
                obj_priv->gtt_space = NULL;
-               return -ENOMEM;
+
+               ret = i915_gem_evict_something(dev, obj->size);
+               if (ret)
+                       return ret;
+
+               goto search_free;
        }
        atomic_inc(&dev->gtt_count);
        atomic_add(obj->size, &dev->gtt_memory);
@@ -2496,6 +2668,8 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
        BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
        BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
 
+       trace_i915_gem_object_bind(obj, obj_priv->gtt_offset);
+
        return 0;
 }
 
@@ -2511,15 +2685,7 @@ i915_gem_clflush_object(struct drm_gem_object *obj)
        if (obj_priv->pages == NULL)
                return;
 
-       /* XXX: The 865 in particular appears to be weird in how it handles
-        * cache flushing.  We haven't figured it out, but the
-        * clflush+agp_chipset_flush doesn't appear to successfully get the
-        * data visible to the PGU, while wbinvd + agp_chipset_flush does.
-        */
-       if (IS_I865G(obj->dev)) {
-               wbinvd();
-               return;
-       }
+       trace_i915_gem_object_clflush(obj);
 
        drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
 }
@@ -2530,21 +2696,29 @@ i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
 {
        struct drm_device *dev = obj->dev;
        uint32_t seqno;
+       uint32_t old_write_domain;
 
        if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
                return;
 
        /* Queue the GPU write cache flushing we need. */
+       old_write_domain = obj->write_domain;
        i915_gem_flush(dev, 0, obj->write_domain);
        seqno = i915_add_request(dev, NULL, obj->write_domain);
        obj->write_domain = 0;
        i915_gem_object_move_to_active(obj, seqno);
+
+       trace_i915_gem_object_change_domain(obj,
+                                           obj->read_domains,
+                                           old_write_domain);
 }
 
 /** Flushes the GTT write domain for the object if it's dirty. */
 static void
 i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
 {
+       uint32_t old_write_domain;
+
        if (obj->write_domain != I915_GEM_DOMAIN_GTT)
                return;
 
@@ -2552,7 +2726,12 @@ i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
         * to it immediately go to main memory as far as we know, so there's
         * no chipset flush.  It also doesn't land in render cache.
         */
+       old_write_domain = obj->write_domain;
        obj->write_domain = 0;
+
+       trace_i915_gem_object_change_domain(obj,
+                                           obj->read_domains,
+                                           old_write_domain);
 }
 
 /** Flushes the CPU write domain for the object if it's dirty. */
@@ -2560,13 +2739,19 @@ static void
 i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
 {
        struct drm_device *dev = obj->dev;
+       uint32_t old_write_domain;
 
        if (obj->write_domain != I915_GEM_DOMAIN_CPU)
                return;
 
        i915_gem_clflush_object(obj);
        drm_agp_chipset_flush(dev);
+       old_write_domain = obj->write_domain;
        obj->write_domain = 0;
+
+       trace_i915_gem_object_change_domain(obj,
+                                           obj->read_domains,
+                                           old_write_domain);
 }
 
 /**
@@ -2579,6 +2764,7 @@ int
 i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
 {
        struct drm_i915_gem_object *obj_priv = obj->driver_private;
+       uint32_t old_write_domain, old_read_domains;
        int ret;
 
        /* Not valid to be called on unbound objects. */
@@ -2591,6 +2777,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
        if (ret != 0)
                return ret;
 
+       old_write_domain = obj->write_domain;
+       old_read_domains = obj->read_domains;
+
        /* If we're writing through the GTT domain, then CPU and GPU caches
         * will need to be invalidated at next use.
         */
@@ -2609,6 +2798,10 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
                obj_priv->dirty = 1;
        }
 
+       trace_i915_gem_object_change_domain(obj,
+                                           old_read_domains,
+                                           old_write_domain);
+
        return 0;
 }
 
@@ -2621,6 +2814,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
 static int
 i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
 {
+       uint32_t old_write_domain, old_read_domains;
        int ret;
 
        i915_gem_object_flush_gpu_write_domain(obj);
@@ -2636,6 +2830,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
         */
        i915_gem_object_set_to_full_cpu_read_domain(obj);
 
+       old_write_domain = obj->write_domain;
+       old_read_domains = obj->read_domains;
+
        /* Flush the CPU cache if it's still invalid. */
        if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
                i915_gem_clflush_object(obj);
@@ -2656,6 +2853,10 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
                obj->write_domain = I915_GEM_DOMAIN_CPU;
        }
 
+       trace_i915_gem_object_change_domain(obj,
+                                           old_read_domains,
+                                           old_write_domain);
+
        return 0;
 }
 
@@ -2777,6 +2978,7 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
        struct drm_i915_gem_object      *obj_priv = obj->driver_private;
        uint32_t                        invalidate_domains = 0;
        uint32_t                        flush_domains = 0;
+       uint32_t                        old_read_domains;
 
        BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU);
        BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU);
@@ -2823,6 +3025,8 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
                i915_gem_clflush_object(obj);
        }
 
+       old_read_domains = obj->read_domains;
+
        /* The actual obj->write_domain will be updated with
         * pending_write_domain after we emit the accumulated flush for all
         * of our domain changes in execbuffers (which clears objects'
@@ -2841,6 +3045,10 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
                 obj->read_domains, obj->write_domain,
                 dev->invalidate_domains, dev->flush_domains);
 #endif
+
+       trace_i915_gem_object_change_domain(obj,
+                                           old_read_domains,
+                                           obj->write_domain);
 }
 
 /**
@@ -2893,6 +3101,7 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
                                          uint64_t offset, uint64_t size)
 {
        struct drm_i915_gem_object *obj_priv = obj->driver_private;
+       uint32_t old_read_domains;
        int i, ret;
 
        if (offset == 0 && size == obj->size)
@@ -2939,8 +3148,13 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
         */
        BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
 
+       old_read_domains = obj->read_domains;
        obj->read_domains |= I915_GEM_DOMAIN_CPU;
 
+       trace_i915_gem_object_change_domain(obj,
+                                           old_read_domains,
+                                           obj->write_domain);
+
        return 0;
 }
 
@@ -2984,6 +3198,21 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
                }
                target_obj_priv = target_obj->driver_private;
 
+#if WATCH_RELOC
+               DRM_INFO("%s: obj %p offset %08x target %d "
+                        "read %08x write %08x gtt %08x "
+                        "presumed %08x delta %08x\n",
+                        __func__,
+                        obj,
+                        (int) reloc->offset,
+                        (int) reloc->target_handle,
+                        (int) reloc->read_domains,
+                        (int) reloc->write_domain,
+                        (int) target_obj_priv->gtt_offset,
+                        (int) reloc->presumed_offset,
+                        reloc->delta);
+#endif
+
                /* The target buffer should have appeared before us in the
                 * exec_object list, so it should have a GTT space bound by now.
                 */
@@ -2995,25 +3224,7 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
                        return -EINVAL;
                }
 
-               if (reloc->offset > obj->size - 4) {
-                       DRM_ERROR("Relocation beyond object bounds: "
-                                 "obj %p target %d offset %d size %d.\n",
-                                 obj, reloc->target_handle,
-                                 (int) reloc->offset, (int) obj->size);
-                       drm_gem_object_unreference(target_obj);
-                       i915_gem_object_unpin(obj);
-                       return -EINVAL;
-               }
-               if (reloc->offset & 3) {
-                       DRM_ERROR("Relocation not 4-byte aligned: "
-                                 "obj %p target %d offset %d.\n",
-                                 obj, reloc->target_handle,
-                                 (int) reloc->offset);
-                       drm_gem_object_unreference(target_obj);
-                       i915_gem_object_unpin(obj);
-                       return -EINVAL;
-               }
-
+               /* Validate that the target is in a valid r/w GPU domain */
                if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
                    reloc->read_domains & I915_GEM_DOMAIN_CPU) {
                        DRM_ERROR("reloc with read/write CPU domains: "
@@ -3027,7 +3238,6 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
                        i915_gem_object_unpin(obj);
                        return -EINVAL;
                }
-
                if (reloc->write_domain && target_obj->pending_write_domain &&
                    reloc->write_domain != target_obj->pending_write_domain) {
                        DRM_ERROR("Write domain conflict: "
@@ -3042,21 +3252,6 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
                        return -EINVAL;
                }
 
-#if WATCH_RELOC
-               DRM_INFO("%s: obj %p offset %08x target %d "
-                        "read %08x write %08x gtt %08x "
-                        "presumed %08x delta %08x\n",
-                        __func__,
-                        obj,
-                        (int) reloc->offset,
-                        (int) reloc->target_handle,
-                        (int) reloc->read_domains,
-                        (int) reloc->write_domain,
-                        (int) target_obj_priv->gtt_offset,
-                        (int) reloc->presumed_offset,
-                        reloc->delta);
-#endif
-
                target_obj->pending_read_domains |= reloc->read_domains;
                target_obj->pending_write_domain |= reloc->write_domain;
 
@@ -3068,6 +3263,37 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
                        continue;
                }
 
+               /* Check that the relocation address is valid... */
+               if (reloc->offset > obj->size - 4) {
+                       DRM_ERROR("Relocation beyond object bounds: "
+                                 "obj %p target %d offset %d size %d.\n",
+                                 obj, reloc->target_handle,
+                                 (int) reloc->offset, (int) obj->size);
+                       drm_gem_object_unreference(target_obj);
+                       i915_gem_object_unpin(obj);
+                       return -EINVAL;
+               }
+               if (reloc->offset & 3) {
+                       DRM_ERROR("Relocation not 4-byte aligned: "
+                                 "obj %p target %d offset %d.\n",
+                                 obj, reloc->target_handle,
+                                 (int) reloc->offset);
+                       drm_gem_object_unreference(target_obj);
+                       i915_gem_object_unpin(obj);
+                       return -EINVAL;
+               }
+
+               /* and points to somewhere within the target object. */
+               if (reloc->delta >= target_obj->size) {
+                       DRM_ERROR("Relocation beyond target object bounds: "
+                                 "obj %p target %d delta %d size %d.\n",
+                                 obj, reloc->target_handle,
+                                 (int) reloc->delta, (int) target_obj->size);
+                       drm_gem_object_unreference(target_obj);
+                       i915_gem_object_unpin(obj);
+                       return -EINVAL;
+               }
+
                ret = i915_gem_object_set_to_gtt_domain(obj, 1);
                if (ret != 0) {
                        drm_gem_object_unreference(target_obj);
@@ -3126,6 +3352,8 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev,
        exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
        exec_len = (uint32_t) exec->batch_len;
 
+       trace_i915_gem_request_submit(dev, dev_priv->mm.next_gem_seqno);
+
        count = nbox ? nbox : 1;
 
        for (i = 0; i < count; i++) {
@@ -3363,7 +3591,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 
        i915_verify_inactive(dev, __FILE__, __LINE__);
 
-       if (dev_priv->mm.wedged) {
+       if (atomic_read(&dev_priv->mm.wedged)) {
                DRM_ERROR("Execbuf while wedged\n");
                mutex_unlock(&dev->struct_mutex);
                ret = -EIO;
@@ -3421,8 +3649,23 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 
                /* error other than GTT full, or we've already tried again */
                if (ret != -ENOSPC || pin_tries >= 1) {
-                       if (ret != -ERESTARTSYS)
-                               DRM_ERROR("Failed to pin buffers %d\n", ret);
+                       if (ret != -ERESTARTSYS) {
+                               unsigned long long total_size = 0;
+                               for (i = 0; i < args->buffer_count; i++)
+                                       total_size += object_list[i]->size;
+                               DRM_ERROR("Failed to pin buffer %d of %d, total %llu bytes: %d\n",
+                                         pinned+1, args->buffer_count,
+                                         total_size, ret);
+                               DRM_ERROR("%d objects [%d pinned], "
+                                         "%d object bytes [%d pinned], "
+                                         "%d/%d gtt bytes\n",
+                                         atomic_read(&dev->object_count),
+                                         atomic_read(&dev->pin_count),
+                                         atomic_read(&dev->object_memory),
+                                         atomic_read(&dev->pin_memory),
+                                         atomic_read(&dev->gtt_memory),
+                                         dev->gtt_total);
+                       }
                        goto err;
                }
 
@@ -3433,7 +3676,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 
                /* evict everyone we can from the aperture */
                ret = i915_gem_evict_everything(dev);
-               if (ret)
+               if (ret && ret != -ENOSPC)
                        goto err;
        }
 
@@ -3489,8 +3732,12 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
 
        for (i = 0; i < args->buffer_count; i++) {
                struct drm_gem_object *obj = object_list[i];
+               uint32_t old_write_domain = obj->write_domain;
 
                obj->write_domain = obj->pending_write_domain;
+               trace_i915_gem_object_change_domain(obj,
+                                                   obj->read_domains,
+                                                   old_write_domain);
        }
 
        i915_verify_inactive(dev, __FILE__, __LINE__);
@@ -3607,11 +3854,8 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
        i915_verify_inactive(dev, __FILE__, __LINE__);
        if (obj_priv->gtt_space == NULL) {
                ret = i915_gem_object_bind_to_gtt(obj, alignment);
-               if (ret != 0) {
-                       if (ret != -EBUSY && ret != -ERESTARTSYS)
-                               DRM_ERROR("Failure to bind: %d\n", ret);
+               if (ret)
                        return ret;
-               }
        }
        /*
         * Pre-965 chips need a fence register set up in order to
@@ -3691,6 +3935,13 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data,
        }
        obj_priv = obj->driver_private;
 
+       if (obj_priv->madv != I915_MADV_WILLNEED) {
+               DRM_ERROR("Attempting to pin a purgeable buffer\n");
+               drm_gem_object_unreference(obj);
+               mutex_unlock(&dev->struct_mutex);
+               return -EINVAL;
+       }
+
        if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) {
                DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
                          args->handle);
@@ -3803,6 +4054,56 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
     return i915_gem_ring_throttle(dev, file_priv);
 }
 
+int
+i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
+                      struct drm_file *file_priv)
+{
+       struct drm_i915_gem_madvise *args = data;
+       struct drm_gem_object *obj;
+       struct drm_i915_gem_object *obj_priv;
+
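+       /* Only the two userspace-visible states are valid here;
+        * __I915_MADV_PURGED is kernel-internal. */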
+       switch (args->madv) {
+       case I915_MADV_DONTNEED:
+       case I915_MADV_WILLNEED:
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+       if (obj == NULL) {
+               DRM_ERROR("Bad handle in i915_gem_madvise_ioctl(): %d\n",
+                         args->handle);
+               return -EBADF;
+       }
+
+       mutex_lock(&dev->struct_mutex);
+       obj_priv = obj->driver_private;
+
+       if (obj_priv->pin_count) {
+               drm_gem_object_unreference(obj);
+               mutex_unlock(&dev->struct_mutex);
+
+               DRM_ERROR("Attempted i915_gem_madvise_ioctl() on a pinned object\n");
+               return -EINVAL;
+       }
+
+       if (obj_priv->madv != __I915_MADV_PURGED)
+               obj_priv->madv = args->madv;
+
+       /* if the object is no longer bound, discard its backing storage */
+       if (i915_gem_object_is_purgeable(obj_priv) &&
+           obj_priv->gtt_space == NULL)
+               i915_gem_object_truncate(obj);
+
+       args->retained = obj_priv->madv != __I915_MADV_PURGED;
+
+       drm_gem_object_unreference(obj);
+       mutex_unlock(&dev->struct_mutex);
+
+       return 0;
+}
+
 int i915_gem_init_object(struct drm_gem_object *obj)
 {
        struct drm_i915_gem_object *obj_priv;
@@ -3827,6 +4128,9 @@ int i915_gem_init_object(struct drm_gem_object *obj)
        obj_priv->fence_reg = I915_FENCE_REG_NONE;
        INIT_LIST_HEAD(&obj_priv->list);
        INIT_LIST_HEAD(&obj_priv->fence_list);
+       obj_priv->madv = I915_MADV_WILLNEED;
+
+       trace_i915_gem_object_create(obj);
 
        return 0;
 }
@@ -3836,6 +4140,8 @@ void i915_gem_free_object(struct drm_gem_object *obj)
        struct drm_device *dev = obj->dev;
        struct drm_i915_gem_object *obj_priv = obj->driver_private;
 
+       trace_i915_gem_object_destroy(obj);
+
        while (obj_priv->pin_count > 0)
                i915_gem_object_unpin(obj);
 
@@ -3844,43 +4150,35 @@ void i915_gem_free_object(struct drm_gem_object *obj)
 
        i915_gem_object_unbind(obj);
 
-       i915_gem_free_mmap_offset(obj);
+       if (obj_priv->mmap_offset)
+               i915_gem_free_mmap_offset(obj);
 
        kfree(obj_priv->page_cpu_valid);
        kfree(obj_priv->bit_17);
        kfree(obj->driver_private);
 }
 
-/** Unbinds all objects that are on the given buffer list. */
+/** Unbinds all inactive objects. */
 static int
-i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head)
+i915_gem_evict_from_inactive_list(struct drm_device *dev)
 {
-       struct drm_gem_object *obj;
-       struct drm_i915_gem_object *obj_priv;
-       int ret;
+       drm_i915_private_t *dev_priv = dev->dev_private;
 
-       while (!list_empty(head)) {
-               obj_priv = list_first_entry(head,
-                                           struct drm_i915_gem_object,
-                                           list);
-               obj = obj_priv->obj;
+       while (!list_empty(&dev_priv->mm.inactive_list)) {
+               struct drm_gem_object *obj;
+               int ret;
 
-               if (obj_priv->pin_count != 0) {
-                       DRM_ERROR("Pinned object in unbind list\n");
-                       mutex_unlock(&dev->struct_mutex);
-                       return -EINVAL;
-               }
+               obj = list_first_entry(&dev_priv->mm.inactive_list,
+                                      struct drm_i915_gem_object,
+                                      list)->obj;
 
                ret = i915_gem_object_unbind(obj);
                if (ret != 0) {
-                       DRM_ERROR("Error unbinding object in LeaveVT: %d\n",
-                                 ret);
-                       mutex_unlock(&dev->struct_mutex);
+                       DRM_ERROR("Error unbinding object: %d\n", ret);
                        return ret;
                }
        }
 
-
        return 0;
 }
 
@@ -3902,6 +4200,7 @@ i915_gem_idle(struct drm_device *dev)
         * We need to replace this with a semaphore, or something.
         */
        dev_priv->mm.suspended = 1;
+       del_timer(&dev_priv->hangcheck_timer);
 
        /* Cancel the retire work handler, wait for it to finish if running
         */
@@ -3931,7 +4230,7 @@ i915_gem_idle(struct drm_device *dev)
                if (last_seqno == cur_seqno) {
                        if (stuck++ > 100) {
                                DRM_ERROR("hardware wedged\n");
-                               dev_priv->mm.wedged = 1;
+                               atomic_set(&dev_priv->mm.wedged, 1);
                                DRM_WAKEUP(&dev_priv->irq_queue);
                                break;
                        }
@@ -3944,7 +4243,7 @@ i915_gem_idle(struct drm_device *dev)
        i915_gem_retire_requests(dev);
 
        spin_lock(&dev_priv->mm.active_list_lock);
-       if (!dev_priv->mm.wedged) {
+       if (!atomic_read(&dev_priv->mm.wedged)) {
                /* Active and flushing should now be empty as we've
                 * waited for a sequence higher than any pending execbuffer
                 */
@@ -3962,29 +4261,41 @@ i915_gem_idle(struct drm_device *dev)
         * the GPU domains and just stuff them onto inactive.
         */
        while (!list_empty(&dev_priv->mm.active_list)) {
-               struct drm_i915_gem_object *obj_priv;
+               struct drm_gem_object *obj;
+               uint32_t old_write_domain;
 
-               obj_priv = list_first_entry(&dev_priv->mm.active_list,
-                                           struct drm_i915_gem_object,
-                                           list);
-               obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
-               i915_gem_object_move_to_inactive(obj_priv->obj);
+               obj = list_first_entry(&dev_priv->mm.active_list,
+                                      struct drm_i915_gem_object,
+                                      list)->obj;
+               old_write_domain = obj->write_domain;
+               obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
+               i915_gem_object_move_to_inactive(obj);
+
+               trace_i915_gem_object_change_domain(obj,
+                                                   obj->read_domains,
+                                                   old_write_domain);
        }
        spin_unlock(&dev_priv->mm.active_list_lock);
 
        while (!list_empty(&dev_priv->mm.flushing_list)) {
-               struct drm_i915_gem_object *obj_priv;
+               struct drm_gem_object *obj;
+               uint32_t old_write_domain;
 
-               obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
-                                           struct drm_i915_gem_object,
-                                           list);
-               obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
-               i915_gem_object_move_to_inactive(obj_priv->obj);
+               obj = list_first_entry(&dev_priv->mm.flushing_list,
+                                      struct drm_i915_gem_object,
+                                      list)->obj;
+               old_write_domain = obj->write_domain;
+               obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
+               i915_gem_object_move_to_inactive(obj);
+
+               trace_i915_gem_object_change_domain(obj,
+                                                   obj->read_domains,
+                                                   old_write_domain);
        }
 
 
        /* Move all inactive buffers out of the GTT. */
-       ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list);
+       ret = i915_gem_evict_from_inactive_list(dev);
        WARN_ON(!list_empty(&dev_priv->mm.inactive_list));
        if (ret) {
                mutex_unlock(&dev->struct_mutex);
@@ -4206,9 +4517,9 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
        if (drm_core_check_feature(dev, DRIVER_MODESET))
                return 0;
 
-       if (dev_priv->mm.wedged) {
+       if (atomic_read(&dev_priv->mm.wedged)) {
                DRM_ERROR("Reenabling wedged hardware, good luck\n");
-               dev_priv->mm.wedged = 0;
+               atomic_set(&dev_priv->mm.wedged, 0);
        }
 
        mutex_lock(&dev->struct_mutex);
@@ -4274,6 +4585,10 @@ i915_gem_load(struct drm_device *dev)
                          i915_gem_retire_work_handler);
        dev_priv->mm.next_gem_seqno = 1;
 
+       spin_lock(&shrink_list_lock);
+       list_add(&dev_priv->mm.shrink_list, &shrink_list);
+       spin_unlock(&shrink_list_lock);
+
        /* Old X drivers will take 0-2 for front, back, depth buffers */
        dev_priv->fence_reg_start = 3;
 
@@ -4491,3 +4806,116 @@ void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv)
                list_del_init(i915_file_priv->mm.request_list.next);
        mutex_unlock(&dev->struct_mutex);
 }
+
+static int
+i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask)
+{
+       drm_i915_private_t *dev_priv, *next_dev;
+       struct drm_i915_gem_object *obj_priv, *next_obj;
+       int cnt = 0;
+       int would_deadlock = 1;
+
+       /* "fast-path" to count number of available objects */
+       if (nr_to_scan == 0) {
+               spin_lock(&shrink_list_lock);
+               list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
+                       struct drm_device *dev = dev_priv->dev;
+
+                       if (mutex_trylock(&dev->struct_mutex)) {
+                               list_for_each_entry(obj_priv,
+                                                   &dev_priv->mm.inactive_list,
+                                                   list)
+                                       cnt++;
+                               mutex_unlock(&dev->struct_mutex);
+                       }
+               }
+               spin_unlock(&shrink_list_lock);
+
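+               /* Scale by vfs_cache_pressure, as other kernel shrinkers do. */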
+               return (cnt / 100) * sysctl_vfs_cache_pressure;
+       }
+
+       spin_lock(&shrink_list_lock);
+
+       /* first scan for clean buffers */
+       list_for_each_entry_safe(dev_priv, next_dev,
+                                &shrink_list, mm.shrink_list) {
+               struct drm_device *dev = dev_priv->dev;
+
+               if (!mutex_trylock(&dev->struct_mutex))
+                       continue;
+
+               spin_unlock(&shrink_list_lock);
+
+               i915_gem_retire_requests(dev);
+
+               list_for_each_entry_safe(obj_priv, next_obj,
+                                        &dev_priv->mm.inactive_list,
+                                        list) {
+                       if (i915_gem_object_is_purgeable(obj_priv)) {
+                               i915_gem_object_unbind(obj_priv->obj);
+                               if (--nr_to_scan <= 0)
+                                       break;
+                       }
+               }
+
+               spin_lock(&shrink_list_lock);
+               mutex_unlock(&dev->struct_mutex);
+
+               would_deadlock = 0;
+
+               if (nr_to_scan <= 0)
+                       break;
+       }
+
+       /* second pass, evict/count anything still on the inactive list */
+       list_for_each_entry_safe(dev_priv, next_dev,
+                                &shrink_list, mm.shrink_list) {
+               struct drm_device *dev = dev_priv->dev;
+
+               if (!mutex_trylock(&dev->struct_mutex))
+                       continue;
+
+               spin_unlock(&shrink_list_lock);
+
+               list_for_each_entry_safe(obj_priv, next_obj,
+                                        &dev_priv->mm.inactive_list,
+                                        list) {
+                       if (nr_to_scan > 0) {
+                               i915_gem_object_unbind(obj_priv->obj);
+                               nr_to_scan--;
+                       } else
+                               cnt++;
+               }
+
+               spin_lock(&shrink_list_lock);
+               mutex_unlock(&dev->struct_mutex);
+
+               would_deadlock = 0;
+       }
+
+       spin_unlock(&shrink_list_lock);
+
+       if (would_deadlock)
+               return -1;
+       else if (cnt > 0)
+               return (cnt / 100) * sysctl_vfs_cache_pressure;
+       else
+               return 0;
+}
+
+static struct shrinker shrinker = {
+       .shrink = i915_gem_shrink,
+       .seeks = DEFAULT_SEEKS,
+};
+
+__init void
+i915_gem_shrinker_init(void)
+{
+    register_shrinker(&shrinker);
+}
+
+__exit void
+i915_gem_shrinker_exit(void)
+{
+    unregister_shrinker(&shrinker);
+}
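
The functions above tie GEM into the VM's shrinker machinery: under memory pressure the kernel calls back into the driver so it can drop purgeable objects. The 2.6.31-era contract is that nr_to_scan == 0 only asks "how many objects could you free?", a positive value asks for that many to be freed, and -1 signals that the needed locks could not be taken without risking deadlock (the would_deadlock path above). A minimal sketch of the same contract, with illustrative names outside the i915 code:

#include <linux/module.h>
#include <linux/mm.h>      /* struct shrinker, register_shrinker() */
#include <linux/dcache.h>  /* sysctl_vfs_cache_pressure */

static int demo_nr_objects = 1000; /* stand-in for a freeable-object count */

static int demo_shrink(int nr_to_scan, gfp_t gfp_mask)
{
        /* Query phase: report the pool size, scaled like the VFS caches. */
        if (nr_to_scan == 0)
                return (demo_nr_objects / 100) * sysctl_vfs_cache_pressure;

        /* Scan phase: release objects, then report what is left. */
        while (nr_to_scan-- > 0 && demo_nr_objects > 0)
                demo_nr_objects--; /* a real driver frees an object here */

        return (demo_nr_objects / 100) * sysctl_vfs_cache_pressure;
}

static struct shrinker demo_shrinker = {
        .shrink = demo_shrink,
        .seeks  = DEFAULT_SEEKS,
};

static int __init demo_init(void)
{
        register_shrinker(&demo_shrinker);
        return 0;
}

static void __exit demo_exit(void)
{
        unregister_shrinker(&demo_shrinker);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
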
index 6c89f2f..4dfeec7 100644
@@ -31,6 +31,7 @@
 #include "drm.h"
 #include "i915_drm.h"
 #include "i915_drv.h"
+#include "i915_trace.h"
 #include "intel_drv.h"
 
 #define MAX_NOPID ((u32)~0)
@@ -279,7 +280,9 @@ irqreturn_t igdng_irq_handler(struct drm_device *dev)
                }
 
                if (gt_iir & GT_USER_INTERRUPT) {
-                       dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev);
+                       u32 seqno = i915_get_gem_seqno(dev);
+                       dev_priv->mm.irq_gem_seqno = seqno;
+                       trace_i915_gem_request_complete(dev, seqno);
                        DRM_WAKEUP(&dev_priv->irq_queue);
                }
 
@@ -302,12 +305,25 @@ static void i915_error_work_func(struct work_struct *work)
        drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
                                                    error_work);
        struct drm_device *dev = dev_priv->dev;
-       char *event_string = "ERROR=1";
-       char *envp[] = { event_string, NULL };
+       char *error_event[] = { "ERROR=1", NULL };
+       char *reset_event[] = { "RESET=1", NULL };
+       char *reset_done_event[] = { "ERROR=0", NULL };
 
        DRM_DEBUG("generating error event\n");
-
-       kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, envp);
+       kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, error_event);
+
+       if (atomic_read(&dev_priv->mm.wedged)) {
+               if (IS_I965G(dev)) {
+                       DRM_DEBUG("resetting chip\n");
+                       kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_event);
+                       if (!i965_reset(dev, GDRST_RENDER)) {
+                               atomic_set(&dev_priv->mm.wedged, 0);
+                               kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_done_event);
+                       }
+               } else {
+                       printk(KERN_ERR "reboot required\n");
+               }
+       }
 }
 
 /**
@@ -372,7 +388,7 @@ out:
  * so userspace knows something bad happened (should trigger collection
  * of a ring dump etc.).
  */
-static void i915_handle_error(struct drm_device *dev)
+static void i915_handle_error(struct drm_device *dev, bool wedged)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        u32 eir = I915_READ(EIR);
@@ -482,6 +498,16 @@ static void i915_handle_error(struct drm_device *dev)
                I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
        }
 
+       if (wedged) {
+               atomic_set(&dev_priv->mm.wedged, 1);
+
+               /*
+                * Wakeup waiting processes so they don't hang
+                */
+               printk(KERN_ERR "i915: Waking up sleeping processes\n");
+               DRM_WAKEUP(&dev_priv->irq_queue);
+       }
+
        queue_work(dev_priv->wq, &dev_priv->error_work);
 }
 
@@ -527,7 +553,7 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
                pipeb_stats = I915_READ(PIPEBSTAT);
 
                if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
-                       i915_handle_error(dev);
+                       i915_handle_error(dev, false);
 
                /*
                 * Clear the PIPE(A|B)STAT regs before the IIR
@@ -599,8 +625,12 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
                }
 
                if (iir & I915_USER_INTERRUPT) {
-                       dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev);
+                       u32 seqno = i915_get_gem_seqno(dev);
+                       dev_priv->mm.irq_gem_seqno = seqno;
+                       trace_i915_gem_request_complete(dev, seqno);
                        DRM_WAKEUP(&dev_priv->irq_queue);
+                       dev_priv->hangcheck_count = 0;
+                       mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
                }
 
                if (pipea_stats & vblank_status) {
@@ -880,6 +910,52 @@ int i915_vblank_swap(struct drm_device *dev, void *data,
        return -EINVAL;
 }
 
+struct drm_i915_gem_request *i915_get_tail_request(struct drm_device *dev)
+{
+       drm_i915_private_t *dev_priv = dev->dev_private;
+       return list_entry(dev_priv->mm.request_list.prev,
+                         struct drm_i915_gem_request, list);
+}
+
+/**
+ * This is called when the chip hasn't reported back with completed
+ * batchbuffers in a long time. The first time this is called we simply record
+ * ACTHD. If ACTHD hasn't changed by the time the hangcheck timer elapses
+ * again, we assume the chip is wedged and try to fix it.
+ */
+void i915_hangcheck_elapsed(unsigned long data)
+{
+       struct drm_device *dev = (struct drm_device *)data;
+       drm_i915_private_t *dev_priv = dev->dev_private;
+       uint32_t acthd;
+       
+       if (!IS_I965G(dev))
+               acthd = I915_READ(ACTHD);
+       else
+               acthd = I915_READ(ACTHD_I965);
+
+       /* If all work is done then ACTHD clearly hasn't advanced. */
+       if (list_empty(&dev_priv->mm.request_list) ||
+                      i915_seqno_passed(i915_get_gem_seqno(dev), i915_get_tail_request(dev)->seqno)) {
+               dev_priv->hangcheck_count = 0;
+               return;
+       }
+
+       if (dev_priv->last_acthd == acthd && dev_priv->hangcheck_count > 0) {
+               DRM_ERROR("Hangcheck timer elapsed... GPU hung\n");
+               i915_handle_error(dev, true);
+               return;
+       } 
+
+       /* Reset timer in case the chip hangs without another request being added */
+       mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
+
+       if (acthd != dev_priv->last_acthd)
+               dev_priv->hangcheck_count = 0;
+       else
+               dev_priv->hangcheck_count++;
+
+       dev_priv->last_acthd = acthd;
+}
+
 /* drm_dma.h hooks
 */
 static void igdng_irq_preinstall(struct drm_device *dev)
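
For i915_hangcheck_elapsed() above to run at all, the timer has to be initialized and armed somewhere in the driver's setup path; that hookup is not part of this excerpt. With the 2.6.31 timer API it would look roughly like this (a sketch, assuming it lives in the IRQ install path):

        setup_timer(&dev_priv->hangcheck_timer, i915_hangcheck_elapsed,
                    (unsigned long) dev);
        mod_timer(&dev_priv->hangcheck_timer,
                  jiffies + DRM_I915_HANGCHECK_PERIOD);

From then on the callback re-arms itself (the mod_timer() call in its body) for as long as requests remain outstanding and ACTHD keeps advancing.
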
index e4b4e88..2d51935 100644
@@ -148,6 +148,7 @@ static u32 asle_set_backlight(struct drm_device *dev, u32 bclp)
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct opregion_asle *asle = dev_priv->opregion.asle;
        u32 blc_pwm_ctl, blc_pwm_ctl2;
+       u32 max_backlight, level, shift;
 
        if (!(bclp & ASLE_BCLP_VALID))
                return ASLE_BACKLIGHT_FAIL;
@@ -157,14 +158,25 @@ static u32 asle_set_backlight(struct drm_device *dev, u32 bclp)
                return ASLE_BACKLIGHT_FAIL;
 
        blc_pwm_ctl = I915_READ(BLC_PWM_CTL);
-       blc_pwm_ctl &= ~BACKLIGHT_DUTY_CYCLE_MASK;
        blc_pwm_ctl2 = I915_READ(BLC_PWM_CTL2);
 
-       if (blc_pwm_ctl2 & BLM_COMBINATION_MODE)
+       if (IS_I965G(dev) && (blc_pwm_ctl2 & BLM_COMBINATION_MODE))
                pci_write_config_dword(dev->pdev, PCI_LBPC, bclp);
-       else
-               I915_WRITE(BLC_PWM_CTL, blc_pwm_ctl | ((bclp * 0x101)-1));
-
+       else {
+               if (IS_IGD(dev)) {
+                       blc_pwm_ctl &= ~(BACKLIGHT_DUTY_CYCLE_MASK - 1);
+                       max_backlight = (blc_pwm_ctl & BACKLIGHT_MODULATION_FREQ_MASK) >> 
+                                       BACKLIGHT_MODULATION_FREQ_SHIFT;
+                       shift = BACKLIGHT_DUTY_CYCLE_SHIFT + 1;
+               } else {
+                       blc_pwm_ctl &= ~BACKLIGHT_DUTY_CYCLE_MASK;
+                       max_backlight = ((blc_pwm_ctl & BACKLIGHT_MODULATION_FREQ_MASK) >> 
+                                       BACKLIGHT_MODULATION_FREQ_SHIFT) * 2;
+                       shift = BACKLIGHT_DUTY_CYCLE_SHIFT;
+               }
+               level = (bclp * max_backlight) / 255;
+               I915_WRITE(BLC_PWM_CTL, blc_pwm_ctl | (level << shift));
+       }
        asle->cblv = (bclp*0x64)/0xff | ASLE_CBLV_VALID;
 
        return 0;
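
To make the new duty-cycle math concrete (numbers illustrative): on non-IGD hardware, suppose the modulation-frequency field yields max_backlight = 100 after the * 2 scaling. A firmware request of bclp = 204 then gives level = (204 * 100) / 255 = 80, and 80 << BACKLIGHT_DUTY_CYCLE_SHIFT is OR'd into BLC_PWM_CTL, i.e. an 80% duty cycle. The old ((bclp * 0x101) - 1) expression simply spread bclp across 16 bits, ignoring the PWM ceiling that the modulation-frequency field actually programs.
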
index 3f79635..0466ddb 100644
 #define   I915_GC_RENDER_CLOCK_200_MHZ (1 << 0)
 #define   I915_GC_RENDER_CLOCK_333_MHZ (4 << 0)
 #define LBB    0xf4
+#define GDRST 0xc0
+#define  GDRST_FULL    (0<<2)
+#define  GDRST_RENDER  (1<<2)
+#define  GDRST_MEDIA   (3<<2)
 
 /* VGA stuff */
 
 #define   FBC_CTL_PLANEA       (0<<0)
 #define   FBC_CTL_PLANEB       (1<<0)
 #define FBC_FENCE_OFF          0x0321b
+#define FBC_TAG                        0x03300
 
 #define FBC_LL_SIZE            (1536)
 
+/* Framebuffer compression for GM45+ */
+#define DPFC_CB_BASE           0x3200
+#define DPFC_CONTROL           0x3208
+#define   DPFC_CTL_EN          (1<<31)
+#define   DPFC_CTL_PLANEA      (0<<30)
+#define   DPFC_CTL_PLANEB      (1<<30)
+#define   DPFC_CTL_FENCE_EN    (1<<29)
+#define   DPFC_SR_EN           (1<<10)
+#define   DPFC_CTL_LIMIT_1X    (0<<6)
+#define   DPFC_CTL_LIMIT_2X    (1<<6)
+#define   DPFC_CTL_LIMIT_4X    (2<<6)
+#define DPFC_RECOMP_CTL                0x320c
+#define   DPFC_RECOMP_STALL_EN (1<<27)
+#define   DPFC_RECOMP_STALL_WM_SHIFT (16)
+#define   DPFC_RECOMP_STALL_WM_MASK (0x07ff0000)
+#define   DPFC_RECOMP_TIMER_COUNT_SHIFT (0)
+#define   DPFC_RECOMP_TIMER_COUNT_MASK (0x0000003f)
+#define DPFC_STATUS            0x3210
+#define   DPFC_INVAL_SEG_SHIFT  (16)
+#define   DPFC_INVAL_SEG_MASK  (0x07ff0000)
+#define   DPFC_COMP_SEG_SHIFT  (0)
+#define   DPFC_COMP_SEG_MASK   (0x000003ff)
+#define DPFC_STATUS2           0x3214
+#define DPFC_FENCE_YOFF                0x3218
+#define DPFC_CHICKEN           0x3224
+#define   DPFC_HT_MODIFY       (1<<31)
+
 /*
  * GPIO regs
  */
 #define  PF_ENABLE              (1<<31)
 #define PFA_WIN_SZ             0x68074
 #define PFB_WIN_SZ             0x68874
+#define PFA_WIN_POS            0x68070
+#define PFB_WIN_POS            0x68870
 
 /* legacy palette */
 #define LGC_PALETTE_A           0x4a000
index 20d4d19..bd6d8d9 100644
@@ -228,6 +228,7 @@ static void i915_save_modeset_reg(struct drm_device *dev)
 
        if (drm_core_check_feature(dev, DRIVER_MODESET))
                return;
+
        /* Pipe & plane A info */
        dev_priv->savePIPEACONF = I915_READ(PIPEACONF);
        dev_priv->savePIPEASRC = I915_READ(PIPEASRC);
@@ -285,6 +286,7 @@ static void i915_save_modeset_reg(struct drm_device *dev)
        dev_priv->savePIPEBSTAT = I915_READ(PIPEBSTAT);
        return;
 }
+
 static void i915_restore_modeset_reg(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -379,19 +381,10 @@ static void i915_restore_modeset_reg(struct drm_device *dev)
 
        return;
 }
-int i915_save_state(struct drm_device *dev)
+
+void i915_save_display(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       int i;
-
-       pci_read_config_byte(dev->pdev, LBB, &dev_priv->saveLBB);
-
-       /* Render Standby */
-       if (IS_I965G(dev) && IS_MOBILE(dev))
-               dev_priv->saveRENDERSTANDBY = I915_READ(MCHBAR_RENDER_STANDBY);
-
-       /* Hardware status page */
-       dev_priv->saveHWS = I915_READ(HWS_PGA);
 
        /* Display arbitration control */
        dev_priv->saveDSPARB = I915_READ(DSPARB);
@@ -399,6 +392,7 @@ int i915_save_state(struct drm_device *dev)
        /* This is only meaningful in non-KMS mode */
        /* Don't save them in KMS mode */
        i915_save_modeset_reg(dev);
+
        /* Cursor state */
        dev_priv->saveCURACNTR = I915_READ(CURACNTR);
        dev_priv->saveCURAPOS = I915_READ(CURAPOS);
@@ -448,81 +442,22 @@ int i915_save_state(struct drm_device *dev)
        dev_priv->saveFBC_CONTROL2 = I915_READ(FBC_CONTROL2);
        dev_priv->saveFBC_CONTROL = I915_READ(FBC_CONTROL);
 
-       /* Interrupt state */
-       dev_priv->saveIIR = I915_READ(IIR);
-       dev_priv->saveIER = I915_READ(IER);
-       dev_priv->saveIMR = I915_READ(IMR);
-
        /* VGA state */
        dev_priv->saveVGA0 = I915_READ(VGA0);
        dev_priv->saveVGA1 = I915_READ(VGA1);
        dev_priv->saveVGA_PD = I915_READ(VGA_PD);
        dev_priv->saveVGACNTRL = I915_READ(VGACNTRL);
 
-       /* Clock gating state */
-       dev_priv->saveD_STATE = I915_READ(D_STATE);
-       dev_priv->saveDSPCLK_GATE_D = I915_READ(DSPCLK_GATE_D);
-
-       /* Cache mode state */
-       dev_priv->saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0);
-
-       /* Memory Arbitration state */
-       dev_priv->saveMI_ARB_STATE = I915_READ(MI_ARB_STATE);
-
-       /* Scratch space */
-       for (i = 0; i < 16; i++) {
-               dev_priv->saveSWF0[i] = I915_READ(SWF00 + (i << 2));
-               dev_priv->saveSWF1[i] = I915_READ(SWF10 + (i << 2));
-       }
-       for (i = 0; i < 3; i++)
-               dev_priv->saveSWF2[i] = I915_READ(SWF30 + (i << 2));
-
-       /* Fences */
-       if (IS_I965G(dev)) {
-               for (i = 0; i < 16; i++)
-                       dev_priv->saveFENCE[i] = I915_READ64(FENCE_REG_965_0 + (i * 8));
-       } else {
-               for (i = 0; i < 8; i++)
-                       dev_priv->saveFENCE[i] = I915_READ(FENCE_REG_830_0 + (i * 4));
-
-               if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
-                       for (i = 0; i < 8; i++)
-                               dev_priv->saveFENCE[i+8] = I915_READ(FENCE_REG_945_8 + (i * 4));
-       }
        i915_save_vga(dev);
-
-       return 0;
 }
 
-int i915_restore_state(struct drm_device *dev)
+void i915_restore_display(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
-       int i;
-
-       pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB);
-
-       /* Render Standby */
-       if (IS_I965G(dev) && IS_MOBILE(dev))
-               I915_WRITE(MCHBAR_RENDER_STANDBY, dev_priv->saveRENDERSTANDBY);
-
-       /* Hardware status page */
-       I915_WRITE(HWS_PGA, dev_priv->saveHWS);
 
        /* Display arbitration */
        I915_WRITE(DSPARB, dev_priv->saveDSPARB);
 
-       /* Fences */
-       if (IS_I965G(dev)) {
-               for (i = 0; i < 16; i++)
-                       I915_WRITE64(FENCE_REG_965_0 + (i * 8), dev_priv->saveFENCE[i]);
-       } else {
-               for (i = 0; i < 8; i++)
-                       I915_WRITE(FENCE_REG_830_0 + (i * 4), dev_priv->saveFENCE[i]);
-               if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
-                       for (i = 0; i < 8; i++)
-                               I915_WRITE(FENCE_REG_945_8 + (i * 4), dev_priv->saveFENCE[i+8]);
-       }
-       
        /* Display port ratios (must be done before clock is set) */
        if (SUPPORTS_INTEGRATED_DP(dev)) {
                I915_WRITE(PIPEA_GMCH_DATA_M, dev_priv->savePIPEA_GMCH_DATA_M);
@@ -534,9 +469,11 @@ int i915_restore_state(struct drm_device *dev)
                I915_WRITE(PIPEA_DP_LINK_N, dev_priv->savePIPEA_DP_LINK_N);
                I915_WRITE(PIPEB_DP_LINK_N, dev_priv->savePIPEB_DP_LINK_N);
        }
+
        /* This is only meaningful in non-KMS mode */
        /* Don't restore them in KMS mode */
        i915_restore_modeset_reg(dev);
+
        /* Cursor state */
        I915_WRITE(CURAPOS, dev_priv->saveCURAPOS);
        I915_WRITE(CURACNTR, dev_priv->saveCURACNTR);
@@ -586,6 +523,95 @@ int i915_restore_state(struct drm_device *dev)
        I915_WRITE(VGA_PD, dev_priv->saveVGA_PD);
        DRM_UDELAY(150);
 
+       i915_restore_vga(dev);
+}
+
+int i915_save_state(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       int i;
+
+       pci_read_config_byte(dev->pdev, LBB, &dev_priv->saveLBB);
+
+       /* Render Standby */
+       if (IS_I965G(dev) && IS_MOBILE(dev))
+               dev_priv->saveRENDERSTANDBY = I915_READ(MCHBAR_RENDER_STANDBY);
+
+       /* Hardware status page */
+       dev_priv->saveHWS = I915_READ(HWS_PGA);
+
+       i915_save_display(dev);
+
+       /* Interrupt state */
+       dev_priv->saveIER = I915_READ(IER);
+       dev_priv->saveIMR = I915_READ(IMR);
+
+       /* Clock gating state */
+       dev_priv->saveD_STATE = I915_READ(D_STATE);
+       dev_priv->saveDSPCLK_GATE_D = I915_READ(DSPCLK_GATE_D); /* Not sure about this */
+
+       /* Cache mode state */
+       dev_priv->saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0);
+
+       /* Memory Arbitration state */
+       dev_priv->saveMI_ARB_STATE = I915_READ(MI_ARB_STATE);
+
+       /* Scratch space */
+       for (i = 0; i < 16; i++) {
+               dev_priv->saveSWF0[i] = I915_READ(SWF00 + (i << 2));
+               dev_priv->saveSWF1[i] = I915_READ(SWF10 + (i << 2));
+       }
+       for (i = 0; i < 3; i++)
+               dev_priv->saveSWF2[i] = I915_READ(SWF30 + (i << 2));
+
+       /* Fences */
+       if (IS_I965G(dev)) {
+               for (i = 0; i < 16; i++)
+                       dev_priv->saveFENCE[i] = I915_READ64(FENCE_REG_965_0 + (i * 8));
+       } else {
+               for (i = 0; i < 8; i++)
+                       dev_priv->saveFENCE[i] = I915_READ(FENCE_REG_830_0 + (i * 4));
+
+               if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
+                       for (i = 0; i < 8; i++)
+                               dev_priv->saveFENCE[i+8] = I915_READ(FENCE_REG_945_8 + (i * 4));
+       }
+
+       return 0;
+}
+
+int i915_restore_state(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       int i;
+
+       pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB);
+
+       /* Render Standby */
+       if (IS_I965G(dev) && IS_MOBILE(dev))
+               I915_WRITE(MCHBAR_RENDER_STANDBY, dev_priv->saveRENDERSTANDBY);
+
+       /* Hardware status page */
+       I915_WRITE(HWS_PGA, dev_priv->saveHWS);
+
+       /* Fences */
+       if (IS_I965G(dev)) {
+               for (i = 0; i < 16; i++)
+                       I915_WRITE64(FENCE_REG_965_0 + (i * 8), dev_priv->saveFENCE[i]);
+       } else {
+               for (i = 0; i < 8; i++)
+                       I915_WRITE(FENCE_REG_830_0 + (i * 4), dev_priv->saveFENCE[i]);
+               if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
+                       for (i = 0; i < 8; i++)
+                               I915_WRITE(FENCE_REG_945_8 + (i * 4), dev_priv->saveFENCE[i+8]);
+       }
+
+       i915_restore_display(dev);
+
+       /* Interrupt state */
+       I915_WRITE (IER, dev_priv->saveIER);
+       I915_WRITE (IMR,  dev_priv->saveIMR);
+
        /* Clock gating state */
        I915_WRITE (D_STATE, dev_priv->saveD_STATE);
        I915_WRITE (DSPCLK_GATE_D, dev_priv->saveDSPCLK_GATE_D);
@@ -603,8 +629,6 @@ int i915_restore_state(struct drm_device *dev)
        for (i = 0; i < 3; i++)
                I915_WRITE(SWF30 + (i << 2), dev_priv->saveSWF2[i]);
 
-       i915_restore_vga(dev);
-
        return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
new file mode 100644
index 0000000..5567a40
--- /dev/null
@@ -0,0 +1,315 @@
+#if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _I915_TRACE_H_
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#include <drm/drmP.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM i915
+#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM)
+#define TRACE_INCLUDE_FILE i915_trace
+
+/* object tracking */
+
+TRACE_EVENT(i915_gem_object_create,
+
+           TP_PROTO(struct drm_gem_object *obj),
+
+           TP_ARGS(obj),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_gem_object *, obj)
+                            __field(u32, size)
+                            ),
+
+           TP_fast_assign(
+                          __entry->obj = obj;
+                          __entry->size = obj->size;
+                          ),
+
+           TP_printk("obj=%p, size=%u", __entry->obj, __entry->size)
+);
+
+TRACE_EVENT(i915_gem_object_bind,
+
+           TP_PROTO(struct drm_gem_object *obj, u32 gtt_offset),
+
+           TP_ARGS(obj, gtt_offset),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_gem_object *, obj)
+                            __field(u32, gtt_offset)
+                            ),
+
+           TP_fast_assign(
+                          __entry->obj = obj;
+                          __entry->gtt_offset = gtt_offset;
+                          ),
+
+           TP_printk("obj=%p, gtt_offset=%08x",
+                     __entry->obj, __entry->gtt_offset)
+);
+
+TRACE_EVENT(i915_gem_object_clflush,
+
+           TP_PROTO(struct drm_gem_object *obj),
+
+           TP_ARGS(obj),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_gem_object *, obj)
+                            ),
+
+           TP_fast_assign(
+                          __entry->obj = obj;
+                          ),
+
+           TP_printk("obj=%p", __entry->obj)
+);
+
+TRACE_EVENT(i915_gem_object_change_domain,
+
+           TP_PROTO(struct drm_gem_object *obj, uint32_t old_read_domains, uint32_t old_write_domain),
+
+           TP_ARGS(obj, old_read_domains, old_write_domain),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_gem_object *, obj)
+                            __field(u32, read_domains)
+                            __field(u32, write_domain)
+                            ),
+
+           TP_fast_assign(
+                          __entry->obj = obj;
+                          __entry->read_domains = obj->read_domains | (old_read_domains << 16);
+                          __entry->write_domain = obj->write_domain | (old_write_domain << 16);
+                          ),
+
+           TP_printk("obj=%p, read=%04x, write=%04x",
+                     __entry->obj,
+                     __entry->read_domains, __entry->write_domain)
+);
+
+TRACE_EVENT(i915_gem_object_get_fence,
+
+           TP_PROTO(struct drm_gem_object *obj, int fence, int tiling_mode),
+
+           TP_ARGS(obj, fence, tiling_mode),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_gem_object *, obj)
+                            __field(int, fence)
+                            __field(int, tiling_mode)
+                            ),
+
+           TP_fast_assign(
+                          __entry->obj = obj;
+                          __entry->fence = fence;
+                          __entry->tiling_mode = tiling_mode;
+                          ),
+
+           TP_printk("obj=%p, fence=%d, tiling=%d",
+                     __entry->obj, __entry->fence, __entry->tiling_mode)
+);
+
+TRACE_EVENT(i915_gem_object_unbind,
+
+           TP_PROTO(struct drm_gem_object *obj),
+
+           TP_ARGS(obj),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_gem_object *, obj)
+                            ),
+
+           TP_fast_assign(
+                          __entry->obj = obj;
+                          ),
+
+           TP_printk("obj=%p", __entry->obj)
+);
+
+TRACE_EVENT(i915_gem_object_destroy,
+
+           TP_PROTO(struct drm_gem_object *obj),
+
+           TP_ARGS(obj),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_gem_object *, obj)
+                            ),
+
+           TP_fast_assign(
+                          __entry->obj = obj;
+                          ),
+
+           TP_printk("obj=%p", __entry->obj)
+);
+
+/* batch tracing */
+
+TRACE_EVENT(i915_gem_request_submit,
+
+           TP_PROTO(struct drm_device *dev, u32 seqno),
+
+           TP_ARGS(dev, seqno),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_device *, dev)
+                            __field(u32, seqno)
+                            ),
+
+           TP_fast_assign(
+                          __entry->dev = dev;
+                          __entry->seqno = seqno;
+                          ),
+
+           TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+TRACE_EVENT(i915_gem_request_flush,
+
+           TP_PROTO(struct drm_device *dev, u32 seqno,
+                    u32 flush_domains, u32 invalidate_domains),
+
+           TP_ARGS(dev, seqno, flush_domains, invalidate_domains),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_device *, dev)
+                            __field(u32, seqno)
+                            __field(u32, flush_domains)
+                            __field(u32, invalidate_domains)
+                            ),
+
+           TP_fast_assign(
+                          __entry->dev = dev;
+                          __entry->seqno = seqno;
+                          __entry->flush_domains = flush_domains;
+                          __entry->invalidate_domains = invalidate_domains;
+                          ),
+
+           TP_printk("dev=%p, seqno=%u, flush=%04x, invalidate=%04x",
+                     __entry->dev, __entry->seqno,
+                     __entry->flush_domains, __entry->invalidate_domains)
+);
+
+
+TRACE_EVENT(i915_gem_request_complete,
+
+           TP_PROTO(struct drm_device *dev, u32 seqno),
+
+           TP_ARGS(dev, seqno),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_device *, dev)
+                            __field(u32, seqno)
+                            ),
+
+           TP_fast_assign(
+                          __entry->dev = dev;
+                          __entry->seqno = seqno;
+                          ),
+
+           TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+TRACE_EVENT(i915_gem_request_retire,
+
+           TP_PROTO(struct drm_device *dev, u32 seqno),
+
+           TP_ARGS(dev, seqno),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_device *, dev)
+                            __field(u32, seqno)
+                            ),
+
+           TP_fast_assign(
+                          __entry->dev = dev;
+                          __entry->seqno = seqno;
+                          ),
+
+           TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+TRACE_EVENT(i915_gem_request_wait_begin,
+
+           TP_PROTO(struct drm_device *dev, u32 seqno),
+
+           TP_ARGS(dev, seqno),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_device *, dev)
+                            __field(u32, seqno)
+                            ),
+
+           TP_fast_assign(
+                          __entry->dev = dev;
+                          __entry->seqno = seqno;
+                          ),
+
+           TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+TRACE_EVENT(i915_gem_request_wait_end,
+
+           TP_PROTO(struct drm_device *dev, u32 seqno),
+
+           TP_ARGS(dev, seqno),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_device *, dev)
+                            __field(u32, seqno)
+                            ),
+
+           TP_fast_assign(
+                          __entry->dev = dev;
+                          __entry->seqno = seqno;
+                          ),
+
+           TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno)
+);
+
+TRACE_EVENT(i915_ring_wait_begin,
+
+           TP_PROTO(struct drm_device *dev),
+
+           TP_ARGS(dev),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_device *, dev)
+                            ),
+
+           TP_fast_assign(
+                          __entry->dev = dev;
+                          ),
+
+           TP_printk("dev=%p", __entry->dev)
+);
+
+TRACE_EVENT(i915_ring_wait_end,
+
+           TP_PROTO(struct drm_device *dev),
+
+           TP_ARGS(dev),
+
+           TP_STRUCT__entry(
+                            __field(struct drm_device *, dev)
+                            ),
+
+           TP_fast_assign(
+                          __entry->dev = dev;
+                          ),
+
+           TP_printk("dev=%p", __entry->dev)
+);
+
+#endif /* _I915_TRACE_H_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/i915
+#include <trace/define_trace.h>
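
Each TRACE_EVENT(name, ...) above expands into a trace_name() stub that costs little more than a predicted branch while the event is disabled. The instrumented sites call it directly, as in the IRQ-handler hunk earlier; for example:

        /* e.g. in i915_gem.c, after a GEM object is allocated: */
        trace_i915_gem_object_create(obj);

At runtime the events appear under the ftrace directory (assuming debugfs is mounted at /sys/kernel/debug) as tracing/events/i915/<event>/enable, and fired records show up in tracing/trace formatted by each event's TP_printk() string.
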
diff --git a/drivers/gpu/drm/i915/i915_trace_points.c b/drivers/gpu/drm/i915/i915_trace_points.c
new file mode 100644
index 0000000..ead876e
--- /dev/null
@@ -0,0 +1,11 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Authors:
+ *    Chris Wilson <chris@chris-wilson.co.uk>
+ */
+
+#include "i915_drv.h"
+
+#define CREATE_TRACE_POINTS
+#include "i915_trace.h"
index 1e28c16..4337414 100644
@@ -217,6 +217,9 @@ parse_general_features(struct drm_i915_private *dev_priv,
                        if (IS_I85X(dev_priv->dev))
                                dev_priv->lvds_ssc_freq =
                                        general->ssc_freq ? 66 : 48;
+                       else if (IS_IGDNG(dev_priv->dev))
+                               dev_priv->lvds_ssc_freq =
+                                       general->ssc_freq ? 100 : 120;
                        else
                                dev_priv->lvds_ssc_freq =
                                        general->ssc_freq ? 100 : 96;
index 88814fa..212e227 100644
@@ -179,13 +179,10 @@ static bool intel_igdng_crt_detect_hotplug(struct drm_connector *connector)
 {
        struct drm_device *dev = connector->dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
-       u32 adpa, temp;
+       u32 adpa;
        bool ret;
 
-       temp = adpa = I915_READ(PCH_ADPA);
-
-       adpa &= ~ADPA_DAC_ENABLE;
-       I915_WRITE(PCH_ADPA, adpa);
+       adpa = I915_READ(PCH_ADPA);
 
        adpa &= ~ADPA_CRT_HOTPLUG_MASK;
 
@@ -212,8 +209,6 @@ static bool intel_igdng_crt_detect_hotplug(struct drm_connector *connector)
        else
                ret = false;
 
-       /* restore origin register */
-       I915_WRITE(PCH_ADPA, temp);
        return ret;
 }
 
index 0227b16..93ff6c0 100644
@@ -24,6 +24,8 @@
  *     Eric Anholt <eric@anholt.net>
  */
 
+#include <linux/module.h>
+#include <linux/input.h>
 #include <linux/i2c.h>
 #include <linux/kernel.h>
 #include "drmP.h"
@@ -875,7 +877,7 @@ intel_igdng_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc,
                                               refclk, best_clock);
 
        if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) {
-               if ((I915_READ(LVDS) & LVDS_CLKB_POWER_MASK) ==
+               if ((I915_READ(PCH_LVDS) & LVDS_CLKB_POWER_MASK) ==
                    LVDS_CLKB_POWER_UP)
                        clock.p2 = limit->p2.p2_fast;
                else
@@ -952,6 +954,241 @@ intel_wait_for_vblank(struct drm_device *dev)
        mdelay(20);
 }
 
+/* Parameters have changed, update FBC info */
+static void i8xx_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
+{
+       struct drm_device *dev = crtc->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct drm_framebuffer *fb = crtc->fb;
+       struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
+       struct drm_i915_gem_object *obj_priv = intel_fb->obj->driver_private;
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       int plane, i;
+       u32 fbc_ctl, fbc_ctl2;
+
+       dev_priv->cfb_pitch = dev_priv->cfb_size / FBC_LL_SIZE;
+
+       if (fb->pitch < dev_priv->cfb_pitch)
+               dev_priv->cfb_pitch = fb->pitch;
+
+       /* FBC_CTL wants 64B units */
+       dev_priv->cfb_pitch = (dev_priv->cfb_pitch / 64) - 1;
+       dev_priv->cfb_fence = obj_priv->fence_reg;
+       dev_priv->cfb_plane = intel_crtc->plane;
+       plane = dev_priv->cfb_plane == 0 ? FBC_CTL_PLANEA : FBC_CTL_PLANEB;
+
+       /* Clear old tags */
+       for (i = 0; i < (FBC_LL_SIZE / 32) + 1; i++)
+               I915_WRITE(FBC_TAG + (i * 4), 0);
+
+       /* Set it up... */
+       fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM | plane;
+       if (obj_priv->tiling_mode != I915_TILING_NONE)
+               fbc_ctl2 |= FBC_CTL_CPU_FENCE;
+       I915_WRITE(FBC_CONTROL2, fbc_ctl2);
+       I915_WRITE(FBC_FENCE_OFF, crtc->y);
+
+       /* enable it... */
+       fbc_ctl = FBC_CTL_EN | FBC_CTL_PERIODIC;
+       fbc_ctl |= (dev_priv->cfb_pitch & 0xff) << FBC_CTL_STRIDE_SHIFT;
+       fbc_ctl |= (interval & 0x2fff) << FBC_CTL_INTERVAL_SHIFT;
+       if (obj_priv->tiling_mode != I915_TILING_NONE)
+               fbc_ctl |= dev_priv->cfb_fence;
+       I915_WRITE(FBC_CONTROL, fbc_ctl);
+
+       DRM_DEBUG("enabled FBC, pitch %ld, yoff %d, plane %d\n",
+                 dev_priv->cfb_pitch, crtc->y, dev_priv->cfb_plane);
+}
+
+void i8xx_disable_fbc(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       u32 fbc_ctl;
+
+       if (!I915_HAS_FBC(dev))
+               return;
+
+       /* Disable compression */
+       fbc_ctl = I915_READ(FBC_CONTROL);
+       fbc_ctl &= ~FBC_CTL_EN;
+       I915_WRITE(FBC_CONTROL, fbc_ctl);
+
+       /* Wait for compressing bit to clear */
+       while (I915_READ(FBC_STATUS) & FBC_STAT_COMPRESSING)
+               ; /* nothing */
+
+       intel_wait_for_vblank(dev);
+
+       DRM_DEBUG("disabled FBC\n");
+}
+
+static bool i8xx_fbc_enabled(struct drm_crtc *crtc)
+{
+       struct drm_device *dev = crtc->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       return I915_READ(FBC_CONTROL) & FBC_CTL_EN;
+}
+
+static void g4x_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
+{
+       struct drm_device *dev = crtc->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct drm_framebuffer *fb = crtc->fb;
+       struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
+       struct drm_i915_gem_object *obj_priv = intel_fb->obj->driver_private;
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       int plane = (intel_crtc->plane == 0 ? DPFC_CTL_PLANEA :
+                    DPFC_CTL_PLANEB);
+       unsigned long stall_watermark = 200;
+       u32 dpfc_ctl;
+
+       dev_priv->cfb_pitch = (dev_priv->cfb_pitch / 64) - 1;
+       dev_priv->cfb_fence = obj_priv->fence_reg;
+       dev_priv->cfb_plane = intel_crtc->plane;
+
+       dpfc_ctl = plane | DPFC_SR_EN | DPFC_CTL_LIMIT_1X;
+       if (obj_priv->tiling_mode != I915_TILING_NONE) {
+               dpfc_ctl |= DPFC_CTL_FENCE_EN | dev_priv->cfb_fence;
+               I915_WRITE(DPFC_CHICKEN, DPFC_HT_MODIFY);
+       } else {
+               I915_WRITE(DPFC_CHICKEN, ~DPFC_HT_MODIFY);
+       }
+
+       I915_WRITE(DPFC_CONTROL, dpfc_ctl);
+       I915_WRITE(DPFC_RECOMP_CTL, DPFC_RECOMP_STALL_EN |
+                  (stall_watermark << DPFC_RECOMP_STALL_WM_SHIFT) |
+                  (interval << DPFC_RECOMP_TIMER_COUNT_SHIFT));
+       I915_WRITE(DPFC_FENCE_YOFF, crtc->y);
+
+       /* enable it... */
+       I915_WRITE(DPFC_CONTROL, I915_READ(DPFC_CONTROL) | DPFC_CTL_EN);
+
+       DRM_DEBUG("enabled fbc on plane %d\n", intel_crtc->plane);
+}
+
+void g4x_disable_fbc(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       u32 dpfc_ctl;
+
+       /* Disable compression */
+       dpfc_ctl = I915_READ(DPFC_CONTROL);
+       dpfc_ctl &= ~DPFC_CTL_EN;
+       I915_WRITE(DPFC_CONTROL, dpfc_ctl);
+       intel_wait_for_vblank(dev);
+
+       DRM_DEBUG("disabled FBC\n");
+}
+
+static bool g4x_fbc_enabled(struct drm_crtc *crtc)
+{
+       struct drm_device *dev = crtc->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN;
+}
+
+/**
+ * intel_update_fbc - enable/disable FBC as needed
+ * @crtc: CRTC to point the compressor at
+ * @mode: mode in use
+ *
+ * Set up the framebuffer compression hardware at mode set time.  We
+ * enable it if possible:
+ *   - plane A only (on pre-965)
+ *   - no pixel multiply/line duplication
+ *   - no alpha buffer discard
+ *   - no dual wide
+ *   - framebuffer <= 2048 in width, 1536 in height
+ *
+ * We can't assume that any compression will take place (worst case),
+ * so the compressed buffer has to be the same size as the uncompressed
+ * one.  It also must reside (along with the line length buffer) in
+ * stolen memory.
+ *
+ * We need to enable/disable FBC on a global basis.
+ */
+static void intel_update_fbc(struct drm_crtc *crtc,
+                            struct drm_display_mode *mode)
+{
+       struct drm_device *dev = crtc->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct drm_framebuffer *fb = crtc->fb;
+       struct intel_framebuffer *intel_fb;
+       struct drm_i915_gem_object *obj_priv;
+       struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+       int plane = intel_crtc->plane;
+
+       if (!i915_powersave)
+               return;
+
+       if (!dev_priv->display.fbc_enabled ||
+           !dev_priv->display.enable_fbc ||
+           !dev_priv->display.disable_fbc)
+               return;
+
+       if (!crtc->fb)
+               return;
+
+       intel_fb = to_intel_framebuffer(fb);
+       obj_priv = intel_fb->obj->driver_private;
+
+       /*
+        * If FBC is already on, we just have to verify that we can
+        * keep it that way...
+        * Need to disable if:
+        *   - changing FBC params (stride, fence, mode)
+        *   - new fb is too large to fit in compressed buffer
+        *   - going to an unsupported config (interlace, pixel multiply, etc.)
+        */
+       if (intel_fb->obj->size > dev_priv->cfb_size) {
+               DRM_DEBUG("framebuffer too large, disabling compression\n");
+               goto out_disable;
+       }
+       if ((mode->flags & DRM_MODE_FLAG_INTERLACE) ||
+           (mode->flags & DRM_MODE_FLAG_DBLSCAN)) {
+               DRM_DEBUG("mode incompatible with compression, disabling\n");
+               goto out_disable;
+       }
+       if ((mode->hdisplay > 2048) ||
+           (mode->vdisplay > 1536)) {
+               DRM_DEBUG("mode too large for compression, disabling\n");
+               goto out_disable;
+       }
+       if ((IS_I915GM(dev) || IS_I945GM(dev)) && plane != 0) {
+               DRM_DEBUG("plane not 0, disabling compression\n");
+               goto out_disable;
+       }
+       if (obj_priv->tiling_mode != I915_TILING_X) {
+               DRM_DEBUG("framebuffer not tiled, disabling compression\n");
+               goto out_disable;
+       }
+
+       if (dev_priv->display.fbc_enabled(crtc)) {
+               /* We can re-enable it in this case, but need to update pitch */
+               if (fb->pitch > dev_priv->cfb_pitch)
+                       dev_priv->display.disable_fbc(dev);
+               if (obj_priv->fence_reg != dev_priv->cfb_fence)
+                       dev_priv->display.disable_fbc(dev);
+               if (plane != dev_priv->cfb_plane)
+                       dev_priv->display.disable_fbc(dev);
+       }
+
+       if (!dev_priv->display.fbc_enabled(crtc)) {
+               /* Now try to turn it back on if possible */
+               dev_priv->display.enable_fbc(crtc, 500);
+       }
+
+       return;
+
+out_disable:
+       DRM_DEBUG("unsupported config, disabling FBC\n");
+       /* Multiple disables should be harmless */
+       if (dev_priv->display.fbc_enabled(crtc))
+               dev_priv->display.disable_fbc(dev);
+}
+
 static int
 intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
                    struct drm_framebuffer *old_fb)
@@ -964,12 +1201,13 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
        struct drm_i915_gem_object *obj_priv;
        struct drm_gem_object *obj;
        int pipe = intel_crtc->pipe;
+       int plane = intel_crtc->plane;
        unsigned long Start, Offset;
-       int dspbase = (pipe == 0 ? DSPAADDR : DSPBADDR);
-       int dspsurf = (pipe == 0 ? DSPASURF : DSPBSURF);
-       int dspstride = (pipe == 0) ? DSPASTRIDE : DSPBSTRIDE;
-       int dsptileoff = (pipe == 0 ? DSPATILEOFF : DSPBTILEOFF);
-       int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR;
+       int dspbase = (plane == 0 ? DSPAADDR : DSPBADDR);
+       int dspsurf = (plane == 0 ? DSPASURF : DSPBSURF);
+       int dspstride = (plane == 0) ? DSPASTRIDE : DSPBSTRIDE;
+       int dsptileoff = (plane == 0 ? DSPATILEOFF : DSPBTILEOFF);
+       int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR;
        u32 dspcntr, alignment;
        int ret;
 
@@ -979,12 +1217,12 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
                return 0;
        }
 
-       switch (pipe) {
+       switch (plane) {
        case 0:
        case 1:
                break;
        default:
-               DRM_ERROR("Can't update pipe %d in SAREA\n", pipe);
+               DRM_ERROR("Can't update plane %d in SAREA\n", plane);
                return -EINVAL;
        }
 
@@ -1086,6 +1324,9 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
                I915_READ(dspbase);
        }
 
+       if ((IS_I965G(dev) || plane == 0))
+               intel_update_fbc(crtc, &crtc->mode);
+
        intel_wait_for_vblank(dev);
 
        if (old_fb) {
@@ -1217,6 +1458,7 @@ static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode)
        int transconf_reg = (pipe == 0) ? TRANSACONF : TRANSBCONF;
        int pf_ctl_reg = (pipe == 0) ? PFA_CTL_1 : PFB_CTL_1;
        int pf_win_size = (pipe == 0) ? PFA_WIN_SZ : PFB_WIN_SZ;
+       int pf_win_pos = (pipe == 0) ? PFA_WIN_POS : PFB_WIN_POS;
        int cpu_htot_reg = (pipe == 0) ? HTOTAL_A : HTOTAL_B;
        int cpu_hblank_reg = (pipe == 0) ? HBLANK_A : HBLANK_B;
        int cpu_hsync_reg = (pipe == 0) ? HSYNC_A : HSYNC_B;
@@ -1268,6 +1510,19 @@ static void igdng_crtc_dpms(struct drm_crtc *crtc, int mode)
                        }
                }
 
+               /* Enable panel fitting for LVDS */
+               if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) {
+                       temp = I915_READ(pf_ctl_reg);
+                       I915_WRITE(pf_ctl_reg, temp | PF_ENABLE);
+
+                       /* currently full aspect */
+                       I915_WRITE(pf_win_pos, 0);
+
+                       I915_WRITE(pf_win_size,
+                                  (dev_priv->panel_fixed_mode->hdisplay << 16) |
+                                  (dev_priv->panel_fixed_mode->vdisplay));
+               }
+
                /* Enable CPU pipe */
                temp = I915_READ(pipeconf_reg);
                if ((temp & PIPEACONF_ENABLE) == 0) {
@@ -1532,9 +1787,10 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode)
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        int pipe = intel_crtc->pipe;
+       int plane = intel_crtc->plane;
        int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B;
-       int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR;
-       int dspbase_reg = (pipe == 0) ? DSPAADDR : DSPBADDR;
+       int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR;
+       int dspbase_reg = (plane == 0) ? DSPAADDR : DSPBADDR;
        int pipeconf_reg = (pipe == 0) ? PIPEACONF : PIPEBCONF;
        u32 temp;
 
@@ -1577,6 +1833,9 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode)
 
                intel_crtc_load_lut(crtc);
 
+               if ((IS_I965G(dev) || plane == 0))
+                       intel_update_fbc(crtc, &crtc->mode);
+
                /* Give the overlay scaler a chance to enable if it's on this pipe */
                //intel_crtc_dpms_video(crtc, true); TODO
                intel_update_watermarks(dev);
@@ -1586,6 +1845,10 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode)
                /* Give the overlay scaler a chance to disable if it's on this pipe */
                //intel_crtc_dpms_video(crtc, FALSE); TODO
 
+               if (dev_priv->cfb_plane == plane &&
+                   dev_priv->display.disable_fbc)
+                       dev_priv->display.disable_fbc(dev);
+
                /* Disable the VGA plane that we never use */
                i915_disable_vga(dev);
 
@@ -1634,15 +1897,13 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode)
 static void intel_crtc_dpms(struct drm_crtc *crtc, int mode)
 {
        struct drm_device *dev = crtc->dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_i915_master_private *master_priv;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        int pipe = intel_crtc->pipe;
        bool enabled;
 
-       if (IS_IGDNG(dev))
-               igdng_crtc_dpms(crtc, mode);
-       else
-               i9xx_crtc_dpms(crtc, mode);
+       dev_priv->display.dpms(crtc, mode);
 
        intel_crtc->dpms_mode = mode;
 
@@ -1709,56 +1970,68 @@ static bool intel_crtc_mode_fixup(struct drm_crtc *crtc,
        return true;
 }
 
+static int i945_get_display_clock_speed(struct drm_device *dev)
+{
+       return 400000;
+}
 
-/** Returns the core display clock speed for i830 - i945 */
-static int intel_get_core_clock_speed(struct drm_device *dev)
+static int i915_get_display_clock_speed(struct drm_device *dev)
 {
+       return 333000;
+}
 
-       /* Core clock values taken from the published datasheets.
-        * The 830 may go up to 166 Mhz, which we should check.
-        */
-       if (IS_I945G(dev))
-               return 400000;
-       else if (IS_I915G(dev))
-               return 333000;
-       else if (IS_I945GM(dev) || IS_845G(dev) || IS_IGDGM(dev))
-               return 200000;
-       else if (IS_I915GM(dev)) {
-               u16 gcfgc = 0;
+static int i9xx_misc_get_display_clock_speed(struct drm_device *dev)
+{
+       return 200000;
+}
 
-               pci_read_config_word(dev->pdev, GCFGC, &gcfgc);
+static int i915gm_get_display_clock_speed(struct drm_device *dev)
+{
+       u16 gcfgc = 0;
 
-               if (gcfgc & GC_LOW_FREQUENCY_ENABLE)
-                       return 133000;
-               else {
-                       switch (gcfgc & GC_DISPLAY_CLOCK_MASK) {
-                       case GC_DISPLAY_CLOCK_333_MHZ:
-                               return 333000;
-                       default:
-                       case GC_DISPLAY_CLOCK_190_200_MHZ:
-                               return 190000;
-                       }
-               }
-       } else if (IS_I865G(dev))
-               return 266000;
-       else if (IS_I855(dev)) {
-               u16 hpllcc = 0;
-               /* Assume that the hardware is in the high speed state.  This
-                * should be the default.
-                */
-               switch (hpllcc & GC_CLOCK_CONTROL_MASK) {
-               case GC_CLOCK_133_200:
-               case GC_CLOCK_100_200:
-                       return 200000;
-               case GC_CLOCK_166_250:
-                       return 250000;
-               case GC_CLOCK_100_133:
-                       return 133000;
+       pci_read_config_word(dev->pdev, GCFGC, &gcfgc);
+
+       if (gcfgc & GC_LOW_FREQUENCY_ENABLE)
+               return 133000;
+       else {
+               switch (gcfgc & GC_DISPLAY_CLOCK_MASK) {
+               case GC_DISPLAY_CLOCK_333_MHZ:
+                       return 333000;
+               default:
+               case GC_DISPLAY_CLOCK_190_200_MHZ:
+                       return 190000;
                }
-       } else /* 852, 830 */
+       }
+}
+
+static int i865_get_display_clock_speed(struct drm_device *dev)
+{
+       return 266000;
+}
+
+static int i855_get_display_clock_speed(struct drm_device *dev)
+{
+       u16 hpllcc = 0;
+       /* Assume that the hardware is in the high speed state.  This
+        * should be the default.
+        */
+       switch (hpllcc & GC_CLOCK_CONTROL_MASK) {
+       case GC_CLOCK_133_200:
+       case GC_CLOCK_100_200:
+               return 200000;
+       case GC_CLOCK_166_250:
+               return 250000;
+       case GC_CLOCK_100_133:
                return 133000;
+       }
+
+       /* Shouldn't happen */
+       return 0;
+}
 
-       return 0; /* Silence gcc warning */
+static int i830_get_display_clock_speed(struct drm_device *dev)
+{
+       return 133000;
 }
 
 /**
@@ -1921,7 +2194,14 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
 {
        long entries_required, wm_size;
 
-       entries_required = (clock_in_khz * pixel_size * latency_ns) / 1000000;
+       /*
+        * Note: we need to make sure we don't overflow for various clock &
+        * latency values.
+        * clocks go from a few thousand to several hundred thousand.
+        * latency is usually a few thousand
+        */
+       entries_required = ((clock_in_khz / 1000) * pixel_size * latency_ns) /
+               1000;
        entries_required /= wm->cacheline_size;
 
        DRM_DEBUG("FIFO entries required for mode: %d\n", entries_required);
@@ -1986,14 +2266,13 @@ static struct cxsr_latency *intel_get_cxsr_latency(int is_desktop, int fsb,
        for (i = 0; i < ARRAY_SIZE(cxsr_latency_table); i++) {
                latency = &cxsr_latency_table[i];
                if (is_desktop == latency->is_desktop &&
-                       fsb == latency->fsb_freq && mem == latency->mem_freq)
-                       break;
+                   fsb == latency->fsb_freq && mem == latency->mem_freq)
+                       return latency;
        }
-       if (i >= ARRAY_SIZE(cxsr_latency_table)) {
-               DRM_DEBUG("Unknown FSB/MEM found, disable CxSR\n");
-               return NULL;
-       }
-       return latency;
+
+       DRM_DEBUG("Unknown FSB/MEM found, disable CxSR\n");
+
+       return NULL;
 }
 
 static void igd_disable_cxsr(struct drm_device *dev)
@@ -2084,32 +2363,36 @@ static void igd_enable_cxsr(struct drm_device *dev, unsigned long clock,
  */
 const static int latency_ns = 5000;
 
-static int intel_get_fifo_size(struct drm_device *dev, int plane)
+static int i9xx_get_fifo_size(struct drm_device *dev, int plane)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t dsparb = I915_READ(DSPARB);
        int size;
 
-       if (IS_I9XX(dev)) {
-               if (plane == 0)
-                       size = dsparb & 0x7f;
-               else
-                       size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) -
-                               (dsparb & 0x7f);
-       } else if (IS_I85X(dev)) {
-               if (plane == 0)
-                       size = dsparb & 0x1ff;
-               else
-                       size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) -
-                               (dsparb & 0x1ff);
-               size >>= 1; /* Convert to cachelines */
-       } else if (IS_845G(dev)) {
+       if (plane == 0)
                size = dsparb & 0x7f;
-               size >>= 2; /* Convert to cachelines */
-       } else {
-               size = dsparb & 0x7f;
-               size >>= 1; /* Convert to cachelines */
-       }
+       else
+               size = ((dsparb >> DSPARB_CSTART_SHIFT) & 0x7f) -
+                       (dsparb & 0x7f);
+
+       DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A",
+                 size);
+
+       return size;
+}
+
+static int i85x_get_fifo_size(struct drm_device *dev, int plane)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       uint32_t dsparb = I915_READ(DSPARB);
+       int size;
+
+       if (plane == 0)
+               size = dsparb & 0x1ff;
+       else
+               size = ((dsparb >> DSPARB_BEND_SHIFT) & 0x1ff) -
+                       (dsparb & 0x1ff);
+       size >>= 1; /* Convert to cachelines */
 
        DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A",
                  size);
@@ -2117,7 +2400,38 @@ static int intel_get_fifo_size(struct drm_device *dev, int plane)
        return size;
 }
 
-static void g4x_update_wm(struct drm_device *dev)
+static int i845_get_fifo_size(struct drm_device *dev, int plane)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       uint32_t dsparb = I915_READ(DSPARB);
+       int size;
+
+       size = dsparb & 0x7f;
+       size >>= 2; /* Convert to cachelines */
+
+       DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A",
+                 size);
+
+       return size;
+}
+
+static int i830_get_fifo_size(struct drm_device *dev, int plane)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       uint32_t dsparb = I915_READ(DSPARB);
+       int size;
+
+       size = dsparb & 0x7f;
+       size >>= 1; /* Convert to cachelines */
+
+       DRM_DEBUG("FIFO size - (0x%08x) %s: %d\n", dsparb, plane ? "B" : "A",
+                 size);
+
+       return size;
+}
+
+static void g4x_update_wm(struct drm_device *dev, int unused, int unused2,
+                         int unused3, int unused4)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        u32 fw_blc_self = I915_READ(FW_BLC_SELF);
@@ -2129,7 +2443,8 @@ static void g4x_update_wm(struct drm_device *dev)
        I915_WRITE(FW_BLC_SELF, fw_blc_self);
 }
 
-static void i965_update_wm(struct drm_device *dev)
+static void i965_update_wm(struct drm_device *dev, int unused, int unused2,
+                          int unused3, int unused4)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
 
@@ -2165,8 +2480,8 @@ static void i9xx_update_wm(struct drm_device *dev, int planea_clock,
        cacheline_size = planea_params.cacheline_size;
 
        /* Update per-plane FIFO sizes */
-       planea_params.fifo_size = intel_get_fifo_size(dev, 0);
-       planeb_params.fifo_size = intel_get_fifo_size(dev, 1);
+       planea_params.fifo_size = dev_priv->display.get_fifo_size(dev, 0);
+       planeb_params.fifo_size = dev_priv->display.get_fifo_size(dev, 1);
 
        planea_wm = intel_calculate_wm(planea_clock, &planea_params,
                                       pixel_size, latency_ns);
@@ -2213,14 +2528,14 @@ static void i9xx_update_wm(struct drm_device *dev, int planea_clock,
        I915_WRITE(FW_BLC2, fwater_hi);
 }
 
-static void i830_update_wm(struct drm_device *dev, int planea_clock,
-                          int pixel_size)
+static void i830_update_wm(struct drm_device *dev, int planea_clock, int unused,
+                          int unused2, int pixel_size)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
        uint32_t fwater_lo = I915_READ(FW_BLC) & ~0xfff;
        int planea_wm;
 
-       i830_wm_info.fifo_size = intel_get_fifo_size(dev, 0);
+       i830_wm_info.fifo_size = dev_priv->display.get_fifo_size(dev, 0);
 
        planea_wm = intel_calculate_wm(planea_clock, &i830_wm_info,
                                       pixel_size, latency_ns);
@@ -2264,6 +2579,7 @@ static void i830_update_wm(struct drm_device *dev, int planea_clock,
   */
 static void intel_update_watermarks(struct drm_device *dev)
 {
+       struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_crtc *crtc;
        struct intel_crtc *intel_crtc;
        int sr_hdisplay = 0;
@@ -2302,15 +2618,8 @@ static void intel_update_watermarks(struct drm_device *dev)
        else if (IS_IGD(dev))
                igd_disable_cxsr(dev);
 
-       if (IS_G4X(dev))
-               g4x_update_wm(dev);
-       else if (IS_I965G(dev))
-               i965_update_wm(dev);
-       else if (IS_I9XX(dev) || IS_MOBILE(dev))
-               i9xx_update_wm(dev, planea_clock, planeb_clock, sr_hdisplay,
-                              pixel_size);
-       else
-               i830_update_wm(dev, planea_clock, pixel_size);
+       dev_priv->display.update_wm(dev, planea_clock, planeb_clock,
+                                   sr_hdisplay, pixel_size);
 }
 
 static int intel_crtc_mode_set(struct drm_crtc *crtc,
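
The hunks above replace open-coded IS_xxx() ladders with indirect calls through dev_priv->display (dpms, update_wm, get_fifo_size, get_display_clock_speed, plus the fbc_enabled/enable_fbc/disable_fbc trio), so supporting a new chipset becomes a matter of filling in a table instead of growing every hot path. The table setup itself is not shown in this excerpt; a hypothetical sketch of that wiring, mirroring the ladders being deleted (the function name is assumed):

static void intel_init_display(struct drm_device *dev)
{
        struct drm_i915_private *dev_priv = dev->dev_private;

        /* CRTC dpms, formerly an IS_IGDNG() branch in intel_crtc_dpms() */
        dev_priv->display.dpms = IS_IGDNG(dev) ? igdng_crtc_dpms
                                               : i9xx_crtc_dpms;

        /* Watermarks and FIFO sizing, formerly the ladders in
         * intel_update_watermarks() and intel_get_fifo_size() */
        if (IS_G4X(dev)) {
                dev_priv->display.update_wm   = g4x_update_wm;
                dev_priv->display.fbc_enabled = g4x_fbc_enabled;
                dev_priv->display.enable_fbc  = g4x_enable_fbc;
                dev_priv->display.disable_fbc = g4x_disable_fbc;
        } else if (IS_I965G(dev)) {
                dev_priv->display.update_wm = i965_update_wm;
        } else if (IS_I9XX(dev) || IS_MOBILE(dev)) {
                dev_priv->display.update_wm     = i9xx_update_wm;
                dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
        } else {
                dev_priv->display.update_wm     = i830_update_wm;
                dev_priv->display.get_fifo_size = i830_get_fifo_size;
        }
}
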
@@ -2323,10 +2632,11 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
        int pipe = intel_crtc->pipe;
+       int plane = intel_crtc->plane;
        int fp_reg = (pipe == 0) ? FPA0 : FPB0;
        int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B;
        int dpll_md_reg = (intel_crtc->pipe == 0) ? DPLL_A_MD : DPLL_B_MD;
-       int dspcntr_reg = (pipe == 0) ? DSPACNTR : DSPBCNTR;
+       int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR;
        int pipeconf_reg = (pipe == 0) ? PIPEACONF : PIPEBCONF;
        int htot_reg = (pipe == 0) ? HTOTAL_A : HTOTAL_B;
        int hblank_reg = (pipe == 0) ? HBLANK_A : HBLANK_B;
@@ -2334,8 +2644,8 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
        int vtot_reg = (pipe == 0) ? VTOTAL_A : VTOTAL_B;
        int vblank_reg = (pipe == 0) ? VBLANK_A : VBLANK_B;
        int vsync_reg = (pipe == 0) ? VSYNC_A : VSYNC_B;
-       int dspsize_reg = (pipe == 0) ? DSPASIZE : DSPBSIZE;
-       int dsppos_reg = (pipe == 0) ? DSPAPOS : DSPBPOS;
+       int dspsize_reg = (plane == 0) ? DSPASIZE : DSPBSIZE;
+       int dsppos_reg = (plane == 0) ? DSPAPOS : DSPBPOS;
        int pipesrc_reg = (pipe == 0) ? PIPEASRC : PIPEBSRC;
        int refclk, num_outputs = 0;
        intel_clock_t clock, reduced_clock;
@@ -2568,7 +2878,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
           enable color space conversion */
        if (!IS_IGDNG(dev)) {
                if (pipe == 0)
-                       dspcntr |= DISPPLANE_SEL_PIPE_A;
+                       dspcntr &= ~DISPPLANE_SEL_PIPE_MASK;
                else
                        dspcntr |= DISPPLANE_SEL_PIPE_B;
        }
@@ -2580,7 +2890,8 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
                 * XXX: No double-wide on 915GM pipe B. Is that the only reason for the
                 * pipe == 0 check?
                 */
-               if (mode->clock > intel_get_core_clock_speed(dev) * 9 / 10)
+               if (mode->clock >
+                   dev_priv->display.get_display_clock_speed(dev) * 9 / 10)
                        pipeconf |= PIPEACONF_DOUBLE_WIDE;
                else
                        pipeconf &= ~PIPEACONF_DOUBLE_WIDE;
@@ -2652,9 +2963,12 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
                udelay(150);
 
                if (IS_I965G(dev) && !IS_IGDNG(dev)) {
-                       sdvo_pixel_multiply = adjusted_mode->clock / mode->clock;
-                       I915_WRITE(dpll_md_reg, (0 << DPLL_MD_UDI_DIVIDER_SHIFT) |
+                       if (is_sdvo) {
+                               sdvo_pixel_multiply = adjusted_mode->clock / mode->clock;
+                               I915_WRITE(dpll_md_reg, (0 << DPLL_MD_UDI_DIVIDER_SHIFT) |
                                        ((sdvo_pixel_multiply - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT));
+                       } else
+                               I915_WRITE(dpll_md_reg, 0);
                } else {
                        /* write it again -- the BIOS does, after all */
                        I915_WRITE(dpll_reg, dpll);
@@ -2734,6 +3048,9 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
        /* Flush the plane changes */
        ret = intel_pipe_set_base(crtc, x, y, old_fb);
 
+       if (IS_I965G(dev) || plane == 0)
+               intel_update_fbc(crtc, &crtc->mode);
+
        intel_update_watermarks(dev);
 
        drm_vblank_post_modeset(dev, pipe);
@@ -2778,6 +3095,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
        struct drm_gem_object *bo;
        struct drm_i915_gem_object *obj_priv;
        int pipe = intel_crtc->pipe;
+       int plane = intel_crtc->plane;
        uint32_t control = (pipe == 0) ? CURACNTR : CURBCNTR;
        uint32_t base = (pipe == 0) ? CURABASE : CURBBASE;
        uint32_t temp = I915_READ(control);
@@ -2863,6 +3181,10 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
                        i915_gem_object_unpin(intel_crtc->cursor_bo);
                drm_gem_object_unreference(intel_crtc->cursor_bo);
        }
+
+       if (IS_I965G(dev) || plane == 0)
+               intel_update_fbc(crtc, &crtc->mode);
+
        mutex_unlock(&dev->struct_mutex);
 
        intel_crtc->cursor_addr = addr;
@@ -3544,6 +3866,14 @@ static void intel_crtc_init(struct drm_device *dev, int pipe)
                intel_crtc->lut_b[i] = i;
        }
 
+       /* Swap pipes & planes for FBC on pre-965 */
+       intel_crtc->pipe = pipe;
+       intel_crtc->plane = pipe;
+       if (IS_MOBILE(dev) && (IS_I9XX(dev) && !IS_I965G(dev))) {
+               DRM_DEBUG("swapping pipes & planes for FBC\n");
+               intel_crtc->plane = ((pipe == 0) ? 1 : 0);
+       }
+
        intel_crtc->cursor_addr = 0;
        intel_crtc->dpms_mode = DRM_MODE_DPMS_OFF;
        drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs);
@@ -3826,6 +4156,73 @@ void intel_init_clock_gating(struct drm_device *dev)
        }
 }
 
+/* Set up chip specific display functions */
+static void intel_init_display(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       /* We always want a DPMS function */
+       if (IS_IGDNG(dev))
+               dev_priv->display.dpms = igdng_crtc_dpms;
+       else
+               dev_priv->display.dpms = i9xx_crtc_dpms;
+
+       /* Only mobile has FBC, leave pointers NULL for other chips */
+       if (IS_MOBILE(dev)) {
+               if (IS_GM45(dev)) {
+                       dev_priv->display.fbc_enabled = g4x_fbc_enabled;
+                       dev_priv->display.enable_fbc = g4x_enable_fbc;
+                       dev_priv->display.disable_fbc = g4x_disable_fbc;
+               } else if (IS_I965GM(dev) || IS_I945GM(dev) || IS_I915GM(dev)) {
+                       dev_priv->display.fbc_enabled = i8xx_fbc_enabled;
+                       dev_priv->display.enable_fbc = i8xx_enable_fbc;
+                       dev_priv->display.disable_fbc = i8xx_disable_fbc;
+               }
+               /* 855GM needs testing */
+       }
+
+       /* Returns the core display clock speed */
+       if (IS_I945G(dev))
+               dev_priv->display.get_display_clock_speed =
+                       i945_get_display_clock_speed;
+       else if (IS_I915G(dev))
+               dev_priv->display.get_display_clock_speed =
+                       i915_get_display_clock_speed;
+       else if (IS_I945GM(dev) || IS_845G(dev) || IS_IGDGM(dev))
+               dev_priv->display.get_display_clock_speed =
+                       i9xx_misc_get_display_clock_speed;
+       else if (IS_I915GM(dev))
+               dev_priv->display.get_display_clock_speed =
+                       i915gm_get_display_clock_speed;
+       else if (IS_I865G(dev))
+               dev_priv->display.get_display_clock_speed =
+                       i865_get_display_clock_speed;
+       else if (IS_I855(dev))
+               dev_priv->display.get_display_clock_speed =
+                       i855_get_display_clock_speed;
+       else /* 852, 830 */
+               dev_priv->display.get_display_clock_speed =
+                       i830_get_display_clock_speed;
+
+       /* For FIFO watermark updates */
+       if (IS_G4X(dev))
+               dev_priv->display.update_wm = g4x_update_wm;
+       else if (IS_I965G(dev))
+               dev_priv->display.update_wm = i965_update_wm;
+       else if (IS_I9XX(dev) || IS_MOBILE(dev)) {
+               dev_priv->display.update_wm = i9xx_update_wm;
+               dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
+       } else {
+               if (IS_I85X(dev))
+                       dev_priv->display.get_fifo_size = i85x_get_fifo_size;
+               else if (IS_845G(dev))
+                       dev_priv->display.get_fifo_size = i845_get_fifo_size;
+               else
+                       dev_priv->display.get_fifo_size = i830_get_fifo_size;
+               dev_priv->display.update_wm = i830_update_wm;
+       }
+}
+
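intel_init_display() fills the per-chip vtable once at load time; slots it
leaves NULL (the FBC hooks on non-mobile parts) double as feature flags and
are tested before use, as intel_modeset_cleanup() does below. A minimal
sketch of that convention, with hypothetical names:

/* Sketch: NULL vtable slots double as feature flags. Hypothetical names. */
#include <stdio.h>

struct display_funcs {
        void (*disable_fbc)(void);
};

static void g4x_disable_fbc(void)
{
        printf("FBC disabled\n");
}

int main(void)
{
        struct display_funcs display = { 0 };   /* chip without FBC */

        if (display.disable_fbc)                /* guard, as cleanup does */
                display.disable_fbc();          /* skipped */

        display.disable_fbc = g4x_disable_fbc;  /* GM45-style setup */
        if (display.disable_fbc)
                display.disable_fbc();          /* runs */
        return 0;
}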
 void intel_modeset_init(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3839,6 +4236,8 @@ void intel_modeset_init(struct drm_device *dev)
 
        dev->mode_config.funcs = (void *)&intel_mode_funcs;
 
+       intel_init_display(dev);
+
        if (IS_I965G(dev)) {
                dev->mode_config.max_width = 8192;
                dev->mode_config.max_height = 8192;
@@ -3904,6 +4303,9 @@ void intel_modeset_cleanup(struct drm_device *dev)
 
        mutex_unlock(&dev->struct_mutex);
 
+       if (dev_priv->display.disable_fbc)
+               dev_priv->display.disable_fbc(dev);
+
        drm_mode_config_cleanup(dev);
 }
 
index 3ebbbab..8aa4b7f 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/i2c.h>
 #include <linux/i2c-id.h>
 #include <linux/i2c-algo-bit.h>
+#include "i915_drv.h"
 #include "drm_crtc.h"
 
 #include "drm_crtc_helper.h"
@@ -111,8 +112,8 @@ struct intel_output {
 
 struct intel_crtc {
        struct drm_crtc base;
-       int pipe;
-       int plane;
+       enum pipe pipe;
+       enum plane plane;
        struct drm_gem_object *cursor_bo;
        uint32_t cursor_addr;
        u8 lut_r[256], lut_g[256], lut_b[256];
index dafc0da..98ae3d7 100644 (file)
@@ -27,6 +27,7 @@
  *      Jesse Barnes <jesse.barnes@intel.com>
  */
 
+#include <acpi/button.h>
 #include <linux/dmi.h>
 #include <linux/i2c.h>
 #include "drmP.h"
@@ -295,6 +296,10 @@ static bool intel_lvds_mode_fixup(struct drm_encoder *encoder,
                goto out;
        }
 
+       /* IGDNG only does full-screen scaling for now */
+       if (IS_IGDNG(dev))
+               goto out;
+
        /* 965+ wants fuzzy fitting */
        if (IS_I965G(dev))
                pfit_control |= (intel_crtc->pipe << PFIT_PIPE_SHIFT) |
@@ -322,8 +327,10 @@ static bool intel_lvds_mode_fixup(struct drm_encoder *encoder,
         * to register description and PRM.
         * Change the value here to see the borders for debugging
         */
-       I915_WRITE(BCLRPAT_A, 0);
-       I915_WRITE(BCLRPAT_B, 0);
+       if (!IS_IGDNG(dev)) {
+               I915_WRITE(BCLRPAT_A, 0);
+               I915_WRITE(BCLRPAT_B, 0);
+       }
 
        switch (lvds_priv->fitting_mode) {
        case DRM_MODE_SCALE_CENTER:
@@ -572,7 +579,6 @@ static void intel_lvds_mode_set(struct drm_encoder *encoder,
         * settings.
         */
 
-       /* No panel fitting yet, fixme */
        if (IS_IGDNG(dev))
                return;
 
@@ -585,15 +591,33 @@ static void intel_lvds_mode_set(struct drm_encoder *encoder,
        I915_WRITE(PFIT_CONTROL, lvds_priv->pfit_control);
 }
 
+/* Some machines report an incorrect lid status; assume their panels are connected */
+static const struct dmi_system_id bad_lid_status[] = {
+       {
+               .ident = "Aspire One",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Aspire one"),
+               },
+       },
+       { }
+};
+
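bad_lid_status[] has the usual DMI quirk-table shape: a NULL-terminated
array scanned for a vendor/product match. A self-contained analog of that
lookup (hypothetical strings and helper, not the kernel's
dmi_check_system()):

/* Self-contained analog of the DMI quirk-table lookup; hypothetical API. */
#include <stdio.h>
#include <string.h>

struct quirk {
        const char *vendor;
        const char *product;
};

static const struct quirk bad_lid[] = {
        { "Acer", "Aspire one" },
        { NULL, NULL }          /* terminator, like the empty { } above */
};

static int check_quirks(const char *vendor, const char *product)
{
        const struct quirk *q;

        for (q = bad_lid; q->vendor; q++)
                if (!strcmp(q->vendor, vendor) && !strcmp(q->product, product))
                        return 1;       /* matched: distrust the lid status */
        return 0;
}

int main(void)
{
        printf("%d\n", check_quirks("Acer", "Aspire one"));     /* prints 1 */
        return 0;
}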
 /**
  * Detect the LVDS connection.
  *
- * This always returns CONNECTOR_STATUS_CONNECTED.  This connector should only have
- * been set up if the LVDS was actually connected anyway.
+ * Since LVDS doesn't have hotplug, we use the lid as a proxy.  Open means
+ * connected and closed means disconnected.  We also send hotplug events as
+ * needed, using lid status notification from the input layer.
  */
 static enum drm_connector_status intel_lvds_detect(struct drm_connector *connector)
 {
-       return connector_status_connected;
+       enum drm_connector_status status = connector_status_connected;
+
+       if (!acpi_lid_open() && !dmi_check_system(bad_lid_status))
+               status = connector_status_disconnected;
+
+       return status;
 }
 
 /**
@@ -632,6 +656,24 @@ static int intel_lvds_get_modes(struct drm_connector *connector)
        return 0;
 }
 
+static int intel_lid_notify(struct notifier_block *nb, unsigned long val,
+                           void *unused)
+{
+       struct drm_i915_private *dev_priv =
+               container_of(nb, struct drm_i915_private, lid_notifier);
+       struct drm_device *dev = dev_priv->dev;
+
+       if (acpi_lid_open() && !dev_priv->suspended) {
+               mutex_lock(&dev->mode_config.mutex);
+               drm_helper_resume_force_mode(dev);
+               mutex_unlock(&dev->mode_config.mutex);
+       }
+
+       drm_sysfs_hotplug_event(dev_priv->dev);
+
+       return NOTIFY_OK;
+}
+
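intel_lid_notify() is an ordinary notifier_block callback; the registration
that wires it up appears further down in this hunk via
acpi_lid_notifier_register(). A minimal user-space analog of the
register/notify flow, with invented names:

/* Minimal analog of the notifier-chain flow; all names hypothetical. */
#include <stdio.h>

struct notifier_block;
typedef int (*notifier_fn)(struct notifier_block *nb, unsigned long val,
                           void *data);

struct notifier_block {
        notifier_fn notifier_call;
        struct notifier_block *next;
};

static struct notifier_block *lid_chain;

static void lid_notifier_register(struct notifier_block *nb)
{
        nb->next = lid_chain;           /* prepend to the chain */
        lid_chain = nb;
}

static void lid_notify_all(unsigned long val)
{
        struct notifier_block *nb;

        for (nb = lid_chain; nb; nb = nb->next)
                nb->notifier_call(nb, val, NULL);
}

static int my_lid_notify(struct notifier_block *nb, unsigned long val,
                         void *unused)
{
        printf("lid event %lu: restore modes if the lid opened\n", val);
        return 0;
}

int main(void)
{
        struct notifier_block nb = { .notifier_call = my_lid_notify };

        lid_notifier_register(&nb);
        lid_notify_all(1);
        return 0;
}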
 /**
  * intel_lvds_destroy - unregister and free LVDS structures
  * @connector: connector to free
@@ -641,10 +683,14 @@ static int intel_lvds_get_modes(struct drm_connector *connector)
  */
 static void intel_lvds_destroy(struct drm_connector *connector)
 {
+       struct drm_device *dev = connector->dev;
        struct intel_output *intel_output = to_intel_output(connector);
+       struct drm_i915_private *dev_priv = dev->dev_private;
 
        if (intel_output->ddc_bus)
                intel_i2c_destroy(intel_output->ddc_bus);
+       if (dev_priv->lid_notifier.notifier_call)
+               acpi_lid_notifier_unregister(&dev_priv->lid_notifier);
        drm_sysfs_connector_remove(connector);
        drm_connector_cleanup(connector);
        kfree(connector);
@@ -1011,6 +1057,11 @@ out:
                pwm |= PWM_PCH_ENABLE;
                I915_WRITE(BLC_PWM_PCH_CTL1, pwm);
        }
+       dev_priv->lid_notifier.notifier_call = intel_lid_notify;
+       if (acpi_lid_notifier_register(&dev_priv->lid_notifier)) {
+               DRM_DEBUG("lid notifier registration failed\n");
+               dev_priv->lid_notifier.notifier_call = NULL;
+       }
        drm_sysfs_connector_add(connector);
        return;
 
index 0bf28ef..083bec2 100644 (file)
@@ -135,6 +135,30 @@ struct intel_sdvo_priv {
        struct intel_sdvo_dtd save_input_dtd_1, save_input_dtd_2;
        struct intel_sdvo_dtd save_output_dtd[16];
        u32 save_SDVOX;
+       /* add the properties for SDVO-TV */
+       struct drm_property *left_property;
+       struct drm_property *right_property;
+       struct drm_property *top_property;
+       struct drm_property *bottom_property;
+       struct drm_property *hpos_property;
+       struct drm_property *vpos_property;
+
+       /* add the properties for SDVO-TV/LVDS */
+       struct drm_property *brightness_property;
+       struct drm_property *contrast_property;
+       struct drm_property *saturation_property;
+       struct drm_property *hue_property;
+
+       /* variables recording the current settings of the above properties */
+       u32     left_margin, right_margin, top_margin, bottom_margin;
+       /* the range of the margins */
+       u32     max_hscan,  max_vscan;
+       u32     max_hpos, cur_hpos;
+       u32     max_vpos, cur_vpos;
+       u32     cur_brightness, max_brightness;
+       u32     cur_contrast,   max_contrast;
+       u32     cur_saturation, max_saturation;
+       u32     cur_hue,        max_hue;
 };
 
 static bool
@@ -281,6 +305,31 @@ static const struct _sdvo_cmd_name {
     SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SDTV_RESOLUTION_SUPPORT),
     SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SCALED_HDTV_RESOLUTION_SUPPORT),
     SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS),
+    /* Add the op code for SDVO enhancements */
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_POSITION_H),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_POSITION_H),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_POSITION_H),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_POSITION_V),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_POSITION_V),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_POSITION_V),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_SATURATION),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SATURATION),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_SATURATION),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_HUE),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HUE),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HUE),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_CONTRAST),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_CONTRAST),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CONTRAST),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_BRIGHTNESS),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_BRIGHTNESS),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_BRIGHTNESS),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_H),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_H),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_H),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_V),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_V),
+    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_V),
     /* HDMI op code */
     SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPP_ENCODE),
     SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ENCODE),
@@ -981,7 +1030,7 @@ static void intel_sdvo_set_tv_format(struct intel_output *output)
 
        status = intel_sdvo_read_response(output, NULL, 0);
        if (status != SDVO_CMD_STATUS_SUCCESS)
-               DRM_DEBUG("%s: Failed to set TV format\n",
+               DRM_DEBUG_KMS("%s: Failed to set TV format\n",
                          SDVO_NAME(sdvo_priv));
 }
 
@@ -1792,6 +1841,45 @@ static int intel_sdvo_get_modes(struct drm_connector *connector)
        return 1;
 }
 
+static
+void intel_sdvo_destroy_enhance_property(struct drm_connector *connector)
+{
+       struct intel_output *intel_output = to_intel_output(connector);
+       struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
+       struct drm_device *dev = connector->dev;
+
+       if (sdvo_priv->is_tv) {
+               if (sdvo_priv->left_property)
+                       drm_property_destroy(dev, sdvo_priv->left_property);
+               if (sdvo_priv->right_property)
+                       drm_property_destroy(dev, sdvo_priv->right_property);
+               if (sdvo_priv->top_property)
+                       drm_property_destroy(dev, sdvo_priv->top_property);
+               if (sdvo_priv->bottom_property)
+                       drm_property_destroy(dev, sdvo_priv->bottom_property);
+               if (sdvo_priv->hpos_property)
+                       drm_property_destroy(dev, sdvo_priv->hpos_property);
+               if (sdvo_priv->vpos_property)
+                       drm_property_destroy(dev, sdvo_priv->vpos_property);
+       }
+       if (sdvo_priv->is_tv) {
+               if (sdvo_priv->saturation_property)
+                       drm_property_destroy(dev,
+                                       sdvo_priv->saturation_property);
+               if (sdvo_priv->contrast_property)
+                       drm_property_destroy(dev,
+                                       sdvo_priv->contrast_property);
+               if (sdvo_priv->hue_property)
+                       drm_property_destroy(dev, sdvo_priv->hue_property);
+       }
+       if (sdvo_priv->is_tv || sdvo_priv->is_lvds) {
+               if (sdvo_priv->brightness_property)
+                       drm_property_destroy(dev,
+                                       sdvo_priv->brightness_property);
+       }
+}
+
 static void intel_sdvo_destroy(struct drm_connector *connector)
 {
        struct intel_output *intel_output = to_intel_output(connector);
@@ -1812,6 +1900,9 @@ static void intel_sdvo_destroy(struct drm_connector *connector)
                drm_property_destroy(connector->dev,
                                     sdvo_priv->tv_format_property);
 
+       if (sdvo_priv->is_tv || sdvo_priv->is_lvds)
+               intel_sdvo_destroy_enhance_property(connector);
+
        drm_sysfs_connector_remove(connector);
        drm_connector_cleanup(connector);
 
@@ -1829,6 +1920,8 @@ intel_sdvo_set_property(struct drm_connector *connector,
        struct drm_crtc *crtc = encoder->crtc;
        int ret = 0;
        bool changed = false;
+       uint8_t cmd, status;
+       uint16_t temp_value;
 
        ret = drm_connector_property_set_value(connector, property, val);
        if (ret < 0)
@@ -1845,11 +1938,102 @@ intel_sdvo_set_property(struct drm_connector *connector,
 
                sdvo_priv->tv_format_name = sdvo_priv->tv_format_supported[val];
                changed = true;
-       } else {
-               ret = -EINVAL;
-               goto out;
        }
 
+       if (sdvo_priv->is_tv || sdvo_priv->is_lvds) {
+               cmd = 0;
+               temp_value = val;
+               if (sdvo_priv->left_property == property) {
+                       drm_connector_property_set_value(connector,
+                               sdvo_priv->right_property, val);
+                       if (sdvo_priv->left_margin == temp_value)
+                               goto out;
+
+                       sdvo_priv->left_margin = temp_value;
+                       sdvo_priv->right_margin = temp_value;
+                       temp_value = sdvo_priv->max_hscan -
+                                       sdvo_priv->left_margin;
+                       cmd = SDVO_CMD_SET_OVERSCAN_H;
+               } else if (sdvo_priv->right_property == property) {
+                       drm_connector_property_set_value(connector,
+                               sdvo_priv->left_property, val);
+                       if (sdvo_priv->right_margin == temp_value)
+                               goto out;
+
+                       sdvo_priv->left_margin = temp_value;
+                       sdvo_priv->right_margin = temp_value;
+                       temp_value = sdvo_priv->max_hscan -
+                               sdvo_priv->left_margin;
+                       cmd = SDVO_CMD_SET_OVERSCAN_H;
+               } else if (sdvo_priv->top_property == property) {
+                       drm_connector_property_set_value(connector,
+                               sdvo_priv->bottom_property, val);
+                       if (sdvo_priv->top_margin == temp_value)
+                               goto out;
+
+                       sdvo_priv->top_margin = temp_value;
+                       sdvo_priv->bottom_margin = temp_value;
+                       temp_value = sdvo_priv->max_vscan -
+                                       sdvo_priv->top_margin;
+                       cmd = SDVO_CMD_SET_OVERSCAN_V;
+               } else if (sdvo_priv->bottom_property == property) {
+                       drm_connector_property_set_value(connector,
+                               sdvo_priv->top_property, val);
+                       if (sdvo_priv->bottom_margin == temp_value)
+                               goto out;
+                       sdvo_priv->top_margin = temp_value;
+                       sdvo_priv->bottom_margin = temp_value;
+                       temp_value = sdvo_priv->max_vscan -
+                                       sdvo_priv->top_margin;
+                       cmd = SDVO_CMD_SET_OVERSCAN_V;
+               } else if (sdvo_priv->hpos_property == property) {
+                       if (sdvo_priv->cur_hpos == temp_value)
+                               goto out;
+
+                       cmd = SDVO_CMD_SET_POSITION_H;
+                       sdvo_priv->cur_hpos = temp_value;
+               } else if (sdvo_priv->vpos_property == property) {
+                       if (sdvo_priv->cur_vpos == temp_value)
+                               goto out;
+
+                       cmd = SDVO_CMD_SET_POSITION_V;
+                       sdvo_priv->cur_vpos = temp_value;
+               } else if (sdvo_priv->saturation_property == property) {
+                       if (sdvo_priv->cur_saturation == temp_value)
+                               goto out;
+
+                       cmd = SDVO_CMD_SET_SATURATION;
+                       sdvo_priv->cur_saturation = temp_value;
+               } else if (sdvo_priv->contrast_property == property) {
+                       if (sdvo_priv->cur_contrast == temp_value)
+                               goto out;
+
+                       cmd = SDVO_CMD_SET_CONTRAST;
+                       sdvo_priv->cur_contrast = temp_value;
+               } else if (sdvo_priv->hue_property == property) {
+                       if (sdvo_priv->cur_hue == temp_value)
+                               goto out;
+
+                       cmd = SDVO_CMD_SET_HUE;
+                       sdvo_priv->cur_hue = temp_value;
+               } else if (sdvo_priv->brightness_property == property) {
+                       if (sdvo_priv->cur_brightness == temp_value)
+                               goto out;
+
+                       cmd = SDVO_CMD_SET_BRIGHTNESS;
+                       sdvo_priv->cur_brightness = temp_value;
+               }
+               if (cmd) {
+                       intel_sdvo_write_cmd(intel_output, cmd, &temp_value, 2);
+                       status = intel_sdvo_read_response(intel_output,
+                                                               NULL, 0);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO command \n");
+                               return -EINVAL;
+                       }
+                       changed = true;
+               }
+       }
        if (changed && crtc)
                drm_crtc_helper_set_mode(crtc, &crtc->mode, crtc->x,
                                crtc->y, crtc->fb);
@@ -2090,6 +2274,8 @@ intel_sdvo_output_setup(struct intel_output *intel_output, uint16_t flags)
                sdvo_priv->controlled_output = SDVO_OUTPUT_RGB1;
                encoder->encoder_type = DRM_MODE_ENCODER_DAC;
                connector->connector_type = DRM_MODE_CONNECTOR_VGA;
+               intel_output->clone_mask = (1 << INTEL_SDVO_NON_TV_CLONE_BIT) |
+                                       (1 << INTEL_ANALOG_CLONE_BIT);
        } else if (flags & SDVO_OUTPUT_LVDS0) {
 
                sdvo_priv->controlled_output = SDVO_OUTPUT_LVDS0;
@@ -2176,6 +2362,310 @@ static void intel_sdvo_tv_create_property(struct drm_connector *connector)
 
 }
 
+static void intel_sdvo_create_enhance_property(struct drm_connector *connector)
+{
+       struct intel_output *intel_output = to_intel_output(connector);
+       struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv;
+       struct intel_sdvo_enhancements_reply sdvo_data;
+       struct drm_device *dev = connector->dev;
+       uint8_t status;
+       uint16_t response, data_value[2];
+
+       intel_sdvo_write_cmd(intel_output, SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS,
+                                               NULL, 0);
+       status = intel_sdvo_read_response(intel_output, &sdvo_data,
+                                       sizeof(sdvo_data));
+       if (status != SDVO_CMD_STATUS_SUCCESS) {
+               DRM_DEBUG_KMS(" incorrect response is returned\n");
+               return;
+       }
+       response = *((uint16_t *)&sdvo_data);
+       if (!response) {
+               DRM_DEBUG_KMS("No enhancement is supported\n");
+               return;
+       }
+       if (sdvo_priv->is_tv) {
+               /* When horizontal overscan is supported, add the left/right
+                * properties.
+                */
+               if (sdvo_data.overscan_h) {
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_MAX_OVERSCAN_H, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &data_value, 4);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO max "
+                                               "h_overscan\n");
+                               return;
+                       }
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_OVERSCAN_H, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &response, 2);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO h_overscan\n");
+                               return;
+                       }
+                       sdvo_priv->max_hscan = data_value[0];
+                       sdvo_priv->left_margin = data_value[0] - response;
+                       sdvo_priv->right_margin = sdvo_priv->left_margin;
+                       sdvo_priv->left_property =
+                               drm_property_create(dev, DRM_MODE_PROP_RANGE,
+                                               "left_margin", 2);
+                       sdvo_priv->left_property->values[0] = 0;
+                       sdvo_priv->left_property->values[1] = data_value[0];
+                       drm_connector_attach_property(connector,
+                                               sdvo_priv->left_property,
+                                               sdvo_priv->left_margin);
+                       sdvo_priv->right_property =
+                               drm_property_create(dev, DRM_MODE_PROP_RANGE,
+                                               "right_margin", 2);
+                       sdvo_priv->right_property->values[0] = 0;
+                       sdvo_priv->right_property->values[1] = data_value[0];
+                       drm_connector_attach_property(connector,
+                                               sdvo_priv->right_property,
+                                               sdvo_priv->right_margin);
+                       DRM_DEBUG_KMS("h_overscan: max %d, "
+                                       "default %d, current %d\n",
+                                       data_value[0], data_value[1], response);
+               }
+               if (sdvo_data.overscan_v) {
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_MAX_OVERSCAN_V, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &data_value, 4);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO max "
+                                               "v_overscan\n");
+                               return;
+                       }
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_OVERSCAN_V, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &response, 2);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO v_overscan\n");
+                               return;
+                       }
+                       sdvo_priv->max_vscan = data_value[0];
+                       sdvo_priv->top_margin = data_value[0] - response;
+                       sdvo_priv->bottom_margin = sdvo_priv->top_margin;
+                       sdvo_priv->top_property =
+                               drm_property_create(dev, DRM_MODE_PROP_RANGE,
+                                               "top_margin", 2);
+                       sdvo_priv->top_property->values[0] = 0;
+                       sdvo_priv->top_property->values[1] = data_value[0];
+                       drm_connector_attach_property(connector,
+                                               sdvo_priv->top_property,
+                                               sdvo_priv->top_margin);
+                       sdvo_priv->bottom_property =
+                               drm_property_create(dev, DRM_MODE_PROP_RANGE,
+                                               "bottom_margin", 2);
+                       sdvo_priv->bottom_property->values[0] = 0;
+                       sdvo_priv->bottom_property->values[1] = data_value[0];
+                       drm_connector_attach_property(connector,
+                                               sdvo_priv->bottom_property,
+                                               sdvo_priv->bottom_margin);
+                       DRM_DEBUG_KMS("v_overscan: max %d, "
+                                       "default %d, current %d\n",
+                                       data_value[0], data_value[1], response);
+               }
+               if (sdvo_data.position_h) {
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_MAX_POSITION_H, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &data_value, 4);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO Max h_pos\n");
+                               return;
+                       }
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_POSITION_H, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &response, 2);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO get h_postion\n");
+                               return;
+                       }
+                       sdvo_priv->max_hpos = data_value[0];
+                       sdvo_priv->cur_hpos = response;
+                       sdvo_priv->hpos_property =
+                               drm_property_create(dev, DRM_MODE_PROP_RANGE,
+                                               "hpos", 2);
+                       sdvo_priv->hpos_property->values[0] = 0;
+                       sdvo_priv->hpos_property->values[1] = data_value[0];
+                       drm_connector_attach_property(connector,
+                                               sdvo_priv->hpos_property,
+                                               sdvo_priv->cur_hpos);
+                       DRM_DEBUG_KMS("h_position: max %d, "
+                                       "default %d, current %d\n",
+                                       data_value[0], data_value[1], response);
+               }
+               if (sdvo_data.position_v) {
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_MAX_POSITION_V, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &data_value, 4);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO Max v_pos\n");
+                               return;
+                       }
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_POSITION_V, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &response, 2);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO get v_postion\n");
+                               return;
+                       }
+                       sdvo_priv->max_vpos = data_value[0];
+                       sdvo_priv->cur_vpos = response;
+                       sdvo_priv->vpos_property =
+                               drm_property_create(dev, DRM_MODE_PROP_RANGE,
+                                               "vpos", 2);
+                       sdvo_priv->vpos_property->values[0] = 0;
+                       sdvo_priv->vpos_property->values[1] = data_value[0];
+                       drm_connector_attach_property(connector,
+                                               sdvo_priv->vpos_property,
+                                               sdvo_priv->cur_vpos);
+                       DRM_DEBUG_KMS("v_position: max %d, "
+                                       "default %d, current %d\n",
+                                       data_value[0], data_value[1], response);
+               }
+       }
+       if (sdvo_priv->is_tv) {
+               if (sdvo_data.saturation) {
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_MAX_SATURATION, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &data_value, 4);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO Max sat\n");
+                               return;
+                       }
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_SATURATION, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &response, 2);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO get sat\n");
+                               return;
+                       }
+                       sdvo_priv->max_saturation = data_value[0];
+                       sdvo_priv->cur_saturation = response;
+                       sdvo_priv->saturation_property =
+                               drm_property_create(dev, DRM_MODE_PROP_RANGE,
+                                               "saturation", 2);
+                       sdvo_priv->saturation_property->values[0] = 0;
+                       sdvo_priv->saturation_property->values[1] =
+                                                       data_value[0];
+                       drm_connector_attach_property(connector,
+                                               sdvo_priv->saturation_property,
+                                               sdvo_priv->cur_saturation);
+                       DRM_DEBUG_KMS("saturation: max %d, "
+                                       "default %d, current %d\n",
+                                       data_value[0], data_value[1], response);
+               }
+               if (sdvo_data.contrast) {
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_MAX_CONTRAST, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &data_value, 4);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO Max contrast\n");
+                               return;
+                       }
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_CONTRAST, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &response, 2);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO get contrast\n");
+                               return;
+                       }
+                       sdvo_priv->max_contrast = data_value[0];
+                       sdvo_priv->cur_contrast = response;
+                       sdvo_priv->contrast_property =
+                               drm_property_create(dev, DRM_MODE_PROP_RANGE,
+                                               "contrast", 2);
+                       sdvo_priv->contrast_property->values[0] = 0;
+                       sdvo_priv->contrast_property->values[1] = data_value[0];
+                       drm_connector_attach_property(connector,
+                                               sdvo_priv->contrast_property,
+                                               sdvo_priv->cur_contrast);
+                       DRM_DEBUG_KMS("contrast: max %d, "
+                                       "default %d, current %d\n",
+                                       data_value[0], data_value[1], response);
+               }
+               if (sdvo_data.hue) {
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_MAX_HUE, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &data_value, 4);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO Max hue\n");
+                               return;
+                       }
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_HUE, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &response, 2);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO get hue\n");
+                               return;
+                       }
+                       sdvo_priv->max_hue = data_value[0];
+                       sdvo_priv->cur_hue = response;
+                       sdvo_priv->hue_property =
+                               drm_property_create(dev, DRM_MODE_PROP_RANGE,
+                                               "hue", 2);
+                       sdvo_priv->hue_property->values[0] = 0;
+                       sdvo_priv->hue_property->values[1] =
+                                                       data_value[0];
+                       drm_connector_attach_property(connector,
+                                               sdvo_priv->hue_property,
+                                               sdvo_priv->cur_hue);
+                       DRM_DEBUG_KMS("hue: max %d, default %d, current %d\n",
+                                       data_value[0], data_value[1], response);
+               }
+       }
+       if (sdvo_priv->is_tv || sdvo_priv->is_lvds) {
+               if (sdvo_data.brightness) {
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_MAX_BRIGHTNESS, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &data_value, 4);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO Max bright\n");
+                               return;
+                       }
+                       intel_sdvo_write_cmd(intel_output,
+                               SDVO_CMD_GET_BRIGHTNESS, NULL, 0);
+                       status = intel_sdvo_read_response(intel_output,
+                               &response, 2);
+                       if (status != SDVO_CMD_STATUS_SUCCESS) {
+                               DRM_DEBUG_KMS("Incorrect SDVO get brigh\n");
+                               return;
+                       }
+                       sdvo_priv->max_brightness = data_value[0];
+                       sdvo_priv->cur_brightness = response;
+                       sdvo_priv->brightness_property =
+                               drm_property_create(dev, DRM_MODE_PROP_RANGE,
+                                               "brightness", 2);
+                       sdvo_priv->brightness_property->values[0] = 0;
+                       sdvo_priv->brightness_property->values[1] =
+                                                       data_value[0];
+                       drm_connector_attach_property(connector,
+                                               sdvo_priv->brightness_property,
+                                               sdvo_priv->cur_brightness);
+                       DRM_DEBUG_KMS("brightness: max %d, "
+                                       "default %d, current %d\n",
+                                       data_value[0], data_value[1], response);
+               }
+       }
+}
+
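Each enhancement above repeats the same four steps: query the maximum,
query the current value, create a DRM range property, and attach it. A
hypothetical helper factoring that repetition (stubbed query(), invented
types; not the kernel's API) could look like:

/* Hypothetical factoring of the repeated property-setup steps above;
 * query() stands in for the GET_MAX_x / GET_x SDVO command pairs. */
#include <stdint.h>
#include <stdio.h>

struct range_prop {
        const char *name;
        uint32_t max;
        uint32_t cur;
};

static int query(uint8_t cmd, uint32_t *out)
{
        (void)cmd;
        *out = 42;              /* stub: pretend the encoder answered */
        return 0;
}

static int setup_range_prop(struct range_prop *p, const char *name,
                            uint8_t get_max_cmd, uint8_t get_cur_cmd)
{
        if (query(get_max_cmd, &p->max) || query(get_cur_cmd, &p->cur))
                return -1;      /* bail out early, as each branch above does */
        p->name = name;
        printf("%s: max %u, current %u\n", p->name, p->max, p->cur);
        return 0;
}

int main(void)
{
        struct range_prop brightness;

        return setup_range_prop(&brightness, "brightness", 0x31, 0x32);
}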
 bool intel_sdvo_init(struct drm_device *dev, int output_device)
 {
        struct drm_connector *connector;
@@ -2264,6 +2754,10 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device)
        drm_mode_connector_attach_encoder(&intel_output->base, &intel_output->enc);
        if (sdvo_priv->is_tv)
                intel_sdvo_tv_create_property(connector);
+
+       if (sdvo_priv->is_tv || sdvo_priv->is_lvds)
+               intel_sdvo_create_enhance_property(connector);
+
        drm_sysfs_connector_add(connector);
 
        intel_sdvo_select_ddc_bus(sdvo_priv);
index 6bedd2f..737335f 100644 (file)
@@ -477,8 +477,8 @@ config I2C_PNX
          will be called i2c-pnx.
 
 config I2C_PXA
-       tristate "Intel PXA2XX I2C adapter (EXPERIMENTAL)"
-       depends on EXPERIMENTAL && ARCH_PXA
+       tristate "Intel PXA2XX I2C adapter"
+       depends on ARCH_PXA || ARCH_MMP
        help
          If you have devices in the PXA I2C bus, say yes to this option.
          This driver can also be built as a module.  If so, the module
index 949c97f..1f20a04 100644 (file)
@@ -29,8 +29,8 @@
 
 #include <asm/idle.h>
 
-#include "../dma/ioatdma_hw.h"
-#include "../dma/ioatdma_registers.h"
+#include "../dma/ioat/hw.h"
+#include "../dma/ioat/registers.h"
 
 #define I7300_IDLE_DRIVER_VERSION      "1.55"
 #define I7300_PRINT                    "i7300_idle:"
@@ -126,9 +126,9 @@ static void i7300_idle_ioat_stop(void)
                udelay(10);
 
                sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
-                       IOAT_CHANSTS_DMA_TRANSFER_STATUS;
+                       IOAT_CHANSTS_STATUS;
 
-               if (sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE)
+               if (sts != IOAT_CHANSTS_ACTIVE)
                        break;
 
        }
@@ -160,9 +160,9 @@ static int __init i7300_idle_ioat_selftest(u8 *ctl,
        udelay(1000);
 
        chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
-                       IOAT_CHANSTS_DMA_TRANSFER_STATUS;
+                       IOAT_CHANSTS_STATUS;
 
-       if (chan_sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE) {
+       if (chan_sts != IOAT_CHANSTS_DONE) {
                /* Not complete, reset the channel */
                writeb(IOAT_CHANCMD_RESET,
                       ioat_chanbase + IOAT1_CHANCMD_OFFSET);
@@ -288,9 +288,9 @@ static void __exit i7300_idle_ioat_exit(void)
                       ioat_chanbase + IOAT1_CHANCMD_OFFSET);
 
                chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
-                       IOAT_CHANSTS_DMA_TRANSFER_STATUS;
+                       IOAT_CHANSTS_STATUS;
 
-               if (chan_sts != IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) {
+               if (chan_sts != IOAT_CHANSTS_ACTIVE) {
                        writew(0, ioat_chanbase + IOAT_CHANCTRL_OFFSET);
                        break;
                }
@@ -298,14 +298,14 @@ static void __exit i7300_idle_ioat_exit(void)
        }
 
        chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
-                       IOAT_CHANSTS_DMA_TRANSFER_STATUS;
+                       IOAT_CHANSTS_STATUS;
 
        /*
         * We tried to reset multiple times. If IO A/T channel is still active
         * flag an error and return without cleanup. Memory leak is better
         * than random corruption in that extreme error situation.
         */
-       if (chan_sts == IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE) {
+       if (chan_sts == IOAT_CHANSTS_ACTIVE) {
                printk(KERN_ERR I7300_PRINT "Unable to stop IO A/T channels."
                        " Not freeing resources\n");
                return;
index 57a3c6f..4e0f282 100644 (file)
@@ -37,7 +37,8 @@
 enum rmpp_state {
        RMPP_STATE_ACTIVE,
        RMPP_STATE_TIMEOUT,
-       RMPP_STATE_COMPLETE
+       RMPP_STATE_COMPLETE,
+       RMPP_STATE_CANCELING
 };
 
 struct mad_rmpp_recv {
@@ -86,19 +87,23 @@ void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent)
        unsigned long flags;
 
        spin_lock_irqsave(&agent->lock, flags);
+       list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
+               if (rmpp_recv->state != RMPP_STATE_COMPLETE)
+                       ib_free_recv_mad(rmpp_recv->rmpp_wc);
+               rmpp_recv->state = RMPP_STATE_CANCELING;
+       }
+       spin_unlock_irqrestore(&agent->lock, flags);
+
        list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
                cancel_delayed_work(&rmpp_recv->timeout_work);
                cancel_delayed_work(&rmpp_recv->cleanup_work);
        }
-       spin_unlock_irqrestore(&agent->lock, flags);
 
        flush_workqueue(agent->qp_info->port_priv->wq);
 
        list_for_each_entry_safe(rmpp_recv, temp_rmpp_recv,
                                 &agent->rmpp_list, list) {
                list_del(&rmpp_recv->list);
-               if (rmpp_recv->state != RMPP_STATE_COMPLETE)
-                       ib_free_recv_mad(rmpp_recv->rmpp_wc);
                destroy_rmpp_recv(rmpp_recv);
        }
 }
@@ -260,6 +265,10 @@ static void recv_cleanup_handler(struct work_struct *work)
        unsigned long flags;
 
        spin_lock_irqsave(&rmpp_recv->agent->lock, flags);
+       if (rmpp_recv->state == RMPP_STATE_CANCELING) {
+               spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
+               return;
+       }
        list_del(&rmpp_recv->list);
        spin_unlock_irqrestore(&rmpp_recv->agent->lock, flags);
        destroy_rmpp_recv(rmpp_recv);
index 056b2a4..0aa0110 100644 (file)
@@ -68,11 +68,16 @@ static void catas_reset(struct work_struct *work)
        spin_unlock_irq(&catas_lock);
 
        list_for_each_entry_safe(dev, tmpdev, &tlist, catas_err.list) {
+               struct pci_dev *pdev = dev->pdev;
                ret = __mthca_restart_one(dev->pdev);
+               /* 'dev' is no longer valid after the restart */
                if (ret)
-                       mthca_err(dev, "Reset failed (%d)\n", ret);
-               else
-                       mthca_dbg(dev, "Reset succeeded\n");
+                       printk(KERN_ERR "mthca %s: Reset failed (%d)\n",
+                              pci_name(pdev), ret);
+               else {
+                       struct mthca_dev *d = pci_get_drvdata(pdev);
+                       mthca_dbg(d, "Reset succeeded\n");
+               }
        }
 
        mutex_unlock(&mthca_device_mutex);
index 538e409..e593af3 100644 (file)
@@ -1566,7 +1566,6 @@ static const struct net_device_ops nes_netdev_ops = {
        .ndo_set_mac_address    = nes_netdev_set_mac_address,
        .ndo_set_multicast_list = nes_netdev_set_multicast_list,
        .ndo_change_mtu         = nes_netdev_change_mtu,
-       .ndo_set_mac_address    = eth_mac_addr,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_vlan_rx_register   = nes_netdev_vlan_rx_register,
 };
index 25874fc..8763c1e 100644 (file)
@@ -362,12 +362,19 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
 {
        struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
                                                   carrier_on_task);
+       struct ib_port_attr attr;
 
        /*
         * Take rtnl_lock to avoid racing with ipoib_stop() and
         * turning the carrier back on while a device is being
         * removed.
         */
+       if (ib_query_port(priv->ca, priv->port, &attr) ||
+           attr.state != IB_PORT_ACTIVE) {
+               ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
+               return;
+       }
+
        rtnl_lock();
        netif_carrier_on(priv->dev);
        rtnl_unlock();
index 76d6751..02f4f8f 100644 (file)
@@ -225,6 +225,7 @@ config INPUT_SGI_BTNS
 config INPUT_WINBOND_CIR
        tristate "Winbond IR remote control"
        depends on X86 && PNP
+       select NEW_LEDS
        select LEDS_CLASS
        select BITREVERSE
        help
index 020f957..2158377 100644 (file)
@@ -124,6 +124,8 @@ config MD_RAID456
        select MD_RAID6_PQ
        select ASYNC_MEMCPY
        select ASYNC_XOR
+       select ASYNC_PQ
+       select ASYNC_RAID6_RECOV
        ---help---
          A RAID-5 set of N drives with a capacity of C MB per drive provides
          the capacity of C * (N - 1) MB, and protects against a failure
@@ -152,9 +154,33 @@ config MD_RAID456
 
          If unsure, say Y.
 
+config MULTICORE_RAID456
+       bool "RAID-4/RAID-5/RAID-6 Multicore processing (EXPERIMENTAL)"
+       depends on MD_RAID456
+       depends on SMP
+       depends on EXPERIMENTAL
+       ---help---
+         Enable the raid456 module to dispatch per-stripe raid operations to a
+         thread pool.
+
+         If unsure, say N.
+
 config MD_RAID6_PQ
        tristate
 
+config ASYNC_RAID6_TEST
+       tristate "Self test for hardware accelerated raid6 recovery"
+       depends on MD_RAID6_PQ
+       select ASYNC_RAID6_RECOV
+       ---help---
+         This is a one-shot self test that permutes through the
+         recovery of all the possible two-disk failure scenarios for an
+         N-disk array.  Recovery is performed with the asynchronous
+         raid6 recovery routines, and will optionally use an offload
+         engine if one is available.
+
+         If unsure, say N.
+
 config MD_MULTIPATH
        tristate "Multipath I/O support"
        depends on BLK_DEV_MD
index 3319c2f..6986b00 100644 (file)
@@ -108,6 +108,8 @@ static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page)
  * allocated while we're using it
  */
 static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create)
+__releases(bitmap->lock)
+__acquires(bitmap->lock)
 {
        unsigned char *mappage;
 
@@ -325,7 +327,6 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
        return 0;
 
  bad_alignment:
-       rcu_read_unlock();
        return -EINVAL;
 }
 
@@ -1207,6 +1208,8 @@ void bitmap_daemon_work(struct bitmap *bitmap)
 static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
                                            sector_t offset, int *blocks,
                                            int create)
+__releases(bitmap->lock)
+__acquires(bitmap->lock)
 {
        /* If 'create', we might release the lock and reclaim it.
         * The lock must have been taken with interrupts enabled.
index ea48429..1ceceb3 100644 (file)
@@ -108,6 +108,9 @@ static int linear_congested(void *data, int bits)
        linear_conf_t *conf;
        int i, ret = 0;
 
+       if (mddev_congested(mddev, bits))
+               return 1;
+
        rcu_read_lock();
        conf = rcu_dereference(mddev->private);
 
index 6aa497e..26ba42a 100644 (file)
@@ -262,6 +262,12 @@ static void mddev_resume(mddev_t *mddev)
        mddev->pers->quiesce(mddev, 0);
 }
 
+int mddev_congested(mddev_t *mddev, int bits)
+{
+       return mddev->suspended;
+}
+EXPORT_SYMBOL(mddev_congested);
+
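mddev_congested() gives every personality's congested callback a common
short-circuit: a suspended array reports congestion before any per-device
queue is polled, as the linear, multipath, and raid0 hunks below each do.
A toy version of the pattern (illustrative types only):

/* Toy version of the suspended-array short-circuit added above. */
#include <stdio.h>

struct mddev {
        int suspended;
};

static int mddev_congested(struct mddev *mddev, int bits)
{
        (void)bits;
        return mddev->suspended;
}

static int linear_congested(struct mddev *mddev, int bits)
{
        if (mddev_congested(mddev, bits))
                return 1;
        /* ...otherwise poll each member device's request queue... */
        return 0;
}

int main(void)
{
        struct mddev md = { .suspended = 1 };

        printf("congested: %d\n", linear_congested(&md, 0));    /* 1 */
        return 0;
}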
 
 static inline mddev_t *mddev_get(mddev_t *mddev)
 {
@@ -4218,7 +4224,7 @@ static int do_md_run(mddev_t * mddev)
                        set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
                        mddev->sync_thread = md_register_thread(md_do_sync,
                                                                mddev,
-                                                               "%s_resync");
+                                                               "resync");
                        if (!mddev->sync_thread) {
                                printk(KERN_ERR "%s: could not start resync"
                                       " thread...\n",
@@ -4575,10 +4581,10 @@ static int get_version(void __user * arg)
 static int get_array_info(mddev_t * mddev, void __user * arg)
 {
        mdu_array_info_t info;
-       int nr,working,active,failed,spare;
+       int nr,working,insync,failed,spare;
        mdk_rdev_t *rdev;
 
-       nr=working=active=failed=spare=0;
+       nr=working=insync=failed=spare=0;
        list_for_each_entry(rdev, &mddev->disks, same_set) {
                nr++;
                if (test_bit(Faulty, &rdev->flags))
@@ -4586,7 +4592,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
                else {
                        working++;
                        if (test_bit(In_sync, &rdev->flags))
-                               active++;       
+                               insync++;
                        else
                                spare++;
                }
@@ -4611,7 +4617,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
                info.state = (1<<MD_SB_CLEAN);
        if (mddev->bitmap && mddev->bitmap_offset)
                info.state = (1<<MD_SB_BITMAP_PRESENT);
-       info.active_disks  = active;
+       info.active_disks  = insync;
        info.working_disks = working;
        info.failed_disks  = failed;
        info.spare_disks   = spare;
@@ -4721,7 +4727,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
                if (!list_empty(&mddev->disks)) {
                        mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
                                                        mdk_rdev_t, same_set);
-                       int err = super_types[mddev->major_version]
+                       err = super_types[mddev->major_version]
                                .load_super(rdev, rdev0, mddev->minor_version);
                        if (err < 0) {
                                printk(KERN_WARNING 
@@ -5631,7 +5637,10 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
        thread->run = run;
        thread->mddev = mddev;
        thread->timeout = MAX_SCHEDULE_TIMEOUT;
-       thread->tsk = kthread_run(md_thread, thread, name, mdname(thread->mddev));
+       thread->tsk = kthread_run(md_thread, thread,
+                                 "%s_%s",
+                                 mdname(thread->mddev),
+                                 name ?: mddev->pers->name);
        if (IS_ERR(thread->tsk)) {
                kfree(thread);
                return NULL;
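The name format string moves out of the callers: md_register_thread now hands kthread_run a fixed "%s_%s", combining the array name with either the suffix it was given or, when the caller passes NULL, the personality name. Illustrative results, assuming an array named md0 running raid1:

    md_register_thread(md_do_sync, mddev, "resync"); /* thread "md0_resync" */
    md_register_thread(raid1d, mddev, NULL);         /* thread "md0_raid1"  */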
@@ -6745,7 +6754,7 @@ void md_check_recovery(mddev_t *mddev)
                        }
                        mddev->sync_thread = md_register_thread(md_do_sync,
                                                                mddev,
-                                                               "%s_resync");
+                                                               "resync");
                        if (!mddev->sync_thread) {
                                printk(KERN_ERR "%s: could not start resync"
                                        " thread...\n", 
diff --git a/drivers/md/md.h b/drivers/md/md.h
index f55d2ff..f184b69 100644
@@ -430,6 +430,7 @@ extern void md_write_end(mddev_t *mddev);
 extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
 extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev);
 
+extern int mddev_congested(mddev_t *mddev, int bits);
 extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
                           sector_t sector, int size, struct page *page);
 extern void md_super_wait(mddev_t *mddev);
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index d2d3fd5..ee7646f 100644
@@ -150,7 +150,6 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio)
        }
 
        mp_bh = mempool_alloc(conf->pool, GFP_NOIO);
-       memset(mp_bh, 0, sizeof(*mp_bh));
 
        mp_bh->master_bio = bio;
        mp_bh->mddev = mddev;
@@ -199,6 +198,9 @@ static int multipath_congested(void *data, int bits)
        multipath_conf_t *conf = mddev->private;
        int i, ret = 0;
 
+       if (mddev_congested(mddev, bits))
+               return 1;
+
        rcu_read_lock();
        for (i = 0; i < mddev->raid_disks ; i++) {
                mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev);
@@ -504,7 +506,7 @@ static int multipath_run (mddev_t *mddev)
        }
 
        {
-               mddev->thread = md_register_thread(multipathd, mddev, "%s_multipath");
+               mddev->thread = md_register_thread(multipathd, mddev, NULL);
                if (!mddev->thread) {
                        printk(KERN_ERR "multipath: couldn't allocate thread"
                                " for %s\n", mdname(mddev));
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index f845ed9..d3a4ce0 100644
@@ -44,6 +44,9 @@ static int raid0_congested(void *data, int bits)
        mdk_rdev_t **devlist = conf->devlist;
        int i, ret = 0;
 
+       if (mddev_congested(mddev, bits))
+               return 1;
+
        for (i = 0; i < mddev->raid_disks && !ret ; i++) {
                struct request_queue *q = bdev_get_queue(devlist[i]->bdev);
 
@@ -86,7 +89,7 @@ static void dump_zones(mddev_t *mddev)
 
 static int create_strip_zones(mddev_t *mddev)
 {
-       int i, c, j, err;
+       int i, c, err;
        sector_t curr_zone_end, sectors;
        mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev, **dev;
        struct strip_zone *zone;
@@ -198,6 +201,8 @@ static int create_strip_zones(mddev_t *mddev)
        /* now do the other zones */
        for (i = 1; i < conf->nr_strip_zones; i++)
        {
+               int j;
+
                zone = conf->strip_zone + i;
                dev = conf->devlist + i * mddev->raid_disks;
 
@@ -207,7 +212,6 @@ static int create_strip_zones(mddev_t *mddev)
                c = 0;
 
                for (j=0; j<cnt; j++) {
-                       char b[BDEVNAME_SIZE];
                        rdev = conf->devlist[j];
                        printk(KERN_INFO "raid0: checking %s ...",
                                bdevname(rdev->bdev, b));
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index ff7ed33..d1b9bd5 100644
@@ -576,6 +576,9 @@ static int raid1_congested(void *data, int bits)
        conf_t *conf = mddev->private;
        int i, ret = 0;
 
+       if (mddev_congested(mddev, bits))
+               return 1;
+
        rcu_read_lock();
        for (i = 0; i < mddev->raid_disks; i++) {
                mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
@@ -851,7 +854,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
                read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset;
                read_bio->bi_bdev = mirror->rdev->bdev;
                read_bio->bi_end_io = raid1_end_read_request;
-               read_bio->bi_rw = READ | do_sync;
+               read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
                read_bio->bi_private = r1_bio;
 
                generic_make_request(read_bio);
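do_sync here is a 0/1 truth value, so OR-ing it into bi_rw unshifted would set bit 0 (the read/write direction bit) rather than the sync-I/O flag; the shift places it on the intended bit. A sketch with an illustrative value:

    const bool do_sync = 1;                      /* illustrative */
    unsigned long rw;

    rw = READ | do_sync;                         /* wrong: touches bit 0 */
    rw = READ | (do_sync << BIO_RW_SYNCIO);      /* right: sets the sync flag */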
@@ -943,7 +946,8 @@ static int make_request(struct request_queue *q, struct bio * bio)
                mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset;
                mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
                mbio->bi_end_io = raid1_end_write_request;
-               mbio->bi_rw = WRITE | do_barriers | do_sync;
+               mbio->bi_rw = WRITE | (do_barriers << BIO_RW_BARRIER) |
+                       (do_sync << BIO_RW_SYNCIO);
                mbio->bi_private = r1_bio;
 
                if (behind_pages) {
@@ -1623,7 +1627,8 @@ static void raid1d(mddev_t *mddev)
                                                conf->mirrors[i].rdev->data_offset;
                                        bio->bi_bdev = conf->mirrors[i].rdev->bdev;
                                        bio->bi_end_io = raid1_end_write_request;
-                                       bio->bi_rw = WRITE | do_sync;
+                                       bio->bi_rw = WRITE |
+                                               (do_sync << BIO_RW_SYNCIO);
                                        bio->bi_private = r1_bio;
                                        r1_bio->bios[i] = bio;
                                        generic_make_request(bio);
@@ -1672,7 +1677,7 @@ static void raid1d(mddev_t *mddev)
                                bio->bi_sector = r1_bio->sector + rdev->data_offset;
                                bio->bi_bdev = rdev->bdev;
                                bio->bi_end_io = raid1_end_read_request;
-                               bio->bi_rw = READ | do_sync;
+                               bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
                                bio->bi_private = r1_bio;
                                unplug = 1;
                                generic_make_request(bio);
@@ -2047,7 +2052,7 @@ static int run(mddev_t *mddev)
        conf->last_used = j;
 
 
-       mddev->thread = md_register_thread(raid1d, mddev, "%s_raid1");
+       mddev->thread = md_register_thread(raid1d, mddev, NULL);
        if (!mddev->thread) {
                printk(KERN_ERR
                       "raid1: couldn't allocate thread for %s\n",
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index d0a2152..51c4c5c 100644
@@ -631,6 +631,8 @@ static int raid10_congested(void *data, int bits)
        conf_t *conf = mddev->private;
        int i, ret = 0;
 
+       if (mddev_congested(mddev, bits))
+               return 1;
        rcu_read_lock();
        for (i = 0; i < mddev->raid_disks && ret == 0; i++) {
                mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
@@ -882,7 +884,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
                        mirror->rdev->data_offset;
                read_bio->bi_bdev = mirror->rdev->bdev;
                read_bio->bi_end_io = raid10_end_read_request;
-               read_bio->bi_rw = READ | do_sync;
+               read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
                read_bio->bi_private = r10_bio;
 
                generic_make_request(read_bio);
@@ -950,7 +952,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
                        conf->mirrors[d].rdev->data_offset;
                mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
                mbio->bi_end_io = raid10_end_write_request;
-               mbio->bi_rw = WRITE | do_sync;
+               mbio->bi_rw = WRITE | (do_sync << BIO_RW_SYNCIO);
                mbio->bi_private = r10_bio;
 
                atomic_inc(&r10_bio->remaining);
@@ -1623,7 +1625,7 @@ static void raid10d(mddev_t *mddev)
                                bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr
                                        + rdev->data_offset;
                                bio->bi_bdev = rdev->bdev;
-                               bio->bi_rw = READ | do_sync;
+                               bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
                                bio->bi_private = r10_bio;
                                bio->bi_end_io = raid10_end_read_request;
                                unplug = 1;
@@ -1773,7 +1775,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
        max_sync = RESYNC_PAGES << (PAGE_SHIFT-9);
        if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
                /* recovery... the complicated one */
-               int i, j, k;
+               int j, k;
                r10_bio = NULL;
 
                for (i=0 ; i<conf->raid_disks; i++)
@@ -2188,7 +2190,7 @@ static int run(mddev_t *mddev)
        }
 
 
-       mddev->thread = md_register_thread(raid10d, mddev, "%s_raid10");
+       mddev->thread = md_register_thread(raid10d, mddev, NULL);
        if (!mddev->thread) {
                printk(KERN_ERR
                       "raid10: couldn't allocate thread for %s\n",
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 826eb34..9482980 100644
@@ -47,7 +47,9 @@
 #include <linux/kthread.h>
 #include <linux/raid/pq.h>
 #include <linux/async_tx.h>
+#include <linux/async.h>
 #include <linux/seq_file.h>
+#include <linux/cpu.h>
 #include "md.h"
 #include "raid5.h"
 #include "bitmap.h"
@@ -499,11 +501,18 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
        struct page *bio_page;
        int i;
        int page_offset;
+       struct async_submit_ctl submit;
+       enum async_tx_flags flags = 0;
 
        if (bio->bi_sector >= sector)
                page_offset = (signed)(bio->bi_sector - sector) * 512;
        else
                page_offset = (signed)(sector - bio->bi_sector) * -512;
+
+       if (frombio)
+               flags |= ASYNC_TX_FENCE;
+       init_async_submit(&submit, flags, tx, NULL, NULL, NULL);
+
        bio_for_each_segment(bvl, bio, i) {
                int len = bio_iovec_idx(bio, i)->bv_len;
                int clen;
@@ -525,15 +534,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
                        bio_page = bio_iovec_idx(bio, i)->bv_page;
                        if (frombio)
                                tx = async_memcpy(page, bio_page, page_offset,
-                                       b_offset, clen,
-                                       ASYNC_TX_DEP_ACK,
-                                       tx, NULL, NULL);
+                                                 b_offset, clen, &submit);
                        else
                                tx = async_memcpy(bio_page, page, b_offset,
-                                       page_offset, clen,
-                                       ASYNC_TX_DEP_ACK,
-                                       tx, NULL, NULL);
+                                                 page_offset, clen, &submit);
                }
+               /* chain the operations */
+               submit.depend_tx = tx;
+
                if (clen < len) /* hit end of page */
                        break;
                page_offset +=  len;
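This hunk shows the new async_tx submission convention used throughout the raid5/6 rewrite: dependency, flags, callback and the address-conversion scribble travel in one struct async_submit_ctl instead of being repeated as arguments to every call, and chaining is expressed by updating submit.depend_tx. Stripped to its shape (dest/src/len are placeholders):

    struct async_submit_ctl submit;
    struct dma_async_tx_descriptor *tx = NULL;

    init_async_submit(&submit, ASYNC_TX_FENCE, tx, callback, cb_arg, addr_conv);
    tx = async_memcpy(dest, src, dest_off, src_off, len, &submit);
    submit.depend_tx = tx;          /* the next operation waits on this one */
    tx = async_memcpy(dest2, src2, 0, 0, len, &submit);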
@@ -592,6 +600,7 @@ static void ops_run_biofill(struct stripe_head *sh)
 {
        struct dma_async_tx_descriptor *tx = NULL;
        raid5_conf_t *conf = sh->raid_conf;
+       struct async_submit_ctl submit;
        int i;
 
        pr_debug("%s: stripe %llu\n", __func__,
@@ -615,22 +624,34 @@ static void ops_run_biofill(struct stripe_head *sh)
        }
 
        atomic_inc(&sh->count);
-       async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
-               ops_complete_biofill, sh);
+       init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL);
+       async_trigger_callback(&submit);
 }
 
-static void ops_complete_compute5(void *stripe_head_ref)
+static void mark_target_uptodate(struct stripe_head *sh, int target)
 {
-       struct stripe_head *sh = stripe_head_ref;
-       int target = sh->ops.target;
-       struct r5dev *tgt = &sh->dev[target];
+       struct r5dev *tgt;
 
-       pr_debug("%s: stripe %llu\n", __func__,
-               (unsigned long long)sh->sector);
+       if (target < 0)
+               return;
 
+       tgt = &sh->dev[target];
        set_bit(R5_UPTODATE, &tgt->flags);
        BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
        clear_bit(R5_Wantcompute, &tgt->flags);
+}
+
+static void ops_complete_compute(void *stripe_head_ref)
+{
+       struct stripe_head *sh = stripe_head_ref;
+
+       pr_debug("%s: stripe %llu\n", __func__,
+               (unsigned long long)sh->sector);
+
+       /* mark the computed target(s) as uptodate */
+       mark_target_uptodate(sh, sh->ops.target);
+       mark_target_uptodate(sh, sh->ops.target2);
+
        clear_bit(STRIPE_COMPUTE_RUN, &sh->state);
        if (sh->check_state == check_state_compute_run)
                sh->check_state = check_state_compute_result;
@@ -638,16 +659,24 @@ static void ops_complete_compute5(void *stripe_head_ref)
        release_stripe(sh);
 }
 
-static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh)
+/* return a pointer to the address conversion region of the scribble buffer */
+static addr_conv_t *to_addr_conv(struct stripe_head *sh,
+                                struct raid5_percpu *percpu)
+{
+       return percpu->scribble + sizeof(struct page *) * (sh->disks + 2);
+}
+
+static struct dma_async_tx_descriptor *
+ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
 {
-       /* kernel stack size limits the total number of disks */
        int disks = sh->disks;
-       struct page *xor_srcs[disks];
+       struct page **xor_srcs = percpu->scribble;
        int target = sh->ops.target;
        struct r5dev *tgt = &sh->dev[target];
        struct page *xor_dest = tgt->page;
        int count = 0;
        struct dma_async_tx_descriptor *tx;
+       struct async_submit_ctl submit;
        int i;
 
        pr_debug("%s: stripe %llu block: %d\n",
@@ -660,17 +689,215 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh)
 
        atomic_inc(&sh->count);
 
+       init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL,
+                         ops_complete_compute, sh, to_addr_conv(sh, percpu));
        if (unlikely(count == 1))
-               tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE,
-                       0, NULL, ops_complete_compute5, sh);
+               tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
        else
-               tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
-                       ASYNC_TX_XOR_ZERO_DST, NULL,
-                       ops_complete_compute5, sh);
+               tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
 
        return tx;
 }
 
+/* set_syndrome_sources - populate source buffers for gen_syndrome
+ * @srcs - (struct page *) array of size sh->disks
+ * @sh - stripe_head to parse
+ *
+ * Populates srcs in proper layout order for the stripe and returns the
+ * 'count' of sources to be used in a call to async_gen_syndrome.  The P
+ * destination buffer is recorded in srcs[count] and the Q destination
+ * is recorded in srcs[count+1].
+ */
+static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh)
+{
+       int disks = sh->disks;
+       int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
+       int d0_idx = raid6_d0(sh);
+       int count;
+       int i;
+
+       for (i = 0; i < disks; i++)
+               srcs[i] = (void *)raid6_empty_zero_page;
+
+       count = 0;
+       i = d0_idx;
+       do {
+               int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
+
+               srcs[slot] = sh->dev[i].page;
+               i = raid6_next_disk(i, disks);
+       } while (i != d0_idx);
+       BUG_ON(count != syndrome_disks);
+
+       return count;
+}
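A typical caller, in the shape of ops_run_reconstruct6 later in this patch: the returned count covers the data sources, and the operation is issued over count+2 blocks so the P and Q pages recorded at srcs[count] and srcs[count+1] are included as destinations:

    count = set_syndrome_sources(blocks, sh);
    init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct,
                      sh, to_addr_conv(sh, percpu));
    tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);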
+
+static struct dma_async_tx_descriptor *
+ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
+{
+       int disks = sh->disks;
+       struct page **blocks = percpu->scribble;
+       int target;
+       int qd_idx = sh->qd_idx;
+       struct dma_async_tx_descriptor *tx;
+       struct async_submit_ctl submit;
+       struct r5dev *tgt;
+       struct page *dest;
+       int i;
+       int count;
+
+       if (sh->ops.target < 0)
+               target = sh->ops.target2;
+       else if (sh->ops.target2 < 0)
+               target = sh->ops.target;
+       else
+               /* we should only have one valid target */
+               BUG();
+       BUG_ON(target < 0);
+       pr_debug("%s: stripe %llu block: %d\n",
+               __func__, (unsigned long long)sh->sector, target);
+
+       tgt = &sh->dev[target];
+       BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
+       dest = tgt->page;
+
+       atomic_inc(&sh->count);
+
+       if (target == qd_idx) {
+               count = set_syndrome_sources(blocks, sh);
+               blocks[count] = NULL; /* regenerating p is not necessary */
+               BUG_ON(blocks[count+1] != dest); /* q should already be set */
+               init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
+                                 ops_complete_compute, sh,
+                                 to_addr_conv(sh, percpu));
+               tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
+       } else {
+               /* Compute any data- or p-drive using XOR */
+               count = 0;
+               for (i = disks; i-- ; ) {
+                       if (i == target || i == qd_idx)
+                               continue;
+                       blocks[count++] = sh->dev[i].page;
+               }
+
+               init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
+                                 NULL, ops_complete_compute, sh,
+                                 to_addr_conv(sh, percpu));
+               tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit);
+       }
+
+       return tx;
+}
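ops_run_compute6_1 leans on a convention visible in the Q-only branch above: a NULL in one of the two destination slots tells async_gen_syndrome that that parity does not need to be computed. Sketch of the Q-only case:

    count = set_syndrome_sources(blocks, sh);
    blocks[count] = NULL;      /* P slot: NULL means skip computing P */
    /* blocks[count+1] already holds the Q destination page */
    tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);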
+
+static struct dma_async_tx_descriptor *
+ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
+{
+       int i, count, disks = sh->disks;
+       int syndrome_disks = sh->ddf_layout ? disks : disks-2;
+       int d0_idx = raid6_d0(sh);
+       int faila = -1, failb = -1;
+       int target = sh->ops.target;
+       int target2 = sh->ops.target2;
+       struct r5dev *tgt = &sh->dev[target];
+       struct r5dev *tgt2 = &sh->dev[target2];
+       struct dma_async_tx_descriptor *tx;
+       struct page **blocks = percpu->scribble;
+       struct async_submit_ctl submit;
+
+       pr_debug("%s: stripe %llu block1: %d block2: %d\n",
+                __func__, (unsigned long long)sh->sector, target, target2);
+       BUG_ON(target < 0 || target2 < 0);
+       BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
+       BUG_ON(!test_bit(R5_Wantcompute, &tgt2->flags));
+
+       /* we need to open-code set_syndrome_sources to handle the
+        * slot number conversion for 'faila' and 'failb'
+        */
+       for (i = 0; i < disks ; i++)
+               blocks[i] = (void *)raid6_empty_zero_page;
+       count = 0;
+       i = d0_idx;
+       do {
+               int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
+
+               blocks[slot] = sh->dev[i].page;
+
+               if (i == target)
+                       faila = slot;
+               if (i == target2)
+                       failb = slot;
+               i = raid6_next_disk(i, disks);
+       } while (i != d0_idx);
+       BUG_ON(count != syndrome_disks);
+
+       BUG_ON(faila == failb);
+       if (failb < faila)
+               swap(faila, failb);
+       pr_debug("%s: stripe: %llu faila: %d failb: %d\n",
+                __func__, (unsigned long long)sh->sector, faila, failb);
+
+       atomic_inc(&sh->count);
+
+       if (failb == syndrome_disks+1) {
+               /* Q disk is one of the missing disks */
+               if (faila == syndrome_disks) {
+                       /* Missing P+Q, just recompute */
+                       init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
+                                         ops_complete_compute, sh,
+                                         to_addr_conv(sh, percpu));
+                       return async_gen_syndrome(blocks, 0, count+2,
+                                                 STRIPE_SIZE, &submit);
+               } else {
+                       struct page *dest;
+                       int data_target;
+                       int qd_idx = sh->qd_idx;
+
+                       /* Missing D+Q: recompute D from P, then recompute Q */
+                       if (target == qd_idx)
+                               data_target = target2;
+                       else
+                               data_target = target;
+
+                       count = 0;
+                       for (i = disks; i-- ; ) {
+                               if (i == data_target || i == qd_idx)
+                                       continue;
+                               blocks[count++] = sh->dev[i].page;
+                       }
+                       dest = sh->dev[data_target].page;
+                       init_async_submit(&submit,
+                                         ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
+                                         NULL, NULL, NULL,
+                                         to_addr_conv(sh, percpu));
+                       tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE,
+                                      &submit);
+
+                       count = set_syndrome_sources(blocks, sh);
+                       init_async_submit(&submit, ASYNC_TX_FENCE, tx,
+                                         ops_complete_compute, sh,
+                                         to_addr_conv(sh, percpu));
+                       return async_gen_syndrome(blocks, 0, count+2,
+                                                 STRIPE_SIZE, &submit);
+               }
+       } else {
+               init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
+                                 ops_complete_compute, sh,
+                                 to_addr_conv(sh, percpu));
+               if (failb == syndrome_disks) {
+                       /* We're missing D+P. */
+                       return async_raid6_datap_recov(syndrome_disks+2,
+                                                      STRIPE_SIZE, faila,
+                                                      blocks, &submit);
+               } else {
+                       /* We're missing D+D. */
+                       return async_raid6_2data_recov(syndrome_disks+2,
+                                                      STRIPE_SIZE, faila, failb,
+                                                      blocks, &submit);
+               }
+       }
+}
+
+
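The branch structure of ops_run_compute6_2 maps the converted slot numbers onto the four two-failure cases; as a summary (failb > faila after the swap):

    /* failb == syndrome_disks+1  ->  Q is one of the failures:
     *     faila == syndrome_disks  ->  P+Q missing: regenerate the syndrome
     *     otherwise                ->  D+Q missing: XOR-rebuild D, then gen Q
     * failb == syndrome_disks     ->  D+P missing: async_raid6_datap_recov()
     * otherwise                   ->  D+D missing: async_raid6_2data_recov()
     */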
 static void ops_complete_prexor(void *stripe_head_ref)
 {
        struct stripe_head *sh = stripe_head_ref;
@@ -680,12 +907,13 @@ static void ops_complete_prexor(void *stripe_head_ref)
 }
 
 static struct dma_async_tx_descriptor *
-ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
+ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
+              struct dma_async_tx_descriptor *tx)
 {
-       /* kernel stack size limits the total number of disks */
        int disks = sh->disks;
-       struct page *xor_srcs[disks];
+       struct page **xor_srcs = percpu->scribble;
        int count = 0, pd_idx = sh->pd_idx, i;
+       struct async_submit_ctl submit;
 
        /* existing parity data subtracted */
        struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
@@ -700,9 +928,9 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
                        xor_srcs[count++] = dev->page;
        }
 
-       tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
-               ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx,
-               ops_complete_prexor, sh);
+       init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+                         ops_complete_prexor, sh, to_addr_conv(sh, percpu));
+       tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
 
        return tx;
 }
@@ -742,17 +970,21 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
        return tx;
 }
 
-static void ops_complete_postxor(void *stripe_head_ref)
+static void ops_complete_reconstruct(void *stripe_head_ref)
 {
        struct stripe_head *sh = stripe_head_ref;
-       int disks = sh->disks, i, pd_idx = sh->pd_idx;
+       int disks = sh->disks;
+       int pd_idx = sh->pd_idx;
+       int qd_idx = sh->qd_idx;
+       int i;
 
        pr_debug("%s: stripe %llu\n", __func__,
                (unsigned long long)sh->sector);
 
        for (i = disks; i--; ) {
                struct r5dev *dev = &sh->dev[i];
-               if (dev->written || i == pd_idx)
+
+               if (dev->written || i == pd_idx || i == qd_idx)
                        set_bit(R5_UPTODATE, &dev->flags);
        }
 
@@ -770,12 +1002,12 @@ static void ops_complete_postxor(void *stripe_head_ref)
 }
 
 static void
-ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
+ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
+                    struct dma_async_tx_descriptor *tx)
 {
-       /* kernel stack size limits the total number of disks */
        int disks = sh->disks;
-       struct page *xor_srcs[disks];
-
+       struct page **xor_srcs = percpu->scribble;
+       struct async_submit_ctl submit;
        int count = 0, pd_idx = sh->pd_idx, i;
        struct page *xor_dest;
        int prexor = 0;
@@ -809,18 +1041,36 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
         * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST
         * for the synchronous xor case
         */
-       flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK |
+       flags = ASYNC_TX_ACK |
                (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);
 
        atomic_inc(&sh->count);
 
-       if (unlikely(count == 1)) {
-               flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST);
-               tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE,
-                       flags, tx, ops_complete_postxor, sh);
-       } else
-               tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
-                       flags, tx, ops_complete_postxor, sh);
+       init_async_submit(&submit, flags, tx, ops_complete_reconstruct, sh,
+                         to_addr_conv(sh, percpu));
+       if (unlikely(count == 1))
+               tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
+       else
+               tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
+}
+
+static void
+ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
+                    struct dma_async_tx_descriptor *tx)
+{
+       struct async_submit_ctl submit;
+       struct page **blocks = percpu->scribble;
+       int count;
+
+       pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);
+
+       count = set_syndrome_sources(blocks, sh);
+
+       atomic_inc(&sh->count);
+
+       init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct,
+                         sh, to_addr_conv(sh, percpu));
+       async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE,  &submit);
 }
 
 static void ops_complete_check(void *stripe_head_ref)
@@ -835,63 +1085,115 @@ static void ops_complete_check(void *stripe_head_ref)
        release_stripe(sh);
 }
 
-static void ops_run_check(struct stripe_head *sh)
+static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
 {
-       /* kernel stack size limits the total number of disks */
        int disks = sh->disks;
-       struct page *xor_srcs[disks];
+       int pd_idx = sh->pd_idx;
+       int qd_idx = sh->qd_idx;
+       struct page *xor_dest;
+       struct page **xor_srcs = percpu->scribble;
        struct dma_async_tx_descriptor *tx;
-
-       int count = 0, pd_idx = sh->pd_idx, i;
-       struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
+       struct async_submit_ctl submit;
+       int count;
+       int i;
 
        pr_debug("%s: stripe %llu\n", __func__,
                (unsigned long long)sh->sector);
 
+       count = 0;
+       xor_dest = sh->dev[pd_idx].page;
+       xor_srcs[count++] = xor_dest;
        for (i = disks; i--; ) {
-               struct r5dev *dev = &sh->dev[i];
-               if (i != pd_idx)
-                       xor_srcs[count++] = dev->page;
+               if (i == pd_idx || i == qd_idx)
+                       continue;
+               xor_srcs[count++] = sh->dev[i].page;
        }
 
-       tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
-               &sh->ops.zero_sum_result, 0, NULL, NULL, NULL);
+       init_async_submit(&submit, 0, NULL, NULL, NULL,
+                         to_addr_conv(sh, percpu));
+       tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
+                          &sh->ops.zero_sum_result, &submit);
+
+       atomic_inc(&sh->count);
+       init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL);
+       tx = async_trigger_callback(&submit);
+}
+
+static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp)
+{
+       struct page **srcs = percpu->scribble;
+       struct async_submit_ctl submit;
+       int count;
+
+       pr_debug("%s: stripe %llu checkp: %d\n", __func__,
+               (unsigned long long)sh->sector, checkp);
+
+       count = set_syndrome_sources(srcs, sh);
+       if (!checkp)
+               srcs[count] = NULL;
 
        atomic_inc(&sh->count);
-       tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
-               ops_complete_check, sh);
+       init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check,
+                         sh, to_addr_conv(sh, percpu));
+       async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE,
+                          &sh->ops.zero_sum_result, percpu->spare_page, &submit);
 }
 
-static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
+static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
 {
        int overlap_clear = 0, i, disks = sh->disks;
        struct dma_async_tx_descriptor *tx = NULL;
+       raid5_conf_t *conf = sh->raid_conf;
+       int level = conf->level;
+       struct raid5_percpu *percpu;
+       unsigned long cpu;
 
+       cpu = get_cpu();
+       percpu = per_cpu_ptr(conf->percpu, cpu);
        if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
                ops_run_biofill(sh);
                overlap_clear++;
        }
 
        if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) {
-               tx = ops_run_compute5(sh);
-               /* terminate the chain if postxor is not set to be run */
-               if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request))
+               if (level < 6)
+                       tx = ops_run_compute5(sh, percpu);
+               else {
+                       if (sh->ops.target2 < 0 || sh->ops.target < 0)
+                               tx = ops_run_compute6_1(sh, percpu);
+                       else
+                               tx = ops_run_compute6_2(sh, percpu);
+               }
+               /* terminate the chain if reconstruct is not set to be run */
+               if (tx && !test_bit(STRIPE_OP_RECONSTRUCT, &ops_request))
                        async_tx_ack(tx);
        }
 
        if (test_bit(STRIPE_OP_PREXOR, &ops_request))
-               tx = ops_run_prexor(sh, tx);
+               tx = ops_run_prexor(sh, percpu, tx);
 
        if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
                tx = ops_run_biodrain(sh, tx);
                overlap_clear++;
        }
 
-       if (test_bit(STRIPE_OP_POSTXOR, &ops_request))
-               ops_run_postxor(sh, tx);
+       if (test_bit(STRIPE_OP_RECONSTRUCT, &ops_request)) {
+               if (level < 6)
+                       ops_run_reconstruct5(sh, percpu, tx);
+               else
+                       ops_run_reconstruct6(sh, percpu, tx);
+       }
 
-       if (test_bit(STRIPE_OP_CHECK, &ops_request))
-               ops_run_check(sh);
+       if (test_bit(STRIPE_OP_CHECK, &ops_request)) {
+               if (sh->check_state == check_state_run)
+                       ops_run_check_p(sh, percpu);
+               else if (sh->check_state == check_state_run_q)
+                       ops_run_check_pq(sh, percpu, 0);
+               else if (sh->check_state == check_state_run_pq)
+                       ops_run_check_pq(sh, percpu, 1);
+               else
+                       BUG();
+       }
 
        if (overlap_clear)
                for (i = disks; i--; ) {
@@ -899,6 +1201,7 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request)
                        if (test_and_clear_bit(R5_Overlap, &dev->flags))
                                wake_up(&sh->raid_conf->wait_for_overlap);
                }
+       put_cpu();
 }
 
 static int grow_one_stripe(raid5_conf_t *conf)
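raid_run_ops pins itself to a CPU for the whole chain so the per-cpu scribble region cannot change underneath the in-flight operations; the access pattern in isolation:

    cpu = get_cpu();                          /* disables preemption */
    percpu = per_cpu_ptr(conf->percpu, cpu);
    /* ... run operations that use percpu->scribble ... */
    put_cpu();                                /* re-enables preemption */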
@@ -948,6 +1251,28 @@ static int grow_stripes(raid5_conf_t *conf, int num)
        return 0;
 }
 
+/**
+ * scribble_len - return the required size of the scribble region
+ * @num - total number of disks in the array
+ *
+ * The size must be enough to contain:
+ * 1/ a struct page pointer for each device in the array +2
+ * 2/ room to convert each entry in (1) to its corresponding dma
+ *    (dma_map_page()) or page (page_address()) address.
+ *
+ * Note: the +2 is for the destination buffers of the ddf/raid6 case where we
+ * calculate over all devices (not just the data blocks), using zeros in place
+ * of the P and Q blocks.
+ */
+static size_t scribble_len(int num)
+{
+       size_t len;
+
+       len = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2);
+
+       return len;
+}
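Concretely, assuming 64-bit page pointers and an 8-byte addr_conv_t (an assumption; the exact size is per-arch), a 6-device array needs:

    len = sizeof(struct page *) * (6+2) + sizeof(addr_conv_t) * (6+2)
        = 8*8 + 8*8 = 128 bytes per CPU

to_addr_conv() above returns the address just past the page-pointer array, i.e. the start of the conversion region.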
+
 static int resize_stripes(raid5_conf_t *conf, int newsize)
 {
        /* Make all the stripes able to hold 'newsize' devices.
@@ -976,6 +1301,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
        struct stripe_head *osh, *nsh;
        LIST_HEAD(newstripes);
        struct disk_info *ndisks;
+       unsigned long cpu;
        int err;
        struct kmem_cache *sc;
        int i;
@@ -1041,7 +1367,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
        /* Step 3.
         * At this point, we are holding all the stripes so the array
         * is completely stalled, so now is a good time to resize
-        * conf->disks.
+        * conf->disks and the scribble region
         */
        ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
        if (ndisks) {
@@ -1052,10 +1378,30 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
        } else
                err = -ENOMEM;
 
+       get_online_cpus();
+       conf->scribble_len = scribble_len(newsize);
+       for_each_present_cpu(cpu) {
+               struct raid5_percpu *percpu;
+               void *scribble;
+
+               percpu = per_cpu_ptr(conf->percpu, cpu);
+               scribble = kmalloc(conf->scribble_len, GFP_NOIO);
+
+               if (scribble) {
+                       kfree(percpu->scribble);
+                       percpu->scribble = scribble;
+               } else {
+                       err = -ENOMEM;
+                       break;
+               }
+       }
+       put_online_cpus();
+
        /* Step 4, return new stripes to service */
        while(!list_empty(&newstripes)) {
                nsh = list_entry(newstripes.next, struct stripe_head, lru);
                list_del_init(&nsh->lru);
+
                for (i=conf->raid_disks; i < newsize; i++)
                        if (nsh->dev[i].page == NULL) {
                                struct page *p = alloc_page(GFP_NOIO);
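The get_online_cpus()/put_online_cpus() bracket in Step 3 keeps CPUs from coming or going while the scribble buffers are swapped; the pattern in isolation:

    get_online_cpus();                 /* hold off cpu hotplug */
    for_each_present_cpu(cpu) {
            /* reallocate per_cpu_ptr(conf->percpu, cpu) state */
    }
    put_online_cpus();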
@@ -1594,258 +1940,13 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous)
 }
 
 
-
-/*
- * Copy data between a page in the stripe cache, and one or more bion
- * The page could align with the middle of the bio, or there could be
- * several bion, each with several bio_vecs, which cover part of the page
- * Multiple bion are linked together on bi_next.  There may be extras
- * at the end of this list.  We ignore them.
- */
-static void copy_data(int frombio, struct bio *bio,
-                    struct page *page,
-                    sector_t sector)
-{
-       char *pa = page_address(page);
-       struct bio_vec *bvl;
-       int i;
-       int page_offset;
-
-       if (bio->bi_sector >= sector)
-               page_offset = (signed)(bio->bi_sector - sector) * 512;
-       else
-               page_offset = (signed)(sector - bio->bi_sector) * -512;
-       bio_for_each_segment(bvl, bio, i) {
-               int len = bio_iovec_idx(bio,i)->bv_len;
-               int clen;
-               int b_offset = 0;
-
-               if (page_offset < 0) {
-                       b_offset = -page_offset;
-                       page_offset += b_offset;
-                       len -= b_offset;
-               }
-
-               if (len > 0 && page_offset + len > STRIPE_SIZE)
-                       clen = STRIPE_SIZE - page_offset;
-               else clen = len;
-
-               if (clen > 0) {
-                       char *ba = __bio_kmap_atomic(bio, i, KM_USER0);
-                       if (frombio)
-                               memcpy(pa+page_offset, ba+b_offset, clen);
-                       else
-                               memcpy(ba+b_offset, pa+page_offset, clen);
-                       __bio_kunmap_atomic(ba, KM_USER0);
-               }
-               if (clen < len) /* hit end of page */
-                       break;
-               page_offset +=  len;
-       }
-}
-
-#define check_xor()    do {                                              \
-                               if (count == MAX_XOR_BLOCKS) {            \
-                               xor_blocks(count, STRIPE_SIZE, dest, ptr);\
-                               count = 0;                                \
-                          }                                              \
-                       } while(0)
-
-static void compute_parity6(struct stripe_head *sh, int method)
-{
-       raid5_conf_t *conf = sh->raid_conf;
-       int i, pd_idx, qd_idx, d0_idx, disks = sh->disks, count;
-       int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
-       struct bio *chosen;
-       /**** FIX THIS: This could be very bad if disks is close to 256 ****/
-       void *ptrs[syndrome_disks+2];
-
-       pd_idx = sh->pd_idx;
-       qd_idx = sh->qd_idx;
-       d0_idx = raid6_d0(sh);
-
-       pr_debug("compute_parity, stripe %llu, method %d\n",
-               (unsigned long long)sh->sector, method);
-
-       switch(method) {
-       case READ_MODIFY_WRITE:
-               BUG();          /* READ_MODIFY_WRITE N/A for RAID-6 */
-       case RECONSTRUCT_WRITE:
-               for (i= disks; i-- ;)
-                       if ( i != pd_idx && i != qd_idx && sh->dev[i].towrite ) {
-                               chosen = sh->dev[i].towrite;
-                               sh->dev[i].towrite = NULL;
-
-                               if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
-                                       wake_up(&conf->wait_for_overlap);
-
-                               BUG_ON(sh->dev[i].written);
-                               sh->dev[i].written = chosen;
-                       }
-               break;
-       case CHECK_PARITY:
-               BUG();          /* Not implemented yet */
-       }
-
-       for (i = disks; i--;)
-               if (sh->dev[i].written) {
-                       sector_t sector = sh->dev[i].sector;
-                       struct bio *wbi = sh->dev[i].written;
-                       while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
-                               copy_data(1, wbi, sh->dev[i].page, sector);
-                               wbi = r5_next_bio(wbi, sector);
-                       }
-
-                       set_bit(R5_LOCKED, &sh->dev[i].flags);
-                       set_bit(R5_UPTODATE, &sh->dev[i].flags);
-               }
-
-       /* Note that unlike RAID-5, the ordering of the disks matters greatly.*/
-
-       for (i = 0; i < disks; i++)
-               ptrs[i] = (void *)raid6_empty_zero_page;
-
-       count = 0;
-       i = d0_idx;
-       do {
-               int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
-
-               ptrs[slot] = page_address(sh->dev[i].page);
-               if (slot < syndrome_disks &&
-                   !test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
-                       printk(KERN_ERR "block %d/%d not uptodate "
-                              "on parity calc\n", i, count);
-                       BUG();
-               }
-
-               i = raid6_next_disk(i, disks);
-       } while (i != d0_idx);
-       BUG_ON(count != syndrome_disks);
-
-       raid6_call.gen_syndrome(syndrome_disks+2, STRIPE_SIZE, ptrs);
-
-       switch(method) {
-       case RECONSTRUCT_WRITE:
-               set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
-               set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
-               set_bit(R5_LOCKED,   &sh->dev[pd_idx].flags);
-               set_bit(R5_LOCKED,   &sh->dev[qd_idx].flags);
-               break;
-       case UPDATE_PARITY:
-               set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
-               set_bit(R5_UPTODATE, &sh->dev[qd_idx].flags);
-               break;
-       }
-}
-
-
-/* Compute one missing block */
-static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
-{
-       int i, count, disks = sh->disks;
-       void *ptr[MAX_XOR_BLOCKS], *dest, *p;
-       int qd_idx = sh->qd_idx;
-
-       pr_debug("compute_block_1, stripe %llu, idx %d\n",
-               (unsigned long long)sh->sector, dd_idx);
-
-       if ( dd_idx == qd_idx ) {
-               /* We're actually computing the Q drive */
-               compute_parity6(sh, UPDATE_PARITY);
-       } else {
-               dest = page_address(sh->dev[dd_idx].page);
-               if (!nozero) memset(dest, 0, STRIPE_SIZE);
-               count = 0;
-               for (i = disks ; i--; ) {
-                       if (i == dd_idx || i == qd_idx)
-                               continue;
-                       p = page_address(sh->dev[i].page);
-                       if (test_bit(R5_UPTODATE, &sh->dev[i].flags))
-                               ptr[count++] = p;
-                       else
-                               printk("compute_block() %d, stripe %llu, %d"
-                                      " not present\n", dd_idx,
-                                      (unsigned long long)sh->sector, i);
-
-                       check_xor();
-               }
-               if (count)
-                       xor_blocks(count, STRIPE_SIZE, dest, ptr);
-               if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
-               else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
-       }
-}
-
-/* Compute two missing blocks */
-static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
-{
-       int i, count, disks = sh->disks;
-       int syndrome_disks = sh->ddf_layout ? disks : disks-2;
-       int d0_idx = raid6_d0(sh);
-       int faila = -1, failb = -1;
-       /**** FIX THIS: This could be very bad if disks is close to 256 ****/
-       void *ptrs[syndrome_disks+2];
-
-       for (i = 0; i < disks ; i++)
-               ptrs[i] = (void *)raid6_empty_zero_page;
-       count = 0;
-       i = d0_idx;
-       do {
-               int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
-
-               ptrs[slot] = page_address(sh->dev[i].page);
-
-               if (i == dd_idx1)
-                       faila = slot;
-               if (i == dd_idx2)
-                       failb = slot;
-               i = raid6_next_disk(i, disks);
-       } while (i != d0_idx);
-       BUG_ON(count != syndrome_disks);
-
-       BUG_ON(faila == failb);
-       if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
-
-       pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n",
-                (unsigned long long)sh->sector, dd_idx1, dd_idx2,
-                faila, failb);
-
-       if (failb == syndrome_disks+1) {
-               /* Q disk is one of the missing disks */
-               if (faila == syndrome_disks) {
-                       /* Missing P+Q, just recompute */
-                       compute_parity6(sh, UPDATE_PARITY);
-                       return;
-               } else {
-                       /* We're missing D+Q; recompute D from P */
-                       compute_block_1(sh, ((dd_idx1 == sh->qd_idx) ?
-                                            dd_idx2 : dd_idx1),
-                                       0);
-                       compute_parity6(sh, UPDATE_PARITY); /* Is this necessary? */
-                       return;
-               }
-       }
-
-       /* We're missing D+P or D+D; */
-       if (failb == syndrome_disks) {
-               /* We're missing D+P. */
-               raid6_datap_recov(syndrome_disks+2, STRIPE_SIZE, faila, ptrs);
-       } else {
-               /* We're missing D+D. */
-               raid6_2data_recov(syndrome_disks+2, STRIPE_SIZE, faila, failb,
-                                 ptrs);
-       }
-
-       /* Both the above update both missing blocks */
-       set_bit(R5_UPTODATE, &sh->dev[dd_idx1].flags);
-       set_bit(R5_UPTODATE, &sh->dev[dd_idx2].flags);
-}
-
 static void
-schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
+schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
                         int rcw, int expand)
 {
        int i, pd_idx = sh->pd_idx, disks = sh->disks;
+       raid5_conf_t *conf = sh->raid_conf;
+       int level = conf->level;
 
        if (rcw) {
                /* if we are not expanding this is a proper write request, and
@@ -1858,7 +1959,7 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
                } else
                        sh->reconstruct_state = reconstruct_state_run;
 
-               set_bit(STRIPE_OP_POSTXOR, &s->ops_request);
+               set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
 
                for (i = disks; i--; ) {
                        struct r5dev *dev = &sh->dev[i];
@@ -1871,17 +1972,18 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
                                s->locked++;
                        }
                }
-               if (s->locked + 1 == disks)
+               if (s->locked + conf->max_degraded == disks)
                        if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
-                               atomic_inc(&sh->raid_conf->pending_full_writes);
+                               atomic_inc(&conf->pending_full_writes);
        } else {
+               BUG_ON(level == 6);
                BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
                        test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
 
                sh->reconstruct_state = reconstruct_state_prexor_drain_run;
                set_bit(STRIPE_OP_PREXOR, &s->ops_request);
                set_bit(STRIPE_OP_BIODRAIN, &s->ops_request);
-               set_bit(STRIPE_OP_POSTXOR, &s->ops_request);
+               set_bit(STRIPE_OP_RECONSTRUCT, &s->ops_request);
 
                for (i = disks; i--; ) {
                        struct r5dev *dev = &sh->dev[i];
@@ -1899,13 +2001,22 @@ schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s,
                }
        }
 
-       /* keep the parity disk locked while asynchronous operations
+       /* keep the parity disk(s) locked while asynchronous operations
         * are in flight
         */
        set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
        clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
        s->locked++;
 
+       if (level == 6) {
+               int qd_idx = sh->qd_idx;
+               struct r5dev *dev = &sh->dev[qd_idx];
+
+               set_bit(R5_LOCKED, &dev->flags);
+               clear_bit(R5_UPTODATE, &dev->flags);
+               s->locked++;
+       }
+
        pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n",
                __func__, (unsigned long long)sh->sector,
                s->locked, s->ops_request);
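The full-write test generalizes RAID5's hard-coded "+ 1" to "+ conf->max_degraded", the number of parity devices, and the prexor (read-modify-write) branch gains BUG_ON(level == 6) because RAID6 always reconstruct-writes. Worked example:

    /* 6-device RAID6 (max_degraded == 2): a full stripe write locks
     * all 4 data blocks in the rcw loop, so
     *     s->locked + conf->max_degraded == 4 + 2 == 6 == disks
     * and STRIPE_FULL_WRITE is set.
     */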
@@ -1986,13 +2097,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
 
 static void end_reshape(raid5_conf_t *conf);
 
-static int page_is_zero(struct page *p)
-{
-       char *a = page_address(p);
-       return ((*(u32*)a) == 0 &&
-               memcmp(a, a+4, STRIPE_SIZE-4)==0);
-}
-
 static void stripe_set_idx(sector_t stripe, raid5_conf_t *conf, int previous,
                            struct stripe_head *sh)
 {
@@ -2132,9 +2236,10 @@ static int fetch_block5(struct stripe_head *sh, struct stripe_head_state *s,
                        set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
                        set_bit(R5_Wantcompute, &dev->flags);
                        sh->ops.target = disk_idx;
+                       sh->ops.target2 = -1;
                        s->req_compute = 1;
                        /* Careful: from this point on 'uptodate' is in the eye
-                        * of raid5_run_ops which services 'compute' operations
+                        * of raid_run_ops which services 'compute' operations
                         * before writes. R5_Wantcompute flags a block that will
                         * be R5_UPTODATE by the time it is needed for a
                         * subsequent operation.
@@ -2173,61 +2278,104 @@ static void handle_stripe_fill5(struct stripe_head *sh,
        set_bit(STRIPE_HANDLE, &sh->state);
 }
 
-static void handle_stripe_fill6(struct stripe_head *sh,
-                       struct stripe_head_state *s, struct r6_state *r6s,
-                       int disks)
+/* fetch_block6 - checks the given member device to see if its data needs
+ * to be read or computed to satisfy a request.
+ *
+ * Returns 1 when no more member devices need to be checked, otherwise returns
+ * 0 to tell the loop in handle_stripe_fill6 to continue
+ */
+static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s,
+                        struct r6_state *r6s, int disk_idx, int disks)
 {
-       int i;
-       for (i = disks; i--; ) {
-               struct r5dev *dev = &sh->dev[i];
-               if (!test_bit(R5_LOCKED, &dev->flags) &&
-                   !test_bit(R5_UPTODATE, &dev->flags) &&
-                   (dev->toread || (dev->towrite &&
-                    !test_bit(R5_OVERWRITE, &dev->flags)) ||
-                    s->syncing || s->expanding ||
-                    (s->failed >= 1 &&
-                     (sh->dev[r6s->failed_num[0]].toread ||
-                      s->to_write)) ||
-                    (s->failed >= 2 &&
-                     (sh->dev[r6s->failed_num[1]].toread ||
-                      s->to_write)))) {
-                       /* we would like to get this block, possibly
-                        * by computing it, but we might not be able to
+       struct r5dev *dev = &sh->dev[disk_idx];
+       struct r5dev *fdev[2] = { &sh->dev[r6s->failed_num[0]],
+                                 &sh->dev[r6s->failed_num[1]] };
+
+       if (!test_bit(R5_LOCKED, &dev->flags) &&
+           !test_bit(R5_UPTODATE, &dev->flags) &&
+           (dev->toread ||
+            (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
+            s->syncing || s->expanding ||
+            (s->failed >= 1 &&
+             (fdev[0]->toread || s->to_write)) ||
+            (s->failed >= 2 &&
+             (fdev[1]->toread || s->to_write)))) {
+               /* we would like to get this block, possibly by computing it,
+                * otherwise read it if the backing disk is insync
+                */
+               BUG_ON(test_bit(R5_Wantcompute, &dev->flags));
+               BUG_ON(test_bit(R5_Wantread, &dev->flags));
+               if ((s->uptodate == disks - 1) &&
+                   (s->failed && (disk_idx == r6s->failed_num[0] ||
+                                  disk_idx == r6s->failed_num[1]))) {
+                       /* the disk has failed and this block was requested;
+                        * compute it instead
                         */
-                       if ((s->uptodate == disks - 1) &&
-                           (s->failed && (i == r6s->failed_num[0] ||
-                                          i == r6s->failed_num[1]))) {
-                               pr_debug("Computing stripe %llu block %d\n",
-                                      (unsigned long long)sh->sector, i);
-                               compute_block_1(sh, i, 0);
-                               s->uptodate++;
-                       } else if ( s->uptodate == disks-2 && s->failed >= 2 ) {
-                               /* Computing 2-failure is *very* expensive; only
-                                * do it if failed >= 2
-                                */
-                               int other;
-                               for (other = disks; other--; ) {
-                                       if (other == i)
-                                               continue;
-                                       if (!test_bit(R5_UPTODATE,
-                                             &sh->dev[other].flags))
-                                               break;
-                               }
-                               BUG_ON(other < 0);
-                               pr_debug("Computing stripe %llu blocks %d,%d\n",
-                                      (unsigned long long)sh->sector,
-                                      i, other);
-                               compute_block_2(sh, i, other);
-                               s->uptodate += 2;
-                       } else if (test_bit(R5_Insync, &dev->flags)) {
-                               set_bit(R5_LOCKED, &dev->flags);
-                               set_bit(R5_Wantread, &dev->flags);
-                               s->locked++;
-                               pr_debug("Reading block %d (sync=%d)\n",
-                                       i, s->syncing);
+                       pr_debug("Computing stripe %llu block %d\n",
+                              (unsigned long long)sh->sector, disk_idx);
+                       set_bit(STRIPE_COMPUTE_RUN, &sh->state);
+                       set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
+                       set_bit(R5_Wantcompute, &dev->flags);
+                       sh->ops.target = disk_idx;
+                       sh->ops.target2 = -1; /* no 2nd target */
+                       s->req_compute = 1;
+                       s->uptodate++;
+                       return 1;
+               } else if (s->uptodate == disks-2 && s->failed >= 2) {
+                       /* Computing 2-failure is *very* expensive; only
+                        * do it if failed >= 2
+                        */
+                       int other;
+                       for (other = disks; other--; ) {
+                               if (other == disk_idx)
+                                       continue;
+                               if (!test_bit(R5_UPTODATE,
+                                     &sh->dev[other].flags))
+                                       break;
                        }
+                       BUG_ON(other < 0);
+                       pr_debug("Computing stripe %llu blocks %d,%d\n",
+                              (unsigned long long)sh->sector,
+                              disk_idx, other);
+                       set_bit(STRIPE_COMPUTE_RUN, &sh->state);
+                       set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
+                       set_bit(R5_Wantcompute, &sh->dev[disk_idx].flags);
+                       set_bit(R5_Wantcompute, &sh->dev[other].flags);
+                       sh->ops.target = disk_idx;
+                       sh->ops.target2 = other;
+                       s->uptodate += 2;
+                       s->req_compute = 1;
+                       return 1;
+               } else if (test_bit(R5_Insync, &dev->flags)) {
+                       set_bit(R5_LOCKED, &dev->flags);
+                       set_bit(R5_Wantread, &dev->flags);
+                       s->locked++;
+                       pr_debug("Reading block %d (sync=%d)\n",
+                               disk_idx, s->syncing);
                }
        }
+
+       return 0;
+}
+
+/**
+ * handle_stripe_fill6 - read or compute data to satisfy pending requests.
+ */
+static void handle_stripe_fill6(struct stripe_head *sh,
+                       struct stripe_head_state *s, struct r6_state *r6s,
+                       int disks)
+{
+       int i;
+
+       /* look for blocks to read/compute, skip this if a compute
+        * is already in flight, or if the stripe contents are in the
+        * midst of changing due to a write
+        */
+       if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
+           !sh->reconstruct_state)
+               for (i = disks; i--; )
+                       if (fetch_block6(sh, s, r6s, i, disks))
+                               break;
        set_bit(STRIPE_HANDLE, &sh->state);
 }
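The rewrite above stops calling compute_block_1()/compute_block_2() synchronously; fetch_block6() now only marks R5_Wantcompute and records the block(s) to rebuild in ops.target/ops.target2, with -1 as the "no second target" sentinel, and the async compute path does the work later. A small user-space sketch of that convention (names illustrative, not kernel API):

#include <assert.h>
#include <stdio.h>

struct ops { int target, target2; };

static int ntargets(const struct ops *o)
{
	if (o->target < 0)
		return 0;
	return (o->target2 < 0) ? 1 : 2;
}

int main(void)
{
	struct ops one = { .target = 3, .target2 = -1 };
	struct ops two = { .target = 3, .target2 = 7 };

	assert(ntargets(&one) == 1);	/* single-block compute */
	assert(ntargets(&two) == 2);	/* dual-failure compute */
	printf("one=%d two=%d\n", ntargets(&one), ntargets(&two));
	return 0;
}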
 
@@ -2361,114 +2509,61 @@ static void handle_stripe_dirtying5(raid5_conf_t *conf,
         */
        /* since handle_stripe can be called at any time we need to handle the
         * case where a compute block operation has been submitted and then a
-        * subsequent call wants to start a write request.  raid5_run_ops only
-        * handles the case where compute block and postxor are requested
+        * subsequent call wants to start a write request.  raid_run_ops only
+        * handles the case where compute block and reconstruct are requested
         * simultaneously.  If this is not the case then new writes need to be
         * held off until the compute completes.
         */
        if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
            (s->locked == 0 && (rcw == 0 || rmw == 0) &&
            !test_bit(STRIPE_BIT_DELAY, &sh->state)))
-               schedule_reconstruction5(sh, s, rcw == 0, 0);
+               schedule_reconstruction(sh, s, rcw == 0, 0);
 }
 
 static void handle_stripe_dirtying6(raid5_conf_t *conf,
                struct stripe_head *sh, struct stripe_head_state *s,
-               struct r6_state *r6s, int disks)
-{
-       int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i;
-       int qd_idx = sh->qd_idx;
-       for (i = disks; i--; ) {
-               struct r5dev *dev = &sh->dev[i];
-               /* Would I have to read this buffer for reconstruct_write */
-               if (!test_bit(R5_OVERWRITE, &dev->flags)
-                   && i != pd_idx && i != qd_idx
-                   && (!test_bit(R5_LOCKED, &dev->flags)
-                           ) &&
-                   !test_bit(R5_UPTODATE, &dev->flags)) {
-                       if (test_bit(R5_Insync, &dev->flags)) rcw++;
-                       else {
-                               pr_debug("raid6: must_compute: "
-                                       "disk %d flags=%#lx\n", i, dev->flags);
-                               must_compute++;
-                       }
-               }
-       }
-       pr_debug("for sector %llu, rcw=%d, must_compute=%d\n",
-              (unsigned long long)sh->sector, rcw, must_compute);
-       set_bit(STRIPE_HANDLE, &sh->state);
-
-       if (rcw > 0)
-               /* want reconstruct write, but need to get some data */
-               for (i = disks; i--; ) {
-                       struct r5dev *dev = &sh->dev[i];
-                       if (!test_bit(R5_OVERWRITE, &dev->flags)
-                           && !(s->failed == 0 && (i == pd_idx || i == qd_idx))
-                           && !test_bit(R5_LOCKED, &dev->flags) &&
-                           !test_bit(R5_UPTODATE, &dev->flags) &&
-                           test_bit(R5_Insync, &dev->flags)) {
-                               if (
-                                 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-                                       pr_debug("Read_old stripe %llu "
-                                               "block %d for Reconstruct\n",
-                                            (unsigned long long)sh->sector, i);
-                                       set_bit(R5_LOCKED, &dev->flags);
-                                       set_bit(R5_Wantread, &dev->flags);
-                                       s->locked++;
-                               } else {
-                                       pr_debug("Request delayed stripe %llu "
-                                               "block %d for Reconstruct\n",
-                                            (unsigned long long)sh->sector, i);
-                                       set_bit(STRIPE_DELAYED, &sh->state);
-                                       set_bit(STRIPE_HANDLE, &sh->state);
-                               }
+               struct r6_state *r6s, int disks)
+{
+       int rcw = 0, pd_idx = sh->pd_idx, i;
+       int qd_idx = sh->qd_idx;
+
+       set_bit(STRIPE_HANDLE, &sh->state);
+       for (i = disks; i--; ) {
+               struct r5dev *dev = &sh->dev[i];
+               /* check if we don't have enough data */
+               if (!test_bit(R5_OVERWRITE, &dev->flags) &&
+                   i != pd_idx && i != qd_idx &&
+                   !test_bit(R5_LOCKED, &dev->flags) &&
+                   !(test_bit(R5_UPTODATE, &dev->flags) ||
+                     test_bit(R5_Wantcompute, &dev->flags))) {
+                       rcw++;
+                       if (!test_bit(R5_Insync, &dev->flags))
+                               continue; /* it's a failed drive */
+
+                       if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+                               pr_debug("Read_old stripe %llu "
+                                       "block %d for Reconstruct\n",
+                                    (unsigned long long)sh->sector, i);
+                               set_bit(R5_LOCKED, &dev->flags);
+                               set_bit(R5_Wantread, &dev->flags);
+                               s->locked++;
+                       } else {
+                               pr_debug("Request delayed stripe %llu "
+                                       "block %d for Reconstruct\n",
+                                    (unsigned long long)sh->sector, i);
+                               set_bit(STRIPE_DELAYED, &sh->state);
+                               set_bit(STRIPE_HANDLE, &sh->state);
                        }
                }
+       }
        /* now if nothing is locked, and if we have enough data, we can start a
         * write request
         */
-       if (s->locked == 0 && rcw == 0 &&
+       if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) &&
+           s->locked == 0 && rcw == 0 &&
            !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
-               if (must_compute > 0) {
-                       /* We have failed blocks and need to compute them */
-                       switch (s->failed) {
-                       case 0:
-                               BUG();
-                       case 1:
-                               compute_block_1(sh, r6s->failed_num[0], 0);
-                               break;
-                       case 2:
-                               compute_block_2(sh, r6s->failed_num[0],
-                                               r6s->failed_num[1]);
-                               break;
-                       default: /* This request should have been failed? */
-                               BUG();
-                       }
-               }
-
-               pr_debug("Computing parity for stripe %llu\n",
-                       (unsigned long long)sh->sector);
-               compute_parity6(sh, RECONSTRUCT_WRITE);
-               /* now every locked buffer is ready to be written */
-               for (i = disks; i--; )
-                       if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
-                               pr_debug("Writing stripe %llu block %d\n",
-                                      (unsigned long long)sh->sector, i);
-                               s->locked++;
-                               set_bit(R5_Wantwrite, &sh->dev[i].flags);
-                       }
-               if (s->locked == disks)
-                       if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
-                               atomic_inc(&conf->pending_full_writes);
-               /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
-               set_bit(STRIPE_INSYNC, &sh->state);
-
-               if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-                       atomic_dec(&conf->preread_active_stripes);
-                       if (atomic_read(&conf->preread_active_stripes) <
-                           IO_THRESHOLD)
-                               md_wakeup_thread(conf->mddev->thread);
-               }
+               schedule_reconstruction(sh, s, 1, 0);
        }
 }
 
@@ -2527,7 +2622,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
                 * we are done.  Otherwise update the mismatch count and repair
                 * parity if !MD_RECOVERY_CHECK
                 */
-               if (sh->ops.zero_sum_result == 0)
+               if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0)
                        /* parity is correct (on disc,
                         * not in buffer any more)
                         */
@@ -2544,6 +2639,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
                                set_bit(R5_Wantcompute,
                                        &sh->dev[sh->pd_idx].flags);
                                sh->ops.target = sh->pd_idx;
+                               sh->ops.target2 = -1;
                                s->uptodate++;
                        }
                }
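With the async check, sh->ops.zero_sum_result is now a bitmask rather than a plain zero/non-zero word, which is why the test above masks with SUM_CHECK_P_RESULT instead of comparing against 0. A runnable sketch of the flag handling; the values mirror the kernel's enum sum_check_flags (bit 0 for P, bit 1 for Q):

#include <stdio.h>

/* mirrors enum sum_check_flags: bit 0 = P mismatch, bit 1 = Q mismatch */
enum sum_check_flags {
	SUM_CHECK_P_RESULT = (1 << 0),
	SUM_CHECK_Q_RESULT = (1 << 1),
};

int main(void)
{
	enum sum_check_flags res = SUM_CHECK_Q_RESULT;	/* example outcome */

	if (!res)
		printf("parity clean\n");
	if (res & SUM_CHECK_P_RESULT)
		printf("repair P\n");
	if (res & SUM_CHECK_Q_RESULT)
		printf("repair Q\n");
	return 0;
}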
@@ -2560,67 +2656,74 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
 
 
 static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
-                               struct stripe_head_state *s,
-                               struct r6_state *r6s, struct page *tmp_page,
-                               int disks)
+                                 struct stripe_head_state *s,
+                                 struct r6_state *r6s, int disks)
 {
-       int update_p = 0, update_q = 0;
-       struct r5dev *dev;
        int pd_idx = sh->pd_idx;
        int qd_idx = sh->qd_idx;
+       struct r5dev *dev;
 
        set_bit(STRIPE_HANDLE, &sh->state);
 
        BUG_ON(s->failed > 2);
-       BUG_ON(s->uptodate < disks);
+
        /* Want to check and possibly repair P and Q.
         * However there could be one 'failed' device, in which
         * case we can only check one of them, possibly using the
         * other to generate missing data
         */
 
-       /* If !tmp_page, we cannot do the calculations,
-        * but as we have set STRIPE_HANDLE, we will soon be called
-        * by stripe_handle with a tmp_page - just wait until then.
-        */
-       if (tmp_page) {
+       switch (sh->check_state) {
+       case check_state_idle:
+               /* start a new check operation if there are < 2 failures */
                if (s->failed == r6s->q_failed) {
-                       /* The only possible failed device holds 'Q', so it
+                       /* The only possible failed device holds Q, so it
                         * makes sense to check P (If anything else were failed,
                         * we would have used P to recreate it).
                         */
-                       compute_block_1(sh, pd_idx, 1);
-                       if (!page_is_zero(sh->dev[pd_idx].page)) {
-                               compute_block_1(sh, pd_idx, 0);
-                               update_p = 1;
-                       }
+                       sh->check_state = check_state_run;
                }
                if (!r6s->q_failed && s->failed < 2) {
-                       /* q is not failed, and we didn't use it to generate
+                       /* Q is not failed, and we didn't use it to generate
                         * anything, so it makes sense to check it
                         */
-                       memcpy(page_address(tmp_page),
-                              page_address(sh->dev[qd_idx].page),
-                              STRIPE_SIZE);
-                       compute_parity6(sh, UPDATE_PARITY);
-                       if (memcmp(page_address(tmp_page),
-                                  page_address(sh->dev[qd_idx].page),
-                                  STRIPE_SIZE) != 0) {
-                               clear_bit(STRIPE_INSYNC, &sh->state);
-                               update_q = 1;
-                       }
+                       if (sh->check_state == check_state_run)
+                               sh->check_state = check_state_run_pq;
+                       else
+                               sh->check_state = check_state_run_q;
                }
-               if (update_p || update_q) {
-                       conf->mddev->resync_mismatches += STRIPE_SECTORS;
-                       if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
-                               /* don't try to repair!! */
-                               update_p = update_q = 0;
+
+               /* discard potentially stale zero_sum_result */
+               sh->ops.zero_sum_result = 0;
+
+               if (sh->check_state == check_state_run) {
+                       /* async_xor_zero_sum destroys the contents of P */
+                       clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
+                       s->uptodate--;
+               }
+               if (sh->check_state >= check_state_run &&
+                   sh->check_state <= check_state_run_pq) {
+                       /* async_syndrome_zero_sum preserves P and Q, so
+                        * no need to mark them !uptodate here
+                        */
+                       set_bit(STRIPE_OP_CHECK, &s->ops_request);
+                       break;
                }
 
+               /* we have 2-disk failure */
+               BUG_ON(s->failed != 2);
+               /* fall through */
+       case check_state_compute_result:
+               sh->check_state = check_state_idle;
+
+               /* check that a write has not made the stripe insync */
+               if (test_bit(STRIPE_INSYNC, &sh->state))
+                       break;
+
                /* now write out any block on a failed drive,
-                * or P or Q if they need it
+                * or P or Q if they were recomputed
                 */
-
+               BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */
                if (s->failed == 2) {
                        dev = &sh->dev[r6s->failed_num[1]];
                        s->locked++;
@@ -2633,14 +2736,13 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
                        set_bit(R5_LOCKED, &dev->flags);
                        set_bit(R5_Wantwrite, &dev->flags);
                }
-
-               if (update_p) {
+               if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
                        dev = &sh->dev[pd_idx];
                        s->locked++;
                        set_bit(R5_LOCKED, &dev->flags);
                        set_bit(R5_Wantwrite, &dev->flags);
                }
-               if (update_q) {
+               if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
                        dev = &sh->dev[qd_idx];
                        s->locked++;
                        set_bit(R5_LOCKED, &dev->flags);
@@ -2649,6 +2751,70 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
                clear_bit(STRIPE_DEGRADED, &sh->state);
 
                set_bit(STRIPE_INSYNC, &sh->state);
+               break;
+       case check_state_run:
+       case check_state_run_q:
+       case check_state_run_pq:
+               break; /* we will be called again upon completion */
+       case check_state_check_result:
+               sh->check_state = check_state_idle;
+
+               /* handle a successful check operation: if parity is correct
+                * we are done.  Otherwise update the mismatch count and repair
+                * parity if !MD_RECOVERY_CHECK
+                */
+               if (sh->ops.zero_sum_result == 0) {
+                       /* both parities are correct */
+                       if (!s->failed)
+                               set_bit(STRIPE_INSYNC, &sh->state);
+                       else {
+                               /* in contrast to the raid5 case we can validate
+                                * parity, but still have a failure to write
+                                * back
+                                */
+                               sh->check_state = check_state_compute_result;
+                               /* Returning at this point means we may go off
+                                * and bring p and/or q uptodate again, so we
+                                * recheck zero_sum_result afterwards to see
+                                * whether p or q need writeback
+                                */
+                       }
+               } else {
+                       conf->mddev->resync_mismatches += STRIPE_SECTORS;
+                       if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
+                               /* don't try to repair!! */
+                               set_bit(STRIPE_INSYNC, &sh->state);
+                       else {
+                               int *target = &sh->ops.target;
+
+                               sh->ops.target = -1;
+                               sh->ops.target2 = -1;
+                               sh->check_state = check_state_compute_run;
+                               set_bit(STRIPE_COMPUTE_RUN, &sh->state);
+                               set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
+                               if (sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) {
+                                       set_bit(R5_Wantcompute,
+                                               &sh->dev[pd_idx].flags);
+                                       *target = pd_idx;
+                                       target = &sh->ops.target2;
+                                       s->uptodate++;
+                               }
+                               if (sh->ops.zero_sum_result & SUM_CHECK_Q_RESULT) {
+                                       set_bit(R5_Wantcompute,
+                                               &sh->dev[qd_idx].flags);
+                                       *target = qd_idx;
+                                       s->uptodate++;
+                               }
+                       }
+               }
+               break;
+       case check_state_compute_run:
+               break;
+       default:
+               printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n",
+                      __func__, sh->check_state,
+                      (unsigned long long) sh->sector);
+               BUG();
        }
 }
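handle_parity_checks6() above is now re-entered once per completed async operation instead of doing a synchronous memcmp against tmp_page, so it helps to see the state graph in one place. An orientation sketch (compilable C, but the names are shortened and illustrative, not the kernel enum):

/*
 * Rough map of the raid6 check_state machine driven above.
 * Transitions marked (cb) are made when the async operation's
 * completion callback advances the state.
 */
enum check_state_sketch {
	c_idle,		/* -> run / run_q / run_pq, or straight to
			 * compute_result when two disks have failed */
	c_run,		/* xor check of P        -> check_result (cb) */
	c_run_q,	/* syndrome check of Q   -> check_result (cb) */
	c_run_pq,	/* combined P+Q check    -> check_result (cb) */
	c_check_result,	/* clean: INSYNC (or compute_result if failed);
			 * mismatch: -> compute_run to repair */
	c_compute_run,	/* async recompute       -> compute_result (cb) */
	c_compute_result/* write back P/Q/failed blocks, -> idle */
};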
 
@@ -2666,6 +2832,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
                if (i != sh->pd_idx && i != sh->qd_idx) {
                        int dd_idx, j;
                        struct stripe_head *sh2;
+                       struct async_submit_ctl submit;
 
                        sector_t bn = compute_blocknr(sh, i, 1);
                        sector_t s = raid5_compute_sector(conf, bn, 0,
@@ -2685,9 +2852,10 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
                        }
 
                        /* place all the copies on one channel */
+                       init_async_submit(&submit, 0, tx, NULL, NULL, NULL);
                        tx = async_memcpy(sh2->dev[dd_idx].page,
-                               sh->dev[i].page, 0, 0, STRIPE_SIZE,
-                               ASYNC_TX_DEP_ACK, tx, NULL, NULL);
+                                         sh->dev[i].page, 0, 0, STRIPE_SIZE,
+                                         &submit);
 
                        set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
                        set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
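This hunk shows the async_tx calling-convention change: the dependency descriptor, flags and callback that used to be trailing arguments of async_memcpy() now travel in a struct async_submit_ctl filled in by init_async_submit(). A kernel-context sketch of the new shape, under the same API assumptions as the hunk above:

#include <linux/async_tx.h>

static struct dma_async_tx_descriptor *
chain_copy(struct page *dst, struct page *src, size_t len,
	   struct dma_async_tx_descriptor *dep)
{
	struct async_submit_ctl submit;

	/* flags = 0, depend on 'dep', no callback, no scribble space */
	init_async_submit(&submit, 0, dep, NULL, NULL, NULL);
	return async_memcpy(dst, src, 0, 0, len, &submit);
}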
@@ -2756,7 +2924,8 @@ static bool handle_stripe5(struct stripe_head *sh)
        rcu_read_lock();
        for (i=disks; i--; ) {
                mdk_rdev_t *rdev;
-               struct r5dev *dev = &sh->dev[i];
+
+               dev = &sh->dev[i];
                clear_bit(R5_Insync, &dev->flags);
 
                pr_debug("check %d: state 0x%lx toread %p read %p write %p "
@@ -2973,7 +3142,7 @@ static bool handle_stripe5(struct stripe_head *sh)
                /* Need to write out all blocks after computing parity */
                sh->disks = conf->raid_disks;
                stripe_set_idx(sh->sector, conf, 0, sh);
-               schedule_reconstruction5(sh, &s, 1, 1);
+               schedule_reconstruction(sh, &s, 1, 1);
        } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
                clear_bit(STRIPE_EXPAND_READY, &sh->state);
                atomic_dec(&conf->reshape_stripes);
@@ -2993,7 +3162,7 @@ static bool handle_stripe5(struct stripe_head *sh)
                md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
 
        if (s.ops_request)
-               raid5_run_ops(sh, s.ops_request);
+               raid_run_ops(sh, s.ops_request);
 
        ops_run_io(sh, &s);
 
@@ -3002,7 +3171,7 @@ static bool handle_stripe5(struct stripe_head *sh)
        return blocked_rdev == NULL;
 }
 
-static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
+static bool handle_stripe6(struct stripe_head *sh)
 {
        raid5_conf_t *conf = sh->raid_conf;
        int disks = sh->disks;
@@ -3014,9 +3183,10 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
        mdk_rdev_t *blocked_rdev = NULL;
 
        pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
-               "pd_idx=%d, qd_idx=%d\n",
+               "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n",
               (unsigned long long)sh->sector, sh->state,
-              atomic_read(&sh->count), pd_idx, qd_idx);
+              atomic_read(&sh->count), pd_idx, qd_idx,
+              sh->check_state, sh->reconstruct_state);
        memset(&s, 0, sizeof(s));
 
        spin_lock(&sh->lock);
@@ -3036,35 +3206,26 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 
                pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
                        i, dev->flags, dev->toread, dev->towrite, dev->written);
-               /* maybe we can reply to a read */
-               if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) {
-                       struct bio *rbi, *rbi2;
-                       pr_debug("Return read for disc %d\n", i);
-                       spin_lock_irq(&conf->device_lock);
-                       rbi = dev->toread;
-                       dev->toread = NULL;
-                       if (test_and_clear_bit(R5_Overlap, &dev->flags))
-                               wake_up(&conf->wait_for_overlap);
-                       spin_unlock_irq(&conf->device_lock);
-                       while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
-                               copy_data(0, rbi, dev->page, dev->sector);
-                               rbi2 = r5_next_bio(rbi, dev->sector);
-                               spin_lock_irq(&conf->device_lock);
-                               if (!raid5_dec_bi_phys_segments(rbi)) {
-                                       rbi->bi_next = return_bi;
-                                       return_bi = rbi;
-                               }
-                               spin_unlock_irq(&conf->device_lock);
-                               rbi = rbi2;
-                       }
-               }
+               /* maybe we can reply to a read
+                *
+                * new wantfill requests are only permitted while
+                * ops_complete_biofill is guaranteed to be inactive
+                */
+               if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
+                   !test_bit(STRIPE_BIOFILL_RUN, &sh->state))
+                       set_bit(R5_Wantfill, &dev->flags);
 
                /* now count some things */
                if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
                if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
+               if (test_bit(R5_Wantcompute, &dev->flags)) {
+                       s.compute++;
+                       BUG_ON(s.compute > 2);
+               }
 
-
-               if (dev->toread)
+               if (test_bit(R5_Wantfill, &dev->flags)) {
+                       s.to_fill++;
+               } else if (dev->toread)
                        s.to_read++;
                if (dev->towrite) {
                        s.to_write++;
@@ -3105,6 +3266,11 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
                blocked_rdev = NULL;
        }
 
+       if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
+               set_bit(STRIPE_OP_BIOFILL, &s.ops_request);
+               set_bit(STRIPE_BIOFILL_RUN, &sh->state);
+       }
+
        pr_debug("locked=%d uptodate=%d to_read=%d"
               " to_write=%d failed=%d failed_num=%d,%d\n",
               s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
@@ -3145,19 +3311,62 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
         * or to load a block that is being partially written.
         */
        if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
-           (s.syncing && (s.uptodate < disks)) || s.expanding)
+           (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding)
                handle_stripe_fill6(sh, &s, &r6s, disks);
 
-       /* now to consider writing and what else, if anything should be read */
-       if (s.to_write)
+       /* Now we check to see if any write operations have recently
+        * completed
+        */
+       if (sh->reconstruct_state == reconstruct_state_drain_result) {
+               int qd_idx = sh->qd_idx;
+
+               sh->reconstruct_state = reconstruct_state_idle;
+               /* All the 'written' buffers and the parity blocks are ready to
+                * be written back to disk
+                */
+               BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
+               BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags));
+               for (i = disks; i--; ) {
+                       dev = &sh->dev[i];
+                       if (test_bit(R5_LOCKED, &dev->flags) &&
+                           (i == sh->pd_idx || i == qd_idx ||
+                            dev->written)) {
+                               pr_debug("Writing block %d\n", i);
+                               BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
+                               set_bit(R5_Wantwrite, &dev->flags);
+                               if (!test_bit(R5_Insync, &dev->flags) ||
+                                   ((i == sh->pd_idx || i == qd_idx) &&
+                                     s.failed == 0))
+                                       set_bit(STRIPE_INSYNC, &sh->state);
+                       }
+               }
+               if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+                       atomic_dec(&conf->preread_active_stripes);
+                       if (atomic_read(&conf->preread_active_stripes) <
+                               IO_THRESHOLD)
+                               md_wakeup_thread(conf->mddev->thread);
+               }
+       }
+
+       /* Now to consider new write requests and what else, if anything
+        * should be read.  We do not handle new writes when:
+        * 1/ A 'write' operation (copy+gen_syndrome) is already in flight.
+        * 2/ A 'check' operation is in flight, as it may clobber the parity
+        *    block.
+        */
+       if (s.to_write && !sh->reconstruct_state && !sh->check_state)
                handle_stripe_dirtying6(conf, sh, &s, &r6s, disks);
 
        /* maybe we need to check and possibly fix the parity for this stripe
         * Any reads will already have been scheduled, so we just see if enough
-        * data is available
+        * data is available.  The parity check is held off while parity
+        * dependent operations are in flight.
         */
-       if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state))
-               handle_parity_checks6(conf, sh, &s, &r6s, tmp_page, disks);
+       if (sh->check_state ||
+           (s.syncing && s.locked == 0 &&
+            !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
+            !test_bit(STRIPE_INSYNC, &sh->state)))
+               handle_parity_checks6(conf, sh, &s, &r6s, disks);
 
        if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
                md_done_sync(conf->mddev, STRIPE_SECTORS,1);
@@ -3178,15 +3387,29 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
                                        set_bit(R5_Wantwrite, &dev->flags);
                                        set_bit(R5_ReWrite, &dev->flags);
                                        set_bit(R5_LOCKED, &dev->flags);
+                                       s.locked++;
                                } else {
                                        /* let's read it back */
                                        set_bit(R5_Wantread, &dev->flags);
                                        set_bit(R5_LOCKED, &dev->flags);
+                                       s.locked++;
                                }
                        }
                }
 
-       if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
+       /* Finish reconstruct operations initiated by the expansion process */
+       if (sh->reconstruct_state == reconstruct_state_result) {
+               sh->reconstruct_state = reconstruct_state_idle;
+               clear_bit(STRIPE_EXPANDING, &sh->state);
+               for (i = conf->raid_disks; i--; ) {
+                       set_bit(R5_Wantwrite, &sh->dev[i].flags);
+                       set_bit(R5_LOCKED, &sh->dev[i].flags);
+                       s.locked++;
+               }
+       }
+
+       if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
+           !sh->reconstruct_state) {
                struct stripe_head *sh2
                        = get_active_stripe(conf, sh->sector, 1, 1, 1);
                if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
@@ -3207,14 +3430,8 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
                /* Need to write out all blocks after computing P&Q */
                sh->disks = conf->raid_disks;
                stripe_set_idx(sh->sector, conf, 0, sh);
-               compute_parity6(sh, RECONSTRUCT_WRITE);
-               for (i = conf->raid_disks ; i-- ;  ) {
-                       set_bit(R5_LOCKED, &sh->dev[i].flags);
-                       s.locked++;
-                       set_bit(R5_Wantwrite, &sh->dev[i].flags);
-               }
-               clear_bit(STRIPE_EXPANDING, &sh->state);
-       } else if (s.expanded) {
+               schedule_reconstruction(sh, &s, 1, 1);
+       } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) {
                clear_bit(STRIPE_EXPAND_READY, &sh->state);
                atomic_dec(&conf->reshape_stripes);
                wake_up(&conf->wait_for_overlap);
@@ -3232,6 +3449,9 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
        if (unlikely(blocked_rdev))
                md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
 
+       if (s.ops_request)
+               raid_run_ops(sh, s.ops_request);
+
        ops_run_io(sh, &s);
 
        return_io(return_bi);
@@ -3240,16 +3460,14 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 }
 
 /* returns true if the stripe was handled */
-static bool handle_stripe(struct stripe_head *sh, struct page *tmp_page)
+static bool handle_stripe(struct stripe_head *sh)
 {
        if (sh->raid_conf->level == 6)
-               return handle_stripe6(sh, tmp_page);
+               return handle_stripe6(sh);
        else
                return handle_stripe5(sh);
 }
 
-
-
 static void raid5_activate_delayed(raid5_conf_t *conf)
 {
        if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
@@ -3331,6 +3549,9 @@ static int raid5_congested(void *data, int bits)
        /* No difference between reads and writes.  Just check
         * how busy the stripe_cache is
         */
+
+       if (mddev_congested(mddev, bits))
+               return 1;
        if (conf->inactive_blocked)
                return 1;
        if (conf->quiesce)
@@ -3880,7 +4101,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
        INIT_LIST_HEAD(&stripes);
        for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) {
                int j;
-               int skipped = 0;
+               int skipped_disk = 0;
                sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1);
                set_bit(STRIPE_EXPANDING, &sh->state);
                atomic_inc(&conf->reshape_stripes);
@@ -3896,14 +4117,14 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
                                continue;
                        s = compute_blocknr(sh, j, 0);
                        if (s < raid5_size(mddev, 0, 0)) {
-                               skipped = 1;
+                               skipped_disk = 1;
                                continue;
                        }
                        memset(page_address(sh->dev[j].page), 0, STRIPE_SIZE);
                        set_bit(R5_Expanded, &sh->dev[j].flags);
                        set_bit(R5_UPTODATE, &sh->dev[j].flags);
                }
-               if (!skipped) {
+               if (!skipped_disk) {
                        set_bit(STRIPE_EXPAND_READY, &sh->state);
                        set_bit(STRIPE_HANDLE, &sh->state);
                }
@@ -4057,7 +4278,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
        spin_unlock(&sh->lock);
 
        /* wait for any blocked device to be handled */
-       while(unlikely(!handle_stripe(sh, NULL)))
+       while (unlikely(!handle_stripe(sh)))
                ;
        release_stripe(sh);
 
@@ -4114,7 +4335,7 @@ static int  retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
                        return handled;
                }
 
-               handle_stripe(sh, NULL);
+               handle_stripe(sh);
                release_stripe(sh);
                handled++;
        }
@@ -4128,6 +4349,36 @@ static int  retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
        return handled;
 }
 
+#ifdef CONFIG_MULTICORE_RAID456
+static void __process_stripe(void *param, async_cookie_t cookie)
+{
+       struct stripe_head *sh = param;
+
+       handle_stripe(sh);
+       release_stripe(sh);
+}
+
+static void process_stripe(struct stripe_head *sh, struct list_head *domain)
+{
+       async_schedule_domain(__process_stripe, sh, domain);
+}
+
+static void synchronize_stripe_processing(struct list_head *domain)
+{
+       async_synchronize_full_domain(domain);
+}
+#else
+static void process_stripe(struct stripe_head *sh, struct list_head *domain)
+{
+       handle_stripe(sh);
+       release_stripe(sh);
+       cond_resched();
+}
+
+static void synchronize_stripe_processing(struct list_head *domain)
+{
+}
+#endif
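Under CONFIG_MULTICORE_RAID456 the stripes are fanned out through the shared async layer; the list_head acts as a private domain so raid5d can flush its own batch without waiting on unrelated async work. A kernel-context sketch of the same pattern, assuming the 2.6.31-era <linux/async.h> interface used above (do_unit/run_batch are hypothetical names):

#include <linux/async.h>

static void do_unit(void *data, async_cookie_t cookie)
{
	/* per-item work, run from an async worker thread */
}

static void run_batch(void *items[], int n)
{
	LIST_HEAD(batch_domain);
	int i;

	for (i = 0; i < n; i++)
		async_schedule_domain(do_unit, items[i], &batch_domain);

	/* flush only this batch, not unrelated async work */
	async_synchronize_full_domain(&batch_domain);
}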
 
 
 /*
@@ -4142,6 +4393,7 @@ static void raid5d(mddev_t *mddev)
        struct stripe_head *sh;
        raid5_conf_t *conf = mddev->private;
        int handled;
+       LIST_HEAD(raid_domain);
 
        pr_debug("+++ raid5d active\n");
 
@@ -4178,8 +4430,7 @@ static void raid5d(mddev_t *mddev)
                spin_unlock_irq(&conf->device_lock);
                
                handled++;
-               handle_stripe(sh, conf->spare_page);
-               release_stripe(sh);
+               process_stripe(sh, &raid_domain);
 
                spin_lock_irq(&conf->device_lock);
        }
@@ -4187,6 +4438,7 @@ static void raid5d(mddev_t *mddev)
 
        spin_unlock_irq(&conf->device_lock);
 
+       synchronize_stripe_processing(&raid_domain);
        async_tx_issue_pending_all();
        unplug_slaves(mddev);
 
@@ -4319,15 +4571,118 @@ raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks)
        return sectors * (raid_disks - conf->max_degraded);
 }
 
+static void raid5_free_percpu(raid5_conf_t *conf)
+{
+       struct raid5_percpu *percpu;
+       unsigned long cpu;
+
+       if (!conf->percpu)
+               return;
+
+       get_online_cpus();
+       for_each_possible_cpu(cpu) {
+               percpu = per_cpu_ptr(conf->percpu, cpu);
+               safe_put_page(percpu->spare_page);
+               kfree(percpu->scribble);
+       }
+#ifdef CONFIG_HOTPLUG_CPU
+       unregister_cpu_notifier(&conf->cpu_notify);
+#endif
+       put_online_cpus();
+
+       free_percpu(conf->percpu);
+}
+
 static void free_conf(raid5_conf_t *conf)
 {
        shrink_stripes(conf);
-       safe_put_page(conf->spare_page);
+       raid5_free_percpu(conf);
        kfree(conf->disks);
        kfree(conf->stripe_hashtbl);
        kfree(conf);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
+                             void *hcpu)
+{
+       raid5_conf_t *conf = container_of(nfb, raid5_conf_t, cpu_notify);
+       long cpu = (long)hcpu;
+       struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);
+
+       switch (action) {
+       case CPU_UP_PREPARE:
+       case CPU_UP_PREPARE_FROZEN:
+               if (conf->level == 6 && !percpu->spare_page)
+                       percpu->spare_page = alloc_page(GFP_KERNEL);
+               if (!percpu->scribble)
+                       percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
+
+               if (!percpu->scribble ||
+                   (conf->level == 6 && !percpu->spare_page)) {
+                       safe_put_page(percpu->spare_page);
+                       kfree(percpu->scribble);
+                       pr_err("%s: failed memory allocation for cpu%ld\n",
+                              __func__, cpu);
+                       return NOTIFY_BAD;
+               }
+               break;
+       case CPU_DEAD:
+       case CPU_DEAD_FROZEN:
+               safe_put_page(percpu->spare_page);
+               kfree(percpu->scribble);
+               percpu->spare_page = NULL;
+               percpu->scribble = NULL;
+               break;
+       default:
+               break;
+       }
+       return NOTIFY_OK;
+}
+#endif
+
+static int raid5_alloc_percpu(raid5_conf_t *conf)
+{
+       unsigned long cpu;
+       struct page *spare_page;
+       struct raid5_percpu *allcpus;
+       void *scribble;
+       int err;
+
+       allcpus = alloc_percpu(struct raid5_percpu);
+       if (!allcpus)
+               return -ENOMEM;
+       conf->percpu = allcpus;
+
+       get_online_cpus();
+       err = 0;
+       for_each_present_cpu(cpu) {
+               if (conf->level == 6) {
+                       spare_page = alloc_page(GFP_KERNEL);
+                       if (!spare_page) {
+                               err = -ENOMEM;
+                               break;
+                       }
+                       per_cpu_ptr(conf->percpu, cpu)->spare_page = spare_page;
+               }
+               scribble = kmalloc(scribble_len(conf->raid_disks), GFP_KERNEL);
+               if (!scribble) {
+                       err = -ENOMEM;
+                       break;
+               }
+               per_cpu_ptr(conf->percpu, cpu)->scribble = scribble;
+       }
+#ifdef CONFIG_HOTPLUG_CPU
+       conf->cpu_notify.notifier_call = raid456_cpu_notify;
+       conf->cpu_notify.priority = 0;
+       if (err == 0)
+               err = register_cpu_notifier(&conf->cpu_notify);
+#endif
+       put_online_cpus();
+
+       return err;
+}
+
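The single conf->spare_page is replaced by per-CPU spare pages and scribble buffers, so stripes handled concurrently on different CPUs do not contend for one scratch area; the notifier above keeps the pool populated across hotplug. A kernel-context sketch of the allocation pattern (pool_percpu and pool_init are hypothetical names):

#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/errno.h>

struct pool_percpu {
	void *buf;	/* per-cpu scratch, like percpu->scribble above */
};

static struct pool_percpu *pool;

static int pool_init(size_t len)
{
	unsigned long cpu;

	pool = alloc_percpu(struct pool_percpu);
	if (!pool)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		void *p = kmalloc(len, GFP_KERNEL);

		if (!p)
			return -ENOMEM;	/* caller unwinds, as free_conf does */
		per_cpu_ptr(pool, cpu)->buf = p;
	}
	return 0;
}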
 static raid5_conf_t *setup_conf(mddev_t *mddev)
 {
        raid5_conf_t *conf;
@@ -4369,6 +4724,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
                goto abort;
 
        conf->raid_disks = mddev->raid_disks;
+       conf->scribble_len = scribble_len(conf->raid_disks);
        if (mddev->reshape_position == MaxSector)
                conf->previous_raid_disks = mddev->raid_disks;
        else
@@ -4384,11 +4740,10 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
        if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
                goto abort;
 
-       if (mddev->new_level == 6) {
-               conf->spare_page = alloc_page(GFP_KERNEL);
-               if (!conf->spare_page)
-                       goto abort;
-       }
+       conf->level = mddev->new_level;
+       if (raid5_alloc_percpu(conf) != 0)
+               goto abort;
+
        spin_lock_init(&conf->device_lock);
        init_waitqueue_head(&conf->wait_for_stripe);
        init_waitqueue_head(&conf->wait_for_overlap);
@@ -4447,7 +4802,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
                printk(KERN_INFO "raid5: allocated %dkB for %s\n",
                        memory, mdname(mddev));
 
-       conf->thread = md_register_thread(raid5d, mddev, "%s_raid5");
+       conf->thread = md_register_thread(raid5d, mddev, NULL);
        if (!conf->thread) {
                printk(KERN_ERR
                       "raid5: couldn't allocate thread for %s\n",
@@ -4613,7 +4968,7 @@ static int run(mddev_t *mddev)
                set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
                set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
                mddev->sync_thread = md_register_thread(md_do_sync, mddev,
-                                                       "%s_reshape");
+                                                       "reshape");
        }
 
        /* read-ahead size must cover two whole stripes, which is
@@ -5031,7 +5386,7 @@ static int raid5_start_reshape(mddev_t *mddev)
        set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
        set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
        mddev->sync_thread = md_register_thread(md_do_sync, mddev,
-                                               "%s_reshape");
+                                               "reshape");
        if (!mddev->sync_thread) {
                mddev->recovery = 0;
                spin_lock_irq(&conf->device_lock);
index 9459689..2390e0e 100644
@@ -2,6 +2,7 @@
 #define _RAID5_H
 
 #include <linux/raid/xor.h>
+#include <linux/dmaengine.h>
 
 /*
  *
  */
 enum check_states {
        check_state_idle = 0,
-       check_state_run, /* parity check */
+       check_state_run, /* xor parity check */
+       check_state_run_q, /* q-parity check */
+       check_state_run_pq, /* pq dual parity check */
        check_state_check_result,
        check_state_compute_run, /* parity repair */
        check_state_compute_result,
@@ -215,8 +218,8 @@ struct stripe_head {
         * @target - STRIPE_OP_COMPUTE_BLK target
         */
        struct stripe_operations {
-               int                target;
-               u32                zero_sum_result;
+               int                  target, target2;
+               enum sum_check_flags zero_sum_result;
        } ops;
        struct r5dev {
                struct bio      req;
@@ -298,7 +301,7 @@ struct r6_state {
 #define STRIPE_OP_COMPUTE_BLK  1
 #define STRIPE_OP_PREXOR       2
 #define STRIPE_OP_BIODRAIN     3
-#define STRIPE_OP_POSTXOR      4
+#define STRIPE_OP_RECONSTRUCT  4
 #define STRIPE_OP_CHECK        5
 
 /*
@@ -385,8 +388,21 @@ struct raid5_private_data {
                                            * (fresh device added).
                                            * Cleared when a sync completes.
                                            */
-
-       struct page             *spare_page; /* Used when checking P/Q in raid6 */
+       /* per cpu variables */
+       struct raid5_percpu {
+               struct page     *spare_page; /* Used when checking P/Q in raid6 */
+               void            *scribble;   /* space for constructing buffer
+                                             * lists and performing address
+                                             * conversions
+                                             */
+       } *percpu;
+       size_t                  scribble_len; /* size of scribble region must be
+                                              * associated with conf to handle
+                                              * cpu hotplug while reshaping
+                                              */
+#ifdef CONFIG_HOTPLUG_CPU
+       struct notifier_block   cpu_notify;
+#endif
 
        /*
         * Free stripes pool
index 895e2ef..01fc704 100644
 #define DVB_MAJOR 212
 
 #if defined(CONFIG_DVB_MAX_ADAPTERS) && CONFIG_DVB_MAX_ADAPTERS > 0
-#define DVB_MAX_ADAPTERS CONFIG_DVB_MAX_ADAPTERS
+  #define DVB_MAX_ADAPTERS CONFIG_DVB_MAX_ADAPTERS
 #else
-#warning invalid CONFIG_DVB_MAX_ADAPTERS value
-#define DVB_MAX_ADAPTERS 8
+  #define DVB_MAX_ADAPTERS 8
 #endif
 
 #define DVB_UNSET (-1)
index 0e4b97f..9744b06 100644
@@ -75,7 +75,7 @@ config DVB_USB_DIB0700
        select DVB_DIB3000MC if !DVB_FE_CUSTOMISE
        select DVB_S5H1411 if !DVB_FE_CUSTOMISE
        select DVB_LGDT3305 if !DVB_FE_CUSTOMISE
-       select DVB_TUNER_DIB0070 if !DVB_FE_CUSTOMISE
+       select DVB_TUNER_DIB0070
        select MEDIA_TUNER_MT2060 if !MEDIA_TUNER_CUSTOMISE
        select MEDIA_TUNER_MT2266 if !MEDIA_TUNER_CUSTOMISE
        select MEDIA_TUNER_XC2028 if !MEDIA_TUNER_CUSTOMISE
index bb6df1b..6f094a9 100644
@@ -415,7 +415,7 @@ int saa7164_api_enum_subdevs(struct saa7164_dev *dev)
                goto out;
        }
 
-       if (debug & DBGLVL_API)
+       if (saa_debug & DBGLVL_API)
                saa7164_dumphex16(dev, buf, (buflen/16)*16);
 
        saa7164_api_dump_subdevs(dev, buf, buflen);
@@ -480,7 +480,7 @@ int saa7164_api_i2c_read(struct saa7164_i2c *bus, u8 addr, u32 reglen, u8 *reg,
 
        dprintk(DBGLVL_API, "%s() len = %d bytes\n", __func__, len);
 
-       if (debug & DBGLVL_I2C)
+       if (saa_debug & DBGLVL_I2C)
                saa7164_dumphex16(dev, buf, 2 * 16);
 
        ret = saa7164_cmd_send(bus->dev, unitid, GET_CUR,
@@ -488,7 +488,7 @@ int saa7164_api_i2c_read(struct saa7164_i2c *bus, u8 addr, u32 reglen, u8 *reg,
        if (ret != SAA_OK)
                printk(KERN_ERR "%s() error, ret(2) = 0x%x\n", __func__, ret);
        else {
-               if (debug & DBGLVL_I2C)
+               if (saa_debug & DBGLVL_I2C)
                        saa7164_dumphex16(dev, buf, sizeof(buf));
                memcpy(data, (buf + 2 * sizeof(u32) + reglen), datalen);
        }
@@ -548,7 +548,7 @@ int saa7164_api_i2c_write(struct saa7164_i2c *bus, u8 addr, u32 datalen,
        *((u32 *)(buf + 1 * sizeof(u32))) = datalen - reglen;
        memcpy((buf + 2 * sizeof(u32)), data, datalen);
 
-       if (debug & DBGLVL_I2C)
+       if (saa_debug & DBGLVL_I2C)
                saa7164_dumphex16(dev, buf, sizeof(buf));
 
        ret = saa7164_cmd_send(bus->dev, unitid, SET_CUR,
index e097f1a..c45966e 100644
@@ -250,7 +250,7 @@ int saa7164_cmd_wait(struct saa7164_dev *dev, u8 seqno)
        unsigned long stamp;
        int r;
 
-       if (debug >= 4)
+       if (saa_debug >= 4)
                saa7164_bus_dump(dev);
 
        dprintk(DBGLVL_CMD, "%s(seqno=%d)\n", __func__, seqno);
index f0dbead..709affc 100644
@@ -45,8 +45,8 @@ MODULE_LICENSE("GPL");
  32 bus
  */
 
-unsigned int debug;
-module_param(debug, int, 0644);
+unsigned int saa_debug;
+module_param_named(debug, saa_debug, int, 0644);
 MODULE_PARM_DESC(debug, "enable debug messages");
 
 unsigned int waitsecs = 10;
@@ -653,7 +653,7 @@ static int __devinit saa7164_initdev(struct pci_dev *pci_dev,
                printk(KERN_ERR "%s() Unsupported board detected, "
                        "registering without firmware\n", __func__);
 
-       dprintk(1, "%s() parameter debug = %d\n", __func__, debug);
+       dprintk(1, "%s() parameter debug = %d\n", __func__, saa_debug);
        dprintk(1, "%s() parameter waitsecs = %d\n", __func__, waitsecs);
 
 fail_fw:
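The driver-wide rename from debug to saa_debug avoids clashing with other globals named 'debug' in built-in configurations, while module_param_named() keeps the user-visible parameter name intact. A short sketch of the idiom; the sysfs path in the comment depends on the module name:

#include <linux/module.h>
#include <linux/moduleparam.h>

unsigned int saa_debug;
/* still appears as /sys/module/<module>/parameters/debug */
module_param_named(debug, saa_debug, int, 0644);
MODULE_PARM_DESC(debug, "enable debug messages");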
index 6753008..42660b5 100644
@@ -375,9 +375,9 @@ extern int saa7164_buffer_dealloc(struct saa7164_tsport *port,
 
 /* ----------------------------------------------------------- */
 
-extern unsigned int debug;
+extern unsigned int saa_debug;
 #define dprintk(level, fmt, arg...)\
-       do { if (debug & level)\
+       do { if (saa_debug & level)\
                printk(KERN_DEBUG "%s: " fmt, dev->name, ## arg);\
        } while (0)
 
index a5b448e..b3bf1c4 100644
@@ -339,9 +339,9 @@ static int h_memstick_read_dev_id(struct memstick_dev *card,
                        card->id.type = id_reg.type;
                        card->id.category = id_reg.category;
                        card->id.class = id_reg.class;
+                       dev_dbg(&card->dev, "if_mode = %02x\n", id_reg.if_mode);
                }
                complete(&card->mrq_complete);
-               dev_dbg(&card->dev, "if_mode = %02x\n", id_reg.if_mode);
                return -EAGAIN;
        }
 }
index 79689b1..766e21e 100644
@@ -937,6 +937,8 @@ static int quicktest1(unsigned long arg)
 
        /* Need  1K cacheline aligned that does not cross page boundary */
        p = kmalloc(4096, 0);
+       if (p == NULL)
+               return -ENOMEM;
        mq = ALIGNUP(p, 1024);
        memset(mes, 0xee, sizeof(mes));
        dw = mq;
index 9cbf95b..ccd4408 100644
@@ -340,10 +340,9 @@ static struct proc_dir_entry *proc_gru __read_mostly;
 
 static int create_proc_file(struct proc_entry *p)
 {
-       p->entry = create_proc_entry(p->name, p->mode, proc_gru);
+       p->entry = proc_create(p->name, p->mode, proc_gru, p->fops);
        if (!p->entry)
                return -1;
-       p->entry->proc_fops = p->fops;
        return 0;
 }
 
index 065fa81..fc25586 100644
@@ -599,6 +599,7 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data)
        struct scatterlist              *sg;
        unsigned int                    i;
        enum dma_data_direction         direction;
+       unsigned int                    sglen;
 
        /*
         * We don't do DMA on "complex" transfers, i.e. with
@@ -628,11 +629,14 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data)
        else
                direction = DMA_TO_DEVICE;
 
+       sglen = dma_map_sg(&host->pdev->dev, data->sg, data->sg_len, direction);
+       if (sglen != data->sg_len)
+               goto unmap_exit;
        desc = chan->device->device_prep_slave_sg(chan,
                        data->sg, data->sg_len, direction,
                        DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
        if (!desc)
-               return -ENOMEM;
+               goto unmap_exit;
 
        host->dma.data_desc = desc;
        desc->callback = atmci_dma_complete;
@@ -643,6 +647,9 @@ atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data)
        chan->device->device_issue_pending(chan);
 
        return 0;
+unmap_exit:
+       dma_unmap_sg(&host->pdev->dev, data->sg, sglen, direction);
+       return -ENOMEM;
 }
 
 #else /* CONFIG_MMC_ATMELMCI_DMA */
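The atmel-mci fix above adds the missing dma_map_sg() step and, crucially, an unwind path: once entries are mapped, every failure exit has to dma_unmap_sg() the count actually mapped. A kernel-context sketch of that rule (map_for_dma is a hypothetical helper):

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <linux/errno.h>

static int map_for_dma(struct device *dev, struct scatterlist *sg,
		       int nents, enum dma_data_direction dir)
{
	int mapped = dma_map_sg(dev, sg, nents, dir);

	if (mapped == 0)
		return -ENOMEM;		/* nothing to undo */
	if (mapped != nents) {
		/* partial mapping: unmap what we got, as the fix does */
		dma_unmap_sg(dev, sg, mapped, dir);
		return -ENOMEM;
	}
	return mapped;
}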
index 8741d0f..3d1e532 100644
 #include <linux/clk.h>
 #include <linux/scatterlist.h>
 #include <linux/gpio.h>
+#include <linux/amba/mmci.h>
+#include <linux/regulator/consumer.h>
 
 #include <asm/cacheflush.h>
 #include <asm/div64.h>
 #include <asm/io.h>
 #include <asm/sizes.h>
-#include <asm/mach/mmc.h>
 
 #include "mmci.h"
 
 
 static unsigned int fmax = 515633;
 
+/*
+ * This must be called with host->lock held
+ */
+static void mmci_set_clkreg(struct mmci_host *host, unsigned int desired)
+{
+       u32 clk = 0;
+
+       if (desired) {
+               if (desired >= host->mclk) {
+                       clk = MCI_CLK_BYPASS;
+                       host->cclk = host->mclk;
+               } else {
+                       clk = host->mclk / (2 * desired) - 1;
+                       if (clk >= 256)
+                               clk = 255;
+                       host->cclk = host->mclk / (2 * (clk + 1));
+               }
+               if (host->hw_designer == 0x80)
+                       clk |= MCI_FCEN; /* Bug fix in ST IP block */
+               clk |= MCI_CLK_ENABLE;
+               /* This hasn't proven to be worthwhile */
+               /* clk |= MCI_CLK_PWRSAVE; */
+       }
+
+       if (host->mmc->ios.bus_width == MMC_BUS_WIDTH_4)
+               clk |= MCI_WIDE_BUS;
+
+       writel(clk, host->base + MMCICLOCK);
+}
+
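The divider logic hoisted into mmci_set_clkreg() gives f_cclk = mclk / (2 * (clk + 1)), with clk clamped to the 8-bit field and a bypass once the request reaches mclk. A runnable back-of-envelope check, assuming an example 48 MHz master clock and the 400 kHz card-identification rate:

#include <stdio.h>

int main(void)
{
	unsigned int mclk = 48000000;	/* example master clock, 48 MHz */
	unsigned int desired = 400000;	/* 400 kHz identification clock */
	unsigned int clk, cclk;

	clk = mclk / (2 * desired) - 1;	/* 59 */
	if (clk >= 256)
		clk = 255;
	cclk = mclk / (2 * (clk + 1));	/* back to 400000 Hz */

	printf("divider=%u actual=%u Hz\n", clk, cclk);
	return 0;
}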
 static void
 mmci_request_end(struct mmci_host *host, struct mmc_request *mrq)
 {
@@ -419,30 +450,31 @@ static void mmci_request(struct mmc_host *mmc, struct mmc_request *mrq)
 static void mmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 {
        struct mmci_host *host = mmc_priv(mmc);
-       u32 clk = 0, pwr = 0;
-
-       if (ios->clock) {
-               if (ios->clock >= host->mclk) {
-                       clk = MCI_CLK_BYPASS;
-                       host->cclk = host->mclk;
-               } else {
-                       clk = host->mclk / (2 * ios->clock) - 1;
-                       if (clk >= 256)
-                               clk = 255;
-                       host->cclk = host->mclk / (2 * (clk + 1));
-               }
-               if (host->hw_designer == AMBA_VENDOR_ST)
-                       clk |= MCI_FCEN; /* Bug fix in ST IP block */
-               clk |= MCI_CLK_ENABLE;
-       }
-
-       if (host->plat->translate_vdd)
-               pwr |= host->plat->translate_vdd(mmc_dev(mmc), ios->vdd);
+       u32 pwr = 0;
+       unsigned long flags;
 
        switch (ios->power_mode) {
        case MMC_POWER_OFF:
+               if (host->vcc &&
+                   regulator_is_enabled(host->vcc))
+                       regulator_disable(host->vcc);
                break;
        case MMC_POWER_UP:
+#ifdef CONFIG_REGULATOR
+               if (host->vcc)
+                       /* This implicitly enables the regulator */
+                       mmc_regulator_set_ocr(host->vcc, ios->vdd);
+#endif
+               /*
+                * The translate_vdd function is not used if you have
+                * an external regulator, or your design is really weird.
+                * Using it would mean sending in power control BOTH using
+                * a regulator AND the 4 MMCIPWR bits. If we don't have
+                * a regulator, we might have some other platform specific
+                * power control behind this translate function.
+                */
+               if (!host->vcc && host->plat->translate_vdd)
+                       pwr |= host->plat->translate_vdd(mmc_dev(mmc), ios->vdd);
                /* The ST version does not have this, fall through to POWER_ON */
                if (host->hw_designer != AMBA_VENDOR_ST) {
                        pwr |= MCI_PWR_UP;
@@ -465,12 +497,16 @@ static void mmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
                }
        }
 
-       writel(clk, host->base + MMCICLOCK);
+       spin_lock_irqsave(&host->lock, flags);
+
+       mmci_set_clkreg(host, ios->clock);
 
        if (host->pwr != pwr) {
                host->pwr = pwr;
                writel(pwr, host->base + MMCIPOWER);
        }
+
+       spin_unlock_irqrestore(&host->lock, flags);
 }
 
 static int mmci_get_ro(struct mmc_host *mmc)
@@ -517,7 +553,7 @@ static void mmci_check_status(unsigned long data)
 
 static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 {
-       struct mmc_platform_data *plat = dev->dev.platform_data;
+       struct mmci_platform_data *plat = dev->dev.platform_data;
        struct mmci_host *host;
        struct mmc_host *mmc;
        int ret;
@@ -583,7 +619,30 @@ static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
        mmc->ops = &mmci_ops;
        mmc->f_min = (host->mclk + 511) / 512;
        mmc->f_max = min(host->mclk, fmax);
-       mmc->ocr_avail = plat->ocr_mask;
+#ifdef CONFIG_REGULATOR
+       /* If we're using the regulator framework, try to fetch a regulator */
+       host->vcc = regulator_get(&dev->dev, "vmmc");
+       if (IS_ERR(host->vcc))
+               host->vcc = NULL;
+       else {
+               int mask = mmc_regulator_get_ocrmask(host->vcc);
+
+               if (mask < 0)
+                       dev_err(&dev->dev, "error getting OCR mask (%d)\n",
+                               mask);
+               else {
+                       host->mmc->ocr_avail = (u32) mask;
+                       if (plat->ocr_mask)
+                               dev_warn(&dev->dev,
+                                "Provided ocr_mask/setpower will not be used "
+                                "(using regulator instead)\n");
+               }
+       }
+#endif
+       /* Fall back to platform data if no regulator is found */
+       if (host->vcc == NULL)
+               mmc->ocr_avail = plat->ocr_mask;
+       mmc->caps = plat->capabilities;
 
        /*
         * We can do SGIO
@@ -720,6 +779,10 @@ static int __devexit mmci_remove(struct amba_device *dev)
                clk_disable(host->clk);
                clk_put(host->clk);
 
+               if (host->vcc && regulator_is_enabled(host->vcc))
+                       regulator_disable(host->vcc);
+               regulator_put(host->vcc);
+
                mmc_free_host(mmc);
 
                amba_release_regions(dev);
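
The probe path prefers a "vmmc" supply from the regulator framework and falls back to the platform OCR mask only when no regulator is found. A condensed sketch of that lookup, assuming the 2.6.31-era consumer API and with error reporting trimmed:

        struct regulator *vcc = regulator_get(dev, "vmmc");

        if (IS_ERR(vcc)) {
                vcc = NULL;                     /* fall back to plat->ocr_mask */
        } else {
                int mask = mmc_regulator_get_ocrmask(vcc);

                if (mask >= 0)
                        mmc->ocr_avail = mask;  /* OCR derived from the regulator */
        }
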
index 839f264..1ceb9a9 100644
@@ -161,7 +161,7 @@ struct mmci_host {
        unsigned int            mclk;
        unsigned int            cclk;
        u32                     pwr;
-       struct mmc_platform_data *plat;
+       struct mmci_platform_data *plat;
 
        u8                      hw_designer;
        u8                      hw_revision:4;
@@ -175,6 +175,7 @@ struct mmci_host {
        struct scatterlist      *sg_ptr;
        unsigned int            sg_off;
        unsigned int            size;
+       struct regulator        *vcc;
 };
 
 static inline void mmci_init_sg(struct mmci_host *host, struct mmc_data *data)
index e55ac79..5e0b152 100644
@@ -28,6 +28,7 @@
 #include <linux/mmc/host.h>
 #include <linux/io.h>
 #include <linux/regulator/consumer.h>
+#include <linux/gpio.h>
 
 #include <asm/sizes.h>
 
@@ -96,10 +97,18 @@ static inline void pxamci_init_ocr(struct pxamci_host *host)
 
 static inline void pxamci_set_power(struct pxamci_host *host, unsigned int vdd)
 {
+       int on;
+
 #ifdef CONFIG_REGULATOR
        if (host->vcc)
                mmc_regulator_set_ocr(host->vcc, vdd);
 #endif
+       if (!host->vcc && host->pdata &&
+           gpio_is_valid(host->pdata->gpio_power)) {
+               on = ((1 << vdd) & host->pdata->ocr_mask);
+               gpio_set_value(host->pdata->gpio_power,
+                              !!on ^ host->pdata->gpio_power_invert);
+       }
        if (!host->vcc && host->pdata && host->pdata->setpower)
                host->pdata->setpower(mmc_dev(host->mmc), vdd);
 }
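
In the GPIO branch, "on" captures the requested VDD bit masked against the board's OCR mask, and the exclusive-or applies the board's polarity setting. The idiom, annotated as a sketch:

        /* !!on collapses the masked bit to 0 or 1; invert flips polarity:
         *   on != 0, invert = 0 -> 1   (power on, active-high pin)
         *   on != 0, invert = 1 -> 0   (power on, active-low pin)
         *   on == 0, invert = 0 -> 0
         *   on == 0, invert = 1 -> 1
         */
        gpio_set_value(gpio, !!on ^ invert);
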
@@ -421,6 +430,12 @@ static int pxamci_get_ro(struct mmc_host *mmc)
 {
        struct pxamci_host *host = mmc_priv(mmc);
 
+       if (host->pdata && gpio_is_valid(host->pdata->gpio_card_ro)) {
+               if (host->pdata->gpio_card_ro_invert)
+                       return !gpio_get_value(host->pdata->gpio_card_ro);
+               else
+                       return gpio_get_value(host->pdata->gpio_card_ro);
+       }
        if (host->pdata && host->pdata->get_ro)
                return !!host->pdata->get_ro(mmc_dev(mmc));
        /*
@@ -534,7 +549,7 @@ static int pxamci_probe(struct platform_device *pdev)
        struct mmc_host *mmc;
        struct pxamci_host *host = NULL;
        struct resource *r, *dmarx, *dmatx;
-       int ret, irq;
+       int ret, irq, gpio_cd = -1, gpio_ro = -1, gpio_power = -1;
 
        r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        irq = platform_get_irq(pdev, 0);
@@ -661,13 +676,63 @@ static int pxamci_probe(struct platform_device *pdev)
        }
        host->dma_drcmrtx = dmatx->start;
 
+       if (host->pdata) {
+               gpio_cd = host->pdata->gpio_card_detect;
+               gpio_ro = host->pdata->gpio_card_ro;
+               gpio_power = host->pdata->gpio_power;
+       }
+       if (gpio_is_valid(gpio_power)) {
+               ret = gpio_request(gpio_power, "mmc card power");
+               if (ret) {
+                       dev_err(&pdev->dev, "Failed requesting gpio_power %d\n", gpio_power);
+                       goto out;
+               }
+               gpio_direction_output(gpio_power,
+                                     host->pdata->gpio_power_invert);
+       }
+       if (gpio_is_valid(gpio_ro)) {
+               ret = gpio_request(gpio_ro, "mmc card read only");
+               if (ret) {
+                       dev_err(&pdev->dev, "Failed requesting gpio_ro %d\n", gpio_power);
+                       goto err_gpio_ro;
+               }
+               gpio_direction_input(gpio_ro);
+       }
+       if (gpio_is_valid(gpio_cd)) {
+               ret = gpio_request(gpio_cd, "mmc card detect");
+               if (ret) {
+                       dev_err(&pdev->dev, "Failed requesting gpio_cd %d\n", gpio_power);
+                       goto err_gpio_cd;
+               }
+               gpio_direction_input(gpio_cd);
+
+               ret = request_irq(gpio_to_irq(gpio_cd), pxamci_detect_irq,
+                                 IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+                                 "mmc card detect", mmc);
+               if (ret) {
+                       dev_err(&pdev->dev, "failed to request card detect IRQ\n");
+                       goto err_request_irq;
+               }
+       }
+
        if (host->pdata && host->pdata->init)
                host->pdata->init(&pdev->dev, pxamci_detect_irq, mmc);
 
+       if (gpio_is_valid(gpio_power) && host->pdata->setpower)
+               dev_warn(&pdev->dev, "gpio_power and setpower() both defined\n");
+       if (gpio_is_valid(gpio_ro) && host->pdata->get_ro)
+               dev_warn(&pdev->dev, "gpio_ro and get_ro() both defined\n");
+
        mmc_add_host(mmc);
 
        return 0;
 
+err_request_irq:
+       gpio_free(gpio_cd);
+err_gpio_cd:
+       gpio_free(gpio_ro);
+err_gpio_ro:
+       gpio_free(gpio_power);
  out:
        if (host) {
                if (host->dma >= 0)
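
The new labels follow the usual kernel unwind idiom: each one releases exactly what was acquired before the failing step, in reverse order of acquisition. Schematically, with hypothetical GPIOs rather than the driver's own (a sketch, not the patch code):

        static int foo_probe(unsigned gpio_a, unsigned gpio_b)
        {
                int ret;

                ret = gpio_request(gpio_a, "a");        /* acquired first */
                if (ret)
                        goto out;
                ret = gpio_request(gpio_b, "b");        /* acquired second */
                if (ret)
                        goto err_a;
                return 0;
        err_a:
                gpio_free(gpio_a);                      /* released last */
        out:
                return ret;
        }
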
@@ -688,12 +753,26 @@ static int pxamci_probe(struct platform_device *pdev)
 static int pxamci_remove(struct platform_device *pdev)
 {
        struct mmc_host *mmc = platform_get_drvdata(pdev);
+       int gpio_cd = -1, gpio_ro = -1, gpio_power = -1;
 
        platform_set_drvdata(pdev, NULL);
 
        if (mmc) {
                struct pxamci_host *host = mmc_priv(mmc);
 
+               if (host->pdata) {
+                       gpio_cd = host->pdata->gpio_card_detect;
+                       gpio_ro = host->pdata->gpio_card_ro;
+                       gpio_power = host->pdata->gpio_power;
+               }
+               if (gpio_is_valid(gpio_cd)) {
+                       free_irq(gpio_to_irq(gpio_cd), mmc);
+                       gpio_free(gpio_cd);
+               }
+               if (gpio_is_valid(gpio_ro))
+                       gpio_free(gpio_ro);
+               if (gpio_is_valid(gpio_power))
+                       gpio_free(gpio_power);
                if (host->vcc)
                        regulator_put(host->vcc);
 
@@ -725,20 +804,20 @@ static int pxamci_remove(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_PM
-static int pxamci_suspend(struct platform_device *dev, pm_message_t state)
+static int pxamci_suspend(struct device *dev)
 {
-       struct mmc_host *mmc = platform_get_drvdata(dev);
+       struct mmc_host *mmc = dev_get_drvdata(dev);
        int ret = 0;
 
        if (mmc)
-               ret = mmc_suspend_host(mmc, state);
+               ret = mmc_suspend_host(mmc, PMSG_SUSPEND);
 
        return ret;
 }
 
-static int pxamci_resume(struct platform_device *dev)
+static int pxamci_resume(struct device *dev)
 {
-       struct mmc_host *mmc = platform_get_drvdata(dev);
+       struct mmc_host *mmc = dev_get_drvdata(dev);
        int ret = 0;
 
        if (mmc)
@@ -746,19 +825,22 @@ static int pxamci_resume(struct platform_device *dev)
 
        return ret;
 }
-#else
-#define pxamci_suspend NULL
-#define pxamci_resume  NULL
+
+static struct dev_pm_ops pxamci_pm_ops = {
+       .suspend        = pxamci_suspend,
+       .resume         = pxamci_resume,
+};
 #endif
 
 static struct platform_driver pxamci_driver = {
        .probe          = pxamci_probe,
        .remove         = pxamci_remove,
-       .suspend        = pxamci_suspend,
-       .resume         = pxamci_resume,
        .driver         = {
                .name   = DRIVER_NAME,
                .owner  = THIS_MODULE,
+#ifdef CONFIG_PM
+               .pm     = &pxamci_pm_ops,
+#endif
        },
 };
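
The suspend/resume conversion moves the callbacks from the platform_driver itself into a dev_pm_ops table referenced from the embedded device driver, and the callbacks now take a struct device rather than a platform_device. The shape of the conversion, sketched for a hypothetical driver:

        static const struct dev_pm_ops foo_pm_ops = {
                .suspend        = foo_suspend,  /* int foo_suspend(struct device *dev) */
                .resume         = foo_resume,
        };

        static struct platform_driver foo_driver = {
                .driver = {
                        .name   = "foo",
                        .pm     = &foo_pm_ops,  /* replaces .suspend/.resume */
                },
        };
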
 
index e4ec365..ecf90f5 100644
@@ -159,7 +159,7 @@ config MTD_AFS_PARTS
 
 config MTD_OF_PARTS
        tristate "Flash partition map based on OF description"
-       depends on PPC_OF && MTD_PARTITIONS
+       depends on (MICROBLAZE || PPC_OF) && MTD_PARTITIONS
        help
          This provides a partition parsing function which derives
          the partition map from the children of the flash node,
index 3a9a960..841e085 100644
@@ -74,7 +74,7 @@ config MTD_PHYSMAP_BANKWIDTH
 
 config MTD_PHYSMAP_OF
        tristate "Flash device in physical memory map based on OF description"
-       depends on PPC_OF && (MTD_CFI || MTD_JEDECPROBE || MTD_ROM)
+       depends on (MICROBLAZE || PPC_OF) && (MTD_CFI || MTD_JEDECPROBE || MTD_ROM)
        help
          This provides a 'mapping' driver which allows the NOR Flash and
          ROM driver code to communicate with chips which are mapped
index 15c0195..a24be34 100644
@@ -768,10 +768,24 @@ e100_negotiate(struct net_device* dev)
 
        e100_set_mdio_reg(dev, np->mii_if.phy_id, MII_ADVERTISE, data);
 
-       /* Renegotiate with link partner */
+       data = e100_get_mdio_reg(dev, np->mii_if.phy_id, MII_BMCR);
        if (autoneg_normal) {
-         data = e100_get_mdio_reg(dev, np->mii_if.phy_id, MII_BMCR);
-       data |= BMCR_ANENABLE | BMCR_ANRESTART;
+               /* Renegotiate with link partner */
+               data |= BMCR_ANENABLE | BMCR_ANRESTART;
+       } else {
+               /* Don't negotiate speed or duplex */
+               data &= ~(BMCR_ANENABLE | BMCR_ANRESTART);
+
+               /* Set speed and duplex static */
+               if (current_speed_selection == 10)
+                       data &= ~BMCR_SPEED100;
+               else
+                       data |= BMCR_SPEED100;
+
+               if (current_duplex != full)
+                       data &= ~BMCR_FULLDPLX;
+               else
+                       data |= BMCR_FULLDPLX;
        }
        e100_set_mdio_reg(dev, np->mii_if.phy_id, MII_BMCR, data);
 }
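
When autonegotiation is disabled, the MII Basic Mode Control Register is programmed directly: BMCR_SPEED100 selects 100 rather than 10 Mbit/s, and BMCR_FULLDPLX selects full duplex. Forcing 100/full, for example, using the driver's MDIO helpers (a sketch):

        unsigned short data;

        data = e100_get_mdio_reg(dev, np->mii_if.phy_id, MII_BMCR);
        data &= ~(BMCR_ANENABLE | BMCR_ANRESTART);      /* no renegotiation */
        data |= BMCR_SPEED100 | BMCR_FULLDPLX;          /* 100 Mbit, full duplex */
        e100_set_mdio_reg(dev, np->mii_if.phy_id, MII_BMCR, data);
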
index 1445e58..84db145 100644
@@ -17,6 +17,7 @@
 #include <linux/etherdevice.h>
 #include <linux/platform_device.h>
 #include <linux/clk.h>
+#include <linux/gpio.h>
 
 #include <net/irda/irda.h>
 #include <net/irda/irmod.h>
@@ -162,6 +163,22 @@ inline static void pxa_irda_fir_dma_tx_start(struct pxa_irda *si)
        DCSR(si->txdma) |= DCSR_RUN;
 }
 
+/*
+ * Set the IrDA communications mode.
+ */
+static void pxa_irda_set_mode(struct pxa_irda *si, int mode)
+{
+       if (si->pdata->transceiver_mode)
+               si->pdata->transceiver_mode(si->dev, mode);
+       else {
+               if (gpio_is_valid(si->pdata->gpio_pwdown))
+                       gpio_set_value(si->pdata->gpio_pwdown,
+                                       !(mode & IR_OFF) ^
+                                       !si->pdata->gpio_pwdown_inverted);
+               pxa2xx_transceiver_mode(si->dev, mode);
+       }
+}
+
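
Note that gpio_pwdown is a power-down line: with a non-inverted pin, the helper drives the GPIO low to let the transceiver run and high to power it off. The polarity expression, annotated (a sketch):

        /* want_on = !(mode & IR_OFF), value = want_on ^ !inverted
         *
         *   want_on = 1, inverted = 0 -> 0  (deassert pwdown, transceiver runs)
         *   want_on = 0, inverted = 0 -> 1  (assert pwdown, transceiver off)
         *   want_on = 1, inverted = 1 -> 1
         *   want_on = 0, inverted = 1 -> 0
         */
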
 /*
  * Set the IrDA communications speed.
  */
@@ -188,7 +205,7 @@ static int pxa_irda_set_speed(struct pxa_irda *si, int speed)
                        pxa_irda_disable_clk(si);
 
                        /* set board transceiver to SIR mode */
-                       si->pdata->transceiver_mode(si->dev, IR_SIRMODE);
+                       pxa_irda_set_mode(si, IR_SIRMODE);
 
                        /* enable the STUART clock */
                        pxa_irda_enable_sirclk(si);
@@ -222,7 +239,7 @@ static int pxa_irda_set_speed(struct pxa_irda *si, int speed)
                ICCR0 = 0;
 
                /* set board transceiver to FIR mode */
-               si->pdata->transceiver_mode(si->dev, IR_FIRMODE);
+               pxa_irda_set_mode(si, IR_FIRMODE);
 
                /* enable the FICP clock */
                pxa_irda_enable_firclk(si);
@@ -641,7 +658,7 @@ static void pxa_irda_shutdown(struct pxa_irda *si)
        local_irq_restore(flags);
 
        /* power off board transceiver */
-       si->pdata->transceiver_mode(si->dev, IR_OFF);
+       pxa_irda_set_mode(si, IR_OFF);
 
        printk(KERN_DEBUG "pxa_ir: irda shutdown\n");
 }
@@ -849,10 +866,26 @@ static int pxa_irda_probe(struct platform_device *pdev)
        if (err)
                goto err_mem_5;
 
-       if (si->pdata->startup)
+       if (gpio_is_valid(si->pdata->gpio_pwdown)) {
+               err = gpio_request(si->pdata->gpio_pwdown, "IrDA switch");
+               if (err)
+                       goto err_startup;
+               err = gpio_direction_output(si->pdata->gpio_pwdown,
+                                       !si->pdata->gpio_pwdown_inverted);
+               if (err) {
+                       gpio_free(si->pdata->gpio_pwdown);
+                       goto err_startup;
+               }
+       }
+
+       if (si->pdata->startup) {
                err = si->pdata->startup(si->dev);
-       if (err)
-               goto err_startup;
+               if (err)
+                       goto err_startup;
+       }
+
+       if (gpio_is_valid(si->pdata->gpio_pwdown) && si->pdata->startup)
+               dev_warn(si->dev, "gpio_pwdown and startup() both defined!\n");
 
        dev->netdev_ops = &pxa_irda_netdev_ops;
 
@@ -903,6 +936,8 @@ static int pxa_irda_remove(struct platform_device *_dev)
        if (dev) {
                struct pxa_irda *si = netdev_priv(dev);
                unregister_netdev(dev);
+               if (gpio_is_valid(si->pdata->gpio_pwdown))
+                       gpio_free(si->pdata->gpio_pwdown);
                if (si->pdata->shutdown)
                        si->pdata->shutdown(si->dev);
                kfree(si->tx_buff.head);
index cee199c..3c16602 100644
@@ -33,6 +33,7 @@
  */
 
 #include <linux/mlx4/cmd.h>
+#include <linux/cache.h>
 
 #include "fw.h"
 #include "icm.h"
@@ -698,6 +699,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 #define INIT_HCA_IN_SIZE                0x200
 #define INIT_HCA_VERSION_OFFSET                 0x000
 #define         INIT_HCA_VERSION                2
+#define INIT_HCA_CACHELINE_SZ_OFFSET    0x0e
 #define INIT_HCA_FLAGS_OFFSET           0x014
 #define INIT_HCA_QPC_OFFSET             0x020
 #define         INIT_HCA_QPC_BASE_OFFSET        (INIT_HCA_QPC_OFFSET + 0x10)
@@ -735,6 +737,9 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 
        *((u8 *) mailbox->buf + INIT_HCA_VERSION_OFFSET) = INIT_HCA_VERSION;
 
+       *((u8 *) mailbox->buf + INIT_HCA_CACHELINE_SZ_OFFSET) =
+               (ilog2(cache_line_size()) - 4) << 5;
+
 #if defined(__LITTLE_ENDIAN)
        *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
 #elif defined(__BIG_ENDIAN)
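
The byte written at INIT_HCA_CACHELINE_SZ_OFFSET encodes the cache line size as its log2 in 16-byte units (hence the subtraction of 4), shifted left by five bits. A worked example (sketch):

        /* 64-byte cache line: ilog2(64) = 6, 6 - 4 = 2, 2 << 5 = 0x40 */
        u8 val = (ilog2(64) - 4) << 5;  /* 0x40, written at offset 0x0e */
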
index 2ab1d59..a8b6896 100644
@@ -402,7 +402,7 @@ static int arlan_setup_card_by_book(struct net_device *dev)
 
 static char arlan_drive_info[ARLAN_STR_SIZE] = "A655\n\0";
 
-static int arlan_sysctl_info(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_info(ctl_table * ctl, int write,
                      void __user *buffer, size_t * lenp, loff_t *ppos)
 {
        int i;
@@ -629,7 +629,7 @@ final:
        *lenp = pos;
 
        if (!write)
-               retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+               retv = proc_dostring(ctl, write, buffer, lenp, ppos);
        else
        {
                *lenp = 0;
@@ -639,7 +639,7 @@ final:
 }
 
 
-static int arlan_sysctl_info161719(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_info161719(ctl_table * ctl, int write,
                            void __user *buffer, size_t * lenp, loff_t *ppos)
 {
        int i;
@@ -669,11 +669,11 @@ static int arlan_sysctl_info161719(ctl_table * ctl, int write, struct file *filp
 
 final:
        *lenp = pos;
-       retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+       retv = proc_dostring(ctl, write, buffer, lenp, ppos);
        return retv;
 }
 
-static int arlan_sysctl_infotxRing(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_infotxRing(ctl_table * ctl, int write,
                            void __user *buffer, size_t * lenp, loff_t *ppos)
 {
        int i;
@@ -698,11 +698,11 @@ static int arlan_sysctl_infotxRing(ctl_table * ctl, int write, struct file *filp
        SARLBNpln(u_char, txBuffer, 0x800);
 final:
        *lenp = pos;
-       retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+       retv = proc_dostring(ctl, write, buffer, lenp, ppos);
        return retv;
 }
 
-static int arlan_sysctl_inforxRing(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_inforxRing(ctl_table * ctl, int write,
                            void __user *buffer, size_t * lenp, loff_t *ppos)
 {
        int i;
@@ -726,11 +726,11 @@ static int arlan_sysctl_inforxRing(ctl_table * ctl, int write, struct file *filp
        SARLBNpln(u_char, rxBuffer, 0x800);
 final:
        *lenp = pos;
-       retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+       retv = proc_dostring(ctl, write, buffer, lenp, ppos);
        return retv;
 }
 
-static int arlan_sysctl_info18(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_info18(ctl_table * ctl, int write,
                        void __user *buffer, size_t * lenp, loff_t *ppos)
 {
        int i;
@@ -756,7 +756,7 @@ static int arlan_sysctl_info18(ctl_table * ctl, int write, struct file *filp,
 
 final:
        *lenp = pos;
-       retv = proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+       retv = proc_dostring(ctl, write, buffer, lenp, ppos);
        return retv;
 }
 
@@ -766,7 +766,7 @@ final:
 
 static char conf_reset_result[200];
 
-static int arlan_configure(ctl_table * ctl, int write, struct file *filp,
+static int arlan_configure(ctl_table * ctl, int write,
                    void __user *buffer, size_t * lenp, loff_t *ppos)
 {
        int pos = 0;
@@ -788,10 +788,10 @@ static int arlan_configure(ctl_table * ctl, int write, struct file *filp,
                return -1;
 
        *lenp = pos;
-       return proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+       return proc_dostring(ctl, write, buffer, lenp, ppos);
 }
 
-static int arlan_sysctl_reset(ctl_table * ctl, int write, struct file *filp,
+static int arlan_sysctl_reset(ctl_table * ctl, int write,
                       void __user *buffer, size_t * lenp, loff_t *ppos)
 {
        int pos = 0;
@@ -811,7 +811,7 @@ static int arlan_sysctl_reset(ctl_table * ctl, int write, struct file *filp,
        } else
                return -1;
        *lenp = pos + 3;
-       return proc_dostring(ctl, write, filp, buffer, lenp, ppos);
+       return proc_dostring(ctl, write, buffer, lenp, ppos);
 }
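
Each of these handlers is converted to the new ctl_table proc_handler prototype, which no longer receives a struct file pointer. The resulting shape, as a sketch of a hypothetical handler:

        static int foo_handler(ctl_table *ctl, int write,
                               void __user *buffer, size_t *lenp, loff_t *ppos)
        {
                /* fill or parse the buffer, then defer to the generic helper */
                return proc_dostring(ctl, write, buffer, lenp, ppos);
        }
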
 
 
index 554e11f..8eefe56 100644
@@ -31,7 +31,7 @@
 #define PARPORT_MIN_SPINTIME_VALUE 1
 #define PARPORT_MAX_SPINTIME_VALUE 1000
 
-static int do_active_device(ctl_table *table, int write, struct file *filp,
+static int do_active_device(ctl_table *table, int write,
                      void __user *result, size_t *lenp, loff_t *ppos)
 {
        struct parport *port = (struct parport *)table->extra1;
@@ -68,7 +68,7 @@ static int do_active_device(ctl_table *table, int write, struct file *filp,
 }
 
 #ifdef CONFIG_PARPORT_1284
-static int do_autoprobe(ctl_table *table, int write, struct file *filp,
+static int do_autoprobe(ctl_table *table, int write,
                        void __user *result, size_t *lenp, loff_t *ppos)
 {
        struct parport_device_info *info = table->extra2;
@@ -111,7 +111,7 @@ static int do_autoprobe(ctl_table *table, int write, struct file *filp,
 #endif /* IEEE1284.3 support. */
 
 static int do_hardware_base_addr (ctl_table *table, int write,
-                                 struct file *filp, void __user *result,
+                                 void __user *result,
                                  size_t *lenp, loff_t *ppos)
 {
        struct parport *port = (struct parport *)table->extra1;
@@ -139,7 +139,7 @@ static int do_hardware_base_addr (ctl_table *table, int write,
 }
 
 static int do_hardware_irq (ctl_table *table, int write,
-                           struct file *filp, void __user *result,
+                           void __user *result,
                            size_t *lenp, loff_t *ppos)
 {
        struct parport *port = (struct parport *)table->extra1;
@@ -167,7 +167,7 @@ static int do_hardware_irq (ctl_table *table, int write,
 }
 
 static int do_hardware_dma (ctl_table *table, int write,
-                           struct file *filp, void __user *result,
+                           void __user *result,
                            size_t *lenp, loff_t *ppos)
 {
        struct parport *port = (struct parport *)table->extra1;
@@ -195,7 +195,7 @@ static int do_hardware_dma (ctl_table *table, int write,
 }
 
 static int do_hardware_modes (ctl_table *table, int write,
-                             struct file *filp, void __user *result,
+                             void __user *result,
                              size_t *lenp, loff_t *ppos)
 {
        struct parport *port = (struct parport *)table->extra1;
index 36faa9a..3070f77 100644
@@ -72,15 +72,9 @@ do {                                                                 \
 
 #define SLOT_NAME_SIZE 10
 struct slot {
-       u8 bus;
-       u8 device;
        u8 state;
-       u8 hp_slot;
-       u32 number;
        struct controller *ctrl;
-       struct hpc_ops *hpc_ops;
        struct hotplug_slot *hotplug_slot;
-       struct list_head        slot_list;
        struct delayed_work work;       /* work for button event */
        struct mutex lock;
 };
@@ -92,18 +86,10 @@ struct event_info {
 };
 
 struct controller {
-       struct mutex crit_sect;         /* critical section mutex */
        struct mutex ctrl_lock;         /* controller lock */
-       int num_slots;                  /* Number of slots on ctlr */
-       int slot_num_inc;               /* 1 or -1 */
-       struct pci_dev *pci_dev;
        struct pcie_device *pcie;       /* PCI Express port service */
-       struct list_head slot_list;
-       struct hpc_ops *hpc_ops;
+       struct slot *slot;
        wait_queue_head_t queue;        /* sleep & wake process */
-       u8 slot_device_offset;
-       u32 first_slot;         /* First physical slot number */  /* PCIE only has 1 slot */
-       u8 slot_bus;            /* Bus where the slots handled by this controller sit */
        u32 slot_cap;
        u8 cap_base;
        struct timer_list poll_timer;
@@ -131,40 +117,20 @@ struct controller {
 #define POWERON_STATE                  3
 #define POWEROFF_STATE                 4
 
-/* Error messages */
-#define INTERLOCK_OPEN                 0x00000002
-#define ADD_NOT_SUPPORTED              0x00000003
-#define CARD_FUNCTIONING               0x00000005
-#define ADAPTER_NOT_SAME               0x00000006
-#define NO_ADAPTER_PRESENT             0x00000009
-#define NOT_ENOUGH_RESOURCES           0x0000000B
-#define DEVICE_TYPE_NOT_SUPPORTED      0x0000000C
-#define WRONG_BUS_FREQUENCY            0x0000000D
-#define POWER_FAILURE                  0x0000000E
-
-/* Field definitions in Slot Capabilities Register */
-#define ATTN_BUTTN_PRSN        0x00000001
-#define        PWR_CTRL_PRSN   0x00000002
-#define MRL_SENS_PRSN  0x00000004
-#define ATTN_LED_PRSN  0x00000008
-#define PWR_LED_PRSN   0x00000010
-#define HP_SUPR_RM_SUP 0x00000020
-#define EMI_PRSN       0x00020000
-#define NO_CMD_CMPL_SUP        0x00040000
-
-#define ATTN_BUTTN(ctrl)       ((ctrl)->slot_cap & ATTN_BUTTN_PRSN)
-#define POWER_CTRL(ctrl)       ((ctrl)->slot_cap & PWR_CTRL_PRSN)
-#define MRL_SENS(ctrl)         ((ctrl)->slot_cap & MRL_SENS_PRSN)
-#define ATTN_LED(ctrl)         ((ctrl)->slot_cap & ATTN_LED_PRSN)
-#define PWR_LED(ctrl)          ((ctrl)->slot_cap & PWR_LED_PRSN)
-#define HP_SUPR_RM(ctrl)       ((ctrl)->slot_cap & HP_SUPR_RM_SUP)
-#define EMI(ctrl)              ((ctrl)->slot_cap & EMI_PRSN)
-#define NO_CMD_CMPL(ctrl)      ((ctrl)->slot_cap & NO_CMD_CMPL_SUP)
+#define ATTN_BUTTN(ctrl)       ((ctrl)->slot_cap & PCI_EXP_SLTCAP_ABP)
+#define POWER_CTRL(ctrl)       ((ctrl)->slot_cap & PCI_EXP_SLTCAP_PCP)
+#define MRL_SENS(ctrl)         ((ctrl)->slot_cap & PCI_EXP_SLTCAP_MRLSP)
+#define ATTN_LED(ctrl)         ((ctrl)->slot_cap & PCI_EXP_SLTCAP_AIP)
+#define PWR_LED(ctrl)          ((ctrl)->slot_cap & PCI_EXP_SLTCAP_PIP)
+#define HP_SUPR_RM(ctrl)       ((ctrl)->slot_cap & PCI_EXP_SLTCAP_HPS)
+#define EMI(ctrl)              ((ctrl)->slot_cap & PCI_EXP_SLTCAP_EIP)
+#define NO_CMD_CMPL(ctrl)      ((ctrl)->slot_cap & PCI_EXP_SLTCAP_NCCS)
+#define PSN(ctrl)              ((ctrl)->slot_cap >> 19)
 
 extern int pciehp_sysfs_enable_slot(struct slot *slot);
 extern int pciehp_sysfs_disable_slot(struct slot *slot);
 extern u8 pciehp_handle_attention_button(struct slot *p_slot);
-  extern u8 pciehp_handle_switch_change(struct slot *p_slot);
+extern u8 pciehp_handle_switch_change(struct slot *p_slot);
 extern u8 pciehp_handle_presence_change(struct slot *p_slot);
 extern u8 pciehp_handle_power_fault(struct slot *p_slot);
 extern int pciehp_configure_device(struct slot *p_slot);
@@ -175,45 +141,30 @@ int pcie_init_notification(struct controller *ctrl);
 int pciehp_enable_slot(struct slot *p_slot);
 int pciehp_disable_slot(struct slot *p_slot);
 int pcie_enable_notification(struct controller *ctrl);
+int pciehp_power_on_slot(struct slot *slot);
+int pciehp_power_off_slot(struct slot *slot);
+int pciehp_get_power_status(struct slot *slot, u8 *status);
+int pciehp_get_attention_status(struct slot *slot, u8 *status);
+
+int pciehp_set_attention_status(struct slot *slot, u8 status);
+int pciehp_get_latch_status(struct slot *slot, u8 *status);
+int pciehp_get_adapter_status(struct slot *slot, u8 *status);
+int pciehp_get_max_link_speed(struct slot *slot, enum pci_bus_speed *speed);
+int pciehp_get_max_link_width(struct slot *slot, enum pcie_link_width *val);
+int pciehp_get_cur_link_speed(struct slot *slot, enum pci_bus_speed *speed);
+int pciehp_get_cur_link_width(struct slot *slot, enum pcie_link_width *val);
+int pciehp_query_power_fault(struct slot *slot);
+void pciehp_green_led_on(struct slot *slot);
+void pciehp_green_led_off(struct slot *slot);
+void pciehp_green_led_blink(struct slot *slot);
+int pciehp_check_link_status(struct controller *ctrl);
+void pciehp_release_ctrl(struct controller *ctrl);
 
 static inline const char *slot_name(struct slot *slot)
 {
        return hotplug_slot_name(slot->hotplug_slot);
 }
 
-static inline struct slot *pciehp_find_slot(struct controller *ctrl, u8 device)
-{
-       struct slot *slot;
-
-       list_for_each_entry(slot, &ctrl->slot_list, slot_list) {
-               if (slot->device == device)
-                       return slot;
-       }
-
-       ctrl_err(ctrl, "Slot (device=0x%02x) not found\n", device);
-       return NULL;
-}
-
-struct hpc_ops {
-       int (*power_on_slot)(struct slot *slot);
-       int (*power_off_slot)(struct slot *slot);
-       int (*get_power_status)(struct slot *slot, u8 *status);
-       int (*get_attention_status)(struct slot *slot, u8 *status);
-       int (*set_attention_status)(struct slot *slot, u8 status);
-       int (*get_latch_status)(struct slot *slot, u8 *status);
-       int (*get_adapter_status)(struct slot *slot, u8 *status);
-       int (*get_max_bus_speed)(struct slot *slot, enum pci_bus_speed *speed);
-       int (*get_cur_bus_speed)(struct slot *slot, enum pci_bus_speed *speed);
-       int (*get_max_lnk_width)(struct slot *slot, enum pcie_link_width *val);
-       int (*get_cur_lnk_width)(struct slot *slot, enum pcie_link_width *val);
-       int (*query_power_fault)(struct slot *slot);
-       void (*green_led_on)(struct slot *slot);
-       void (*green_led_off)(struct slot *slot);
-       void (*green_led_blink)(struct slot *slot);
-       void (*release_ctlr)(struct controller *ctrl);
-       int (*check_lnk_status)(struct controller *ctrl);
-};
-
 #ifdef CONFIG_ACPI
 #include <acpi/acpi.h>
 #include <acpi/acpi_bus.h>
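
The open-coded capability masks are replaced by the generic PCI_EXP_SLTCAP_* definitions, and PSN() extracts the Physical Slot Number, which lives in the high bits (31:19) of the Slot Capabilities register. Reading and decoding the register, as a sketch:

        u32 slot_cap, psn;

        pci_read_config_dword(pdev, cap_base + PCI_EXP_SLTCAP, &slot_cap);
        psn = slot_cap >> 19;   /* Physical Slot Number */
        if (slot_cap & PCI_EXP_SLTCAP_ABP)
                dev_info(&pdev->dev, "slot %u has an attention button\n", psn);
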
index 7163e6a..37c8d3d 100644
 #define PCIEHP_DETECT_AUTO     (2)
 #define PCIEHP_DETECT_DEFAULT  PCIEHP_DETECT_AUTO
 
+struct dummy_slot {
+       u32 number;
+       struct list_head list;
+};
+
 static int slot_detection_mode;
 static char *pciehp_detect_mode;
 module_param(pciehp_detect_mode, charp, 0444);
@@ -77,7 +82,7 @@ static int __init dummy_probe(struct pcie_device *dev)
        int pos;
        u32 slot_cap;
        acpi_handle handle;
-       struct slot *slot, *tmp;
+       struct dummy_slot *slot, *tmp;
        struct pci_dev *pdev = dev->port;
        /* Note: pciehp_detect_mode != PCIEHP_DETECT_ACPI here */
        if (pciehp_get_hp_hw_control_from_firmware(pdev))
@@ -89,11 +94,11 @@ static int __init dummy_probe(struct pcie_device *dev)
        if (!slot)
                return -ENOMEM;
        slot->number = slot_cap >> 19;
-       list_for_each_entry(tmp, &dummy_slots, slot_list) {
+       list_for_each_entry(tmp, &dummy_slots, list) {
                if (tmp->number == slot->number)
                        dup_slot_id++;
        }
-       list_add_tail(&slot->slot_list, &dummy_slots);
+       list_add_tail(&slot->list, &dummy_slots);
        handle = DEVICE_ACPI_HANDLE(&pdev->dev);
        if (!acpi_slot_detected && acpi_pci_detect_ejectable(handle))
                acpi_slot_detected = 1;
@@ -109,11 +114,11 @@ static struct pcie_port_service_driver __initdata dummy_driver = {
 
 static int __init select_detection_mode(void)
 {
-       struct slot *slot, *tmp;
+       struct dummy_slot *slot, *tmp;
        pcie_port_service_register(&dummy_driver);
        pcie_port_service_unregister(&dummy_driver);
-       list_for_each_entry_safe(slot, tmp, &dummy_slots, slot_list) {
-               list_del(&slot->slot_list);
+       list_for_each_entry_safe(slot, tmp, &dummy_slots, list) {
+               list_del(&slot->list);
                kfree(slot);
        }
        if (acpi_slot_detected && dup_slot_id)
index 2317557..bc23471 100644
@@ -99,65 +99,55 @@ static void release_slot(struct hotplug_slot *hotplug_slot)
        kfree(hotplug_slot);
 }
 
-static int init_slots(struct controller *ctrl)
+static int init_slot(struct controller *ctrl)
 {
-       struct slot *slot;
-       struct hotplug_slot *hotplug_slot;
-       struct hotplug_slot_info *info;
+       struct slot *slot = ctrl->slot;
+       struct hotplug_slot *hotplug = NULL;
+       struct hotplug_slot_info *info = NULL;
        char name[SLOT_NAME_SIZE];
        int retval = -ENOMEM;
 
-       list_for_each_entry(slot, &ctrl->slot_list, slot_list) {
-               hotplug_slot = kzalloc(sizeof(*hotplug_slot), GFP_KERNEL);
-               if (!hotplug_slot)
-                       goto error;
-
-               info = kzalloc(sizeof(*info), GFP_KERNEL);
-               if (!info)
-                       goto error_hpslot;
-
-               /* register this slot with the hotplug pci core */
-               hotplug_slot->info = info;
-               hotplug_slot->private = slot;
-               hotplug_slot->release = &release_slot;
-               hotplug_slot->ops = &pciehp_hotplug_slot_ops;
-               slot->hotplug_slot = hotplug_slot;
-               snprintf(name, SLOT_NAME_SIZE, "%u", slot->number);
-
-               ctrl_dbg(ctrl, "Registering domain:bus:dev=%04x:%02x:%02x "
-                        "hp_slot=%x sun=%x slot_device_offset=%x\n",
-                        pci_domain_nr(ctrl->pci_dev->subordinate),
-                        slot->bus, slot->device, slot->hp_slot, slot->number,
-                        ctrl->slot_device_offset);
-               retval = pci_hp_register(hotplug_slot,
-                                        ctrl->pci_dev->subordinate,
-                                        slot->device,
-                                        name);
-               if (retval) {
-                       ctrl_err(ctrl, "pci_hp_register failed with error %d\n",
-                                retval);
-                       goto error_info;
-               }
-               get_power_status(hotplug_slot, &info->power_status);
-               get_attention_status(hotplug_slot, &info->attention_status);
-               get_latch_status(hotplug_slot, &info->latch_status);
-               get_adapter_status(hotplug_slot, &info->adapter_status);
+       hotplug = kzalloc(sizeof(*hotplug), GFP_KERNEL);
+       if (!hotplug)
+               goto out;
+
+       info = kzalloc(sizeof(*info), GFP_KERNEL);
+       if (!info)
+               goto out;
+
+       /* register this slot with the hotplug pci core */
+       hotplug->info = info;
+       hotplug->private = slot;
+       hotplug->release = &release_slot;
+       hotplug->ops = &pciehp_hotplug_slot_ops;
+       slot->hotplug_slot = hotplug;
+       snprintf(name, SLOT_NAME_SIZE, "%u", PSN(ctrl));
+
+       ctrl_dbg(ctrl, "Registering domain:bus:dev=%04x:%02x:00 sun=%x\n",
+                pci_domain_nr(ctrl->pcie->port->subordinate),
+                ctrl->pcie->port->subordinate->number, PSN(ctrl));
+       retval = pci_hp_register(hotplug,
+                                ctrl->pcie->port->subordinate, 0, name);
+       if (retval) {
+               ctrl_err(ctrl,
+                        "pci_hp_register failed with error %d\n", retval);
+               goto out;
+       }
+       get_power_status(hotplug, &info->power_status);
+       get_attention_status(hotplug, &info->attention_status);
+       get_latch_status(hotplug, &info->latch_status);
+       get_adapter_status(hotplug, &info->adapter_status);
+out:
+       if (retval) {
+               kfree(info);
+               kfree(hotplug);
        }
-
-       return 0;
-error_info:
-       kfree(info);
-error_hpslot:
-       kfree(hotplug_slot);
-error:
        return retval;
 }
 
-static void cleanup_slots(struct controller *ctrl)
+static void cleanup_slot(struct controller *ctrl)
 {
-       struct slot *slot;
-       list_for_each_entry(slot, &ctrl->slot_list, slot_list)
-               pci_hp_deregister(slot->hotplug_slot);
+       pci_hp_deregister(ctrl->slot->hotplug_slot);
 }
 
 /*
@@ -173,7 +163,7 @@ static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
        hotplug_slot->info->attention_status = status;
 
        if (ATTN_LED(slot->ctrl))
-               slot->hpc_ops->set_attention_status(slot, status);
+               pciehp_set_attention_status(slot, status);
 
        return 0;
 }
@@ -208,7 +198,7 @@ static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
        ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
                  __func__, slot_name(slot));
 
-       retval = slot->hpc_ops->get_power_status(slot, value);
+       retval = pciehp_get_power_status(slot, value);
        if (retval < 0)
                *value = hotplug_slot->info->power_status;
 
@@ -223,7 +213,7 @@ static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
        ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
                  __func__, slot_name(slot));
 
-       retval = slot->hpc_ops->get_attention_status(slot, value);
+       retval = pciehp_get_attention_status(slot, value);
        if (retval < 0)
                *value = hotplug_slot->info->attention_status;
 
@@ -238,7 +228,7 @@ static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
        ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
                 __func__, slot_name(slot));
 
-       retval = slot->hpc_ops->get_latch_status(slot, value);
+       retval = pciehp_get_latch_status(slot, value);
        if (retval < 0)
                *value = hotplug_slot->info->latch_status;
 
@@ -253,7 +243,7 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
        ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
                 __func__, slot_name(slot));
 
-       retval = slot->hpc_ops->get_adapter_status(slot, value);
+       retval = pciehp_get_adapter_status(slot, value);
        if (retval < 0)
                *value = hotplug_slot->info->adapter_status;
 
@@ -269,7 +259,7 @@ static int get_max_bus_speed(struct hotplug_slot *hotplug_slot,
        ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
                 __func__, slot_name(slot));
 
-       retval = slot->hpc_ops->get_max_bus_speed(slot, value);
+       retval = pciehp_get_max_link_speed(slot, value);
        if (retval < 0)
                *value = PCI_SPEED_UNKNOWN;
 
@@ -284,7 +274,7 @@ static int get_cur_bus_speed(struct hotplug_slot *hotplug_slot, enum pci_bus_spe
        ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
                 __func__, slot_name(slot));
 
-       retval = slot->hpc_ops->get_cur_bus_speed(slot, value);
+       retval = pciehp_get_cur_link_speed(slot, value);
        if (retval < 0)
                *value = PCI_SPEED_UNKNOWN;
 
@@ -295,7 +285,7 @@ static int pciehp_probe(struct pcie_device *dev)
 {
        int rc;
        struct controller *ctrl;
-       struct slot *t_slot;
+       struct slot *slot;
        u8 value;
        struct pci_dev *pdev = dev->port;
 
@@ -314,7 +304,7 @@ static int pciehp_probe(struct pcie_device *dev)
        set_service_data(dev, ctrl);
 
        /* Setup the slot information structures */
-       rc = init_slots(ctrl);
+       rc = init_slot(ctrl);
        if (rc) {
                if (rc == -EBUSY)
                        ctrl_warn(ctrl, "Slot already registered by another "
@@ -332,15 +322,15 @@ static int pciehp_probe(struct pcie_device *dev)
        }
 
        /* Check if slot is occupied */
-       t_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset);
-       t_slot->hpc_ops->get_adapter_status(t_slot, &value);
+       slot = ctrl->slot;
+       pciehp_get_adapter_status(slot, &value);
        if (value) {
                if (pciehp_force)
-                       pciehp_enable_slot(t_slot);
+                       pciehp_enable_slot(slot);
        } else {
                /* Power off slot if not occupied */
                if (POWER_CTRL(ctrl)) {
-                       rc = t_slot->hpc_ops->power_off_slot(t_slot);
+                       rc = pciehp_power_off_slot(slot);
                        if (rc)
                                goto err_out_free_ctrl_slot;
                }
@@ -349,19 +339,19 @@ static int pciehp_probe(struct pcie_device *dev)
        return 0;
 
 err_out_free_ctrl_slot:
-       cleanup_slots(ctrl);
+       cleanup_slot(ctrl);
 err_out_release_ctlr:
-       ctrl->hpc_ops->release_ctlr(ctrl);
+       pciehp_release_ctrl(ctrl);
 err_out_none:
        return -ENODEV;
 }
 
-static void pciehp_remove (struct pcie_device *dev)
+static void pciehp_remove(struct pcie_device *dev)
 {
        struct controller *ctrl = get_service_data(dev);
 
-       cleanup_slots(ctrl);
-       ctrl->hpc_ops->release_ctlr(ctrl);
+       cleanup_slot(ctrl);
+       pciehp_release_ctrl(ctrl);
 }
 
 #ifdef CONFIG_PM
@@ -376,20 +366,20 @@ static int pciehp_resume (struct pcie_device *dev)
        dev_info(&dev->device, "%s ENTRY\n", __func__);
        if (pciehp_force) {
                struct controller *ctrl = get_service_data(dev);
-               struct slot *t_slot;
+               struct slot *slot;
                u8 status;
 
                /* reinitialize the chipset's event detection logic */
                pcie_enable_notification(ctrl);
 
-               t_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset);
+               slot = ctrl->slot;
 
                /* Check if slot is occupied */
-               t_slot->hpc_ops->get_adapter_status(t_slot, &status);
+               pciehp_get_adapter_status(slot, &status);
                if (status)
-                       pciehp_enable_slot(t_slot);
+                       pciehp_enable_slot(slot);
                else
-                       pciehp_disable_slot(t_slot);
+                       pciehp_disable_slot(slot);
        }
        return 0;
 }
index b97cb4c..84487d1 100644
@@ -82,7 +82,7 @@ u8 pciehp_handle_switch_change(struct slot *p_slot)
        /* Switch Change */
        ctrl_dbg(ctrl, "Switch interrupt received\n");
 
-       p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+       pciehp_get_latch_status(p_slot, &getstatus);
        if (getstatus) {
                /*
                 * Switch opened
@@ -114,7 +114,7 @@ u8 pciehp_handle_presence_change(struct slot *p_slot)
        /* Switch is open, assume a presence change
         * Save the presence state
         */
-       p_slot->hpc_ops->get_adapter_status(p_slot, &presence_save);
+       pciehp_get_adapter_status(p_slot, &presence_save);
        if (presence_save) {
                /*
                 * Card Present
@@ -143,7 +143,7 @@ u8 pciehp_handle_power_fault(struct slot *p_slot)
        /* power fault */
        ctrl_dbg(ctrl, "Power fault interrupt received\n");
 
-       if ( !(p_slot->hpc_ops->query_power_fault(p_slot))) {
+       if (!pciehp_query_power_fault(p_slot)) {
                /*
                 * power fault Cleared
                 */
@@ -172,7 +172,7 @@ static void set_slot_off(struct controller *ctrl, struct slot * pslot)
 {
        /* turn off slot, turn on Amber LED, turn off Green LED if supported*/
        if (POWER_CTRL(ctrl)) {
-               if (pslot->hpc_ops->power_off_slot(pslot)) {
+               if (pciehp_power_off_slot(pslot)) {
                        ctrl_err(ctrl,
                                 "Issue of Slot Power Off command failed\n");
                        return;
@@ -186,10 +186,10 @@ static void set_slot_off(struct controller *ctrl, struct slot * pslot)
        }
 
        if (PWR_LED(ctrl))
-               pslot->hpc_ops->green_led_off(pslot);
+               pciehp_green_led_off(pslot);
 
        if (ATTN_LED(ctrl)) {
-               if (pslot->hpc_ops->set_attention_status(pslot, 1)) {
+               if (pciehp_set_attention_status(pslot, 1)) {
                        ctrl_err(ctrl,
                                 "Issue of Set Attention Led command failed\n");
                        return;
@@ -208,24 +208,20 @@ static int board_added(struct slot *p_slot)
 {
        int retval = 0;
        struct controller *ctrl = p_slot->ctrl;
-       struct pci_bus *parent = ctrl->pci_dev->subordinate;
-
-       ctrl_dbg(ctrl, "%s: slot device, slot offset, hp slot = %d, %d, %d\n",
-                __func__, p_slot->device, ctrl->slot_device_offset,
-                p_slot->hp_slot);
+       struct pci_bus *parent = ctrl->pcie->port->subordinate;
 
        if (POWER_CTRL(ctrl)) {
                /* Power on slot */
-               retval = p_slot->hpc_ops->power_on_slot(p_slot);
+               retval = pciehp_power_on_slot(p_slot);
                if (retval)
                        return retval;
        }
 
        if (PWR_LED(ctrl))
-               p_slot->hpc_ops->green_led_blink(p_slot);
+               pciehp_green_led_blink(p_slot);
 
        /* Check link training status */
-       retval = p_slot->hpc_ops->check_lnk_status(ctrl);
+       retval = pciehp_check_link_status(ctrl);
        if (retval) {
                ctrl_err(ctrl, "Failed to check link status\n");
                set_slot_off(ctrl, p_slot);
@@ -233,21 +229,21 @@ static int board_added(struct slot *p_slot)
        }
 
        /* Check for a power fault */
-       if (p_slot->hpc_ops->query_power_fault(p_slot)) {
+       if (pciehp_query_power_fault(p_slot)) {
                ctrl_dbg(ctrl, "Power fault detected\n");
-               retval = POWER_FAILURE;
+               retval = -EIO;
                goto err_exit;
        }
 
        retval = pciehp_configure_device(p_slot);
        if (retval) {
-               ctrl_err(ctrl, "Cannot add device at %04x:%02x:%02x\n",
-                        pci_domain_nr(parent), p_slot->bus, p_slot->device);
+               ctrl_err(ctrl, "Cannot add device at %04x:%02x:00\n",
+                        pci_domain_nr(parent), parent->number);
                goto err_exit;
        }
 
        if (PWR_LED(ctrl))
-               p_slot->hpc_ops->green_led_on(p_slot);
+               pciehp_green_led_on(p_slot);
 
        return 0;
 
@@ -269,11 +265,9 @@ static int remove_board(struct slot *p_slot)
        if (retval)
                return retval;
 
-       ctrl_dbg(ctrl, "%s: hp_slot = %d\n", __func__, p_slot->hp_slot);
-
        if (POWER_CTRL(ctrl)) {
                /* power off slot */
-               retval = p_slot->hpc_ops->power_off_slot(p_slot);
+               retval = pciehp_power_off_slot(p_slot);
                if (retval) {
                        ctrl_err(ctrl,
                                 "Issue of Slot Disable command failed\n");
@@ -287,9 +281,9 @@ static int remove_board(struct slot *p_slot)
                msleep(1000);
        }
 
+       /* turn off Green LED */
        if (PWR_LED(ctrl))
-               /* turn off Green LED */
-               p_slot->hpc_ops->green_led_off(p_slot);
+               pciehp_green_led_off(p_slot);
 
        return 0;
 }
@@ -317,18 +311,17 @@ static void pciehp_power_thread(struct work_struct *work)
        case POWEROFF_STATE:
                mutex_unlock(&p_slot->lock);
                ctrl_dbg(p_slot->ctrl,
-                        "Disabling domain:bus:device=%04x:%02x:%02x\n",
-                        pci_domain_nr(p_slot->ctrl->pci_dev->subordinate),
-                        p_slot->bus, p_slot->device);
+                        "Disabling domain:bus:device=%04x:%02x:00\n",
+                        pci_domain_nr(p_slot->ctrl->pcie->port->subordinate),
+                        p_slot->ctrl->pcie->port->subordinate->number);
                pciehp_disable_slot(p_slot);
                mutex_lock(&p_slot->lock);
                p_slot->state = STATIC_STATE;
                break;
        case POWERON_STATE:
                mutex_unlock(&p_slot->lock);
-               if (pciehp_enable_slot(p_slot) &&
-                   PWR_LED(p_slot->ctrl))
-                       p_slot->hpc_ops->green_led_off(p_slot);
+               if (pciehp_enable_slot(p_slot) && PWR_LED(p_slot->ctrl))
+                       pciehp_green_led_off(p_slot);
                mutex_lock(&p_slot->lock);
                p_slot->state = STATIC_STATE;
                break;
@@ -379,10 +372,10 @@ static int update_slot_info(struct slot *slot)
        if (!info)
                return -ENOMEM;
 
-       slot->hpc_ops->get_power_status(slot, &(info->power_status));
-       slot->hpc_ops->get_attention_status(slot, &(info->attention_status));
-       slot->hpc_ops->get_latch_status(slot, &(info->latch_status));
-       slot->hpc_ops->get_adapter_status(slot, &(info->adapter_status));
+       pciehp_get_power_status(slot, &info->power_status);
+       pciehp_get_attention_status(slot, &info->attention_status);
+       pciehp_get_latch_status(slot, &info->latch_status);
+       pciehp_get_adapter_status(slot, &info->adapter_status);
 
        result = pci_hp_change_slot_info(slot->hotplug_slot, info);
        kfree (info);
@@ -399,7 +392,7 @@ static void handle_button_press_event(struct slot *p_slot)
 
        switch (p_slot->state) {
        case STATIC_STATE:
-               p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
+               pciehp_get_power_status(p_slot, &getstatus);
                if (getstatus) {
                        p_slot->state = BLINKINGOFF_STATE;
                        ctrl_info(ctrl,
@@ -413,9 +406,9 @@ static void handle_button_press_event(struct slot *p_slot)
                }
                /* blink green LED and turn off amber */
                if (PWR_LED(ctrl))
-                       p_slot->hpc_ops->green_led_blink(p_slot);
+                       pciehp_green_led_blink(p_slot);
                if (ATTN_LED(ctrl))
-                       p_slot->hpc_ops->set_attention_status(p_slot, 0);
+                       pciehp_set_attention_status(p_slot, 0);
 
                schedule_delayed_work(&p_slot->work, 5*HZ);
                break;
@@ -430,13 +423,13 @@ static void handle_button_press_event(struct slot *p_slot)
                cancel_delayed_work(&p_slot->work);
                if (p_slot->state == BLINKINGOFF_STATE) {
                        if (PWR_LED(ctrl))
-                               p_slot->hpc_ops->green_led_on(p_slot);
+                               pciehp_green_led_on(p_slot);
                } else {
                        if (PWR_LED(ctrl))
-                               p_slot->hpc_ops->green_led_off(p_slot);
+                               pciehp_green_led_off(p_slot);
                }
                if (ATTN_LED(ctrl))
-                       p_slot->hpc_ops->set_attention_status(p_slot, 0);
+                       pciehp_set_attention_status(p_slot, 0);
                ctrl_info(ctrl, "PCI slot #%s - action canceled "
                          "due to button press\n", slot_name(p_slot));
                p_slot->state = STATIC_STATE;
@@ -474,7 +467,7 @@ static void handle_surprise_event(struct slot *p_slot)
        info->p_slot = p_slot;
        INIT_WORK(&info->work, pciehp_power_thread);
 
-       p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
+       pciehp_get_adapter_status(p_slot, &getstatus);
        if (!getstatus)
                p_slot->state = POWEROFF_STATE;
        else
@@ -498,9 +491,9 @@ static void interrupt_event_handler(struct work_struct *work)
                if (!POWER_CTRL(ctrl))
                        break;
                if (ATTN_LED(ctrl))
-                       p_slot->hpc_ops->set_attention_status(p_slot, 1);
+                       pciehp_set_attention_status(p_slot, 1);
                if (PWR_LED(ctrl))
-                       p_slot->hpc_ops->green_led_off(p_slot);
+                       pciehp_green_led_off(p_slot);
                break;
        case INT_PRESENCE_ON:
        case INT_PRESENCE_OFF:
@@ -525,45 +518,38 @@ int pciehp_enable_slot(struct slot *p_slot)
        int rc;
        struct controller *ctrl = p_slot->ctrl;
 
-       /* Check to see if (latch closed, card present, power off) */
-       mutex_lock(&p_slot->ctrl->crit_sect);
-
-       rc = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
+       rc = pciehp_get_adapter_status(p_slot, &getstatus);
        if (rc || !getstatus) {
                ctrl_info(ctrl, "No adapter on slot(%s)\n", slot_name(p_slot));
-               mutex_unlock(&p_slot->ctrl->crit_sect);
                return -ENODEV;
        }
        if (MRL_SENS(p_slot->ctrl)) {
-               rc = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+               rc = pciehp_get_latch_status(p_slot, &getstatus);
                if (rc || getstatus) {
                        ctrl_info(ctrl, "Latch open on slot(%s)\n",
                                  slot_name(p_slot));
-                       mutex_unlock(&p_slot->ctrl->crit_sect);
                        return -ENODEV;
                }
        }
 
        if (POWER_CTRL(p_slot->ctrl)) {
-               rc = p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
+               rc = pciehp_get_power_status(p_slot, &getstatus);
                if (rc || getstatus) {
                        ctrl_info(ctrl, "Already enabled on slot(%s)\n",
                                  slot_name(p_slot));
-                       mutex_unlock(&p_slot->ctrl->crit_sect);
                        return -EINVAL;
                }
        }
 
-       p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+       pciehp_get_latch_status(p_slot, &getstatus);
 
        rc = board_added(p_slot);
        if (rc) {
-               p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+               pciehp_get_latch_status(p_slot, &getstatus);
        }
 
        update_slot_info(p_slot);
 
-       mutex_unlock(&p_slot->ctrl->crit_sect);
        return rc;
 }
 
@@ -577,35 +563,29 @@ int pciehp_disable_slot(struct slot *p_slot)
        if (!p_slot->ctrl)
                return 1;
 
-       /* Check to see if (latch closed, card present, power on) */
-       mutex_lock(&p_slot->ctrl->crit_sect);
-
        if (!HP_SUPR_RM(p_slot->ctrl)) {
-               ret = p_slot->hpc_ops->get_adapter_status(p_slot, &getstatus);
+               ret = pciehp_get_adapter_status(p_slot, &getstatus);
                if (ret || !getstatus) {
                        ctrl_info(ctrl, "No adapter on slot(%s)\n",
                                  slot_name(p_slot));
-                       mutex_unlock(&p_slot->ctrl->crit_sect);
                        return -ENODEV;
                }
        }
 
        if (MRL_SENS(p_slot->ctrl)) {
-               ret = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus);
+               ret = pciehp_get_latch_status(p_slot, &getstatus);
                if (ret || getstatus) {
                        ctrl_info(ctrl, "Latch open on slot(%s)\n",
                                  slot_name(p_slot));
-                       mutex_unlock(&p_slot->ctrl->crit_sect);
                        return -ENODEV;
                }
        }
 
        if (POWER_CTRL(p_slot->ctrl)) {
-               ret = p_slot->hpc_ops->get_power_status(p_slot, &getstatus);
+               ret = pciehp_get_power_status(p_slot, &getstatus);
                if (ret || !getstatus) {
                        ctrl_info(ctrl, "Already disabled on slot(%s)\n",
                                  slot_name(p_slot));
-                       mutex_unlock(&p_slot->ctrl->crit_sect);
                        return -EINVAL;
                }
        }
@@ -613,7 +593,6 @@ int pciehp_disable_slot(struct slot *p_slot)
        ret = remove_board(p_slot);
        update_slot_info(p_slot);
 
-       mutex_unlock(&p_slot->ctrl->crit_sect);
        return ret;
 }
 
index 271f917..9ef4605 100644
@@ -44,25 +44,25 @@ static atomic_t pciehp_num_controllers = ATOMIC_INIT(0);
 
 static inline int pciehp_readw(struct controller *ctrl, int reg, u16 *value)
 {
-       struct pci_dev *dev = ctrl->pci_dev;
+       struct pci_dev *dev = ctrl->pcie->port;
        return pci_read_config_word(dev, ctrl->cap_base + reg, value);
 }
 
 static inline int pciehp_readl(struct controller *ctrl, int reg, u32 *value)
 {
-       struct pci_dev *dev = ctrl->pci_dev;
+       struct pci_dev *dev = ctrl->pcie->port;
        return pci_read_config_dword(dev, ctrl->cap_base + reg, value);
 }
 
 static inline int pciehp_writew(struct controller *ctrl, int reg, u16 value)
 {
-       struct pci_dev *dev = ctrl->pci_dev;
+       struct pci_dev *dev = ctrl->pcie->port;
        return pci_write_config_word(dev, ctrl->cap_base + reg, value);
 }
 
 static inline int pciehp_writel(struct controller *ctrl, int reg, u32 value)
 {
-       struct pci_dev *dev = ctrl->pci_dev;
+       struct pci_dev *dev = ctrl->pcie->port;
        return pci_write_config_dword(dev, ctrl->cap_base + reg, value);
 }
 
@@ -266,7 +266,7 @@ static void pcie_wait_link_active(struct controller *ctrl)
        ctrl_dbg(ctrl, "Data Link Layer Link Active not set in 1000 msec\n");
 }
 
-static int hpc_check_lnk_status(struct controller *ctrl)
+int pciehp_check_link_status(struct controller *ctrl)
 {
        u16 lnk_status;
        int retval = 0;
@@ -305,7 +305,7 @@ static int hpc_check_lnk_status(struct controller *ctrl)
        return retval;
 }
 
-static int hpc_get_attention_status(struct slot *slot, u8 *status)
+int pciehp_get_attention_status(struct slot *slot, u8 *status)
 {
        struct controller *ctrl = slot->ctrl;
        u16 slot_ctrl;
@@ -344,7 +344,7 @@ static int hpc_get_attention_status(struct slot *slot, u8 *status)
        return 0;
 }
 
-static int hpc_get_power_status(struct slot *slot, u8 *status)
+int pciehp_get_power_status(struct slot *slot, u8 *status)
 {
        struct controller *ctrl = slot->ctrl;
        u16 slot_ctrl;
@@ -376,7 +376,7 @@ static int hpc_get_power_status(struct slot *slot, u8 *status)
        return retval;
 }
 
-static int hpc_get_latch_status(struct slot *slot, u8 *status)
+int pciehp_get_latch_status(struct slot *slot, u8 *status)
 {
        struct controller *ctrl = slot->ctrl;
        u16 slot_status;
@@ -392,7 +392,7 @@ static int hpc_get_latch_status(struct slot *slot, u8 *status)
        return 0;
 }
 
-static int hpc_get_adapter_status(struct slot *slot, u8 *status)
+int pciehp_get_adapter_status(struct slot *slot, u8 *status)
 {
        struct controller *ctrl = slot->ctrl;
        u16 slot_status;
@@ -408,7 +408,7 @@ static int hpc_get_adapter_status(struct slot *slot, u8 *status)
        return 0;
 }
 
-static int hpc_query_power_fault(struct slot *slot)
+int pciehp_query_power_fault(struct slot *slot)
 {
        struct controller *ctrl = slot->ctrl;
        u16 slot_status;
@@ -422,7 +422,7 @@ static int hpc_query_power_fault(struct slot *slot)
        return !!(slot_status & PCI_EXP_SLTSTA_PFD);
 }
 
-static int hpc_set_attention_status(struct slot *slot, u8 value)
+int pciehp_set_attention_status(struct slot *slot, u8 value)
 {
        struct controller *ctrl = slot->ctrl;
        u16 slot_cmd;
@@ -450,7 +450,7 @@ static int hpc_set_attention_status(struct slot *slot, u8 value)
        return rc;
 }
 
-static void hpc_set_green_led_on(struct slot *slot)
+void pciehp_green_led_on(struct slot *slot)
 {
        struct controller *ctrl = slot->ctrl;
        u16 slot_cmd;
@@ -463,7 +463,7 @@ static void hpc_set_green_led_on(struct slot *slot)
                 __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
 }
 
-static void hpc_set_green_led_off(struct slot *slot)
+void pciehp_green_led_off(struct slot *slot)
 {
        struct controller *ctrl = slot->ctrl;
        u16 slot_cmd;
@@ -476,7 +476,7 @@ static void hpc_set_green_led_off(struct slot *slot)
                 __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
 }
 
-static void hpc_set_green_led_blink(struct slot *slot)
+void pciehp_green_led_blink(struct slot *slot)
 {
        struct controller *ctrl = slot->ctrl;
        u16 slot_cmd;
@@ -489,7 +489,7 @@ static void hpc_set_green_led_blink(struct slot *slot)
                 __func__, ctrl->cap_base + PCI_EXP_SLTCTL, slot_cmd);
 }
 
-static int hpc_power_on_slot(struct slot * slot)
+int pciehp_power_on_slot(struct slot * slot)
 {
        struct controller *ctrl = slot->ctrl;
        u16 slot_cmd;
@@ -497,8 +497,6 @@ static int hpc_power_on_slot(struct slot * slot)
        u16 slot_status;
        int retval = 0;
 
-       ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
-
        /* Clear sticky power-fault bit from previous power failures */
        retval = pciehp_readw(ctrl, PCI_EXP_SLTSTA, &slot_status);
        if (retval) {
@@ -539,7 +537,7 @@ static int hpc_power_on_slot(struct slot * slot)
 
 static inline int pcie_mask_bad_dllp(struct controller *ctrl)
 {
-       struct pci_dev *dev = ctrl->pci_dev;
+       struct pci_dev *dev = ctrl->pcie->port;
        int pos;
        u32 reg;
 
@@ -556,7 +554,7 @@ static inline int pcie_mask_bad_dllp(struct controller *ctrl)
 
 static inline void pcie_unmask_bad_dllp(struct controller *ctrl)
 {
-       struct pci_dev *dev = ctrl->pci_dev;
+       struct pci_dev *dev = ctrl->pcie->port;
        u32 reg;
        int pos;
 
@@ -570,7 +568,7 @@ static inline void pcie_unmask_bad_dllp(struct controller *ctrl)
        pci_write_config_dword(dev, pos + PCI_ERR_COR_MASK, reg);
 }
 
-static int hpc_power_off_slot(struct slot * slot)
+int pciehp_power_off_slot(struct slot * slot)
 {
        struct controller *ctrl = slot->ctrl;
        u16 slot_cmd;
@@ -578,8 +576,6 @@ static int hpc_power_off_slot(struct slot * slot)
        int retval = 0;
        int changed;
 
-       ctrl_dbg(ctrl, "%s: slot->hp_slot %x\n", __func__, slot->hp_slot);
-
        /*
         * Set Bad DLLP Mask bit in Correctable Error Mask
         * Register. This is the workaround against Bad DLLP error
@@ -614,8 +610,8 @@ static int hpc_power_off_slot(struct slot * slot)
 static irqreturn_t pcie_isr(int irq, void *dev_id)
 {
        struct controller *ctrl = (struct controller *)dev_id;
+       struct slot *slot = ctrl->slot;
        u16 detected, intr_loc;
-       struct slot *p_slot;
 
        /*
         * In order to guarantee that all interrupt events are
@@ -656,29 +652,27 @@ static irqreturn_t pcie_isr(int irq, void *dev_id)
        if (!(intr_loc & ~PCI_EXP_SLTSTA_CC))
                return IRQ_HANDLED;
 
-       p_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset);
-
        /* Check MRL Sensor Changed */
        if (intr_loc & PCI_EXP_SLTSTA_MRLSC)
-               pciehp_handle_switch_change(p_slot);
+               pciehp_handle_switch_change(slot);
 
        /* Check Attention Button Pressed */
        if (intr_loc & PCI_EXP_SLTSTA_ABP)
-               pciehp_handle_attention_button(p_slot);
+               pciehp_handle_attention_button(slot);
 
        /* Check Presence Detect Changed */
        if (intr_loc & PCI_EXP_SLTSTA_PDC)
-               pciehp_handle_presence_change(p_slot);
+               pciehp_handle_presence_change(slot);
 
        /* Check Power Fault Detected */
        if ((intr_loc & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) {
                ctrl->power_fault_detected = 1;
-               pciehp_handle_power_fault(p_slot);
+               pciehp_handle_power_fault(slot);
        }
        return IRQ_HANDLED;
 }
 
-static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
+int pciehp_get_max_link_speed(struct slot *slot, enum pci_bus_speed *value)
 {
        struct controller *ctrl = slot->ctrl;
        enum pcie_link_speed lnk_speed;
@@ -709,7 +703,7 @@ static int hpc_get_max_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
        return retval;
 }
 
-static int hpc_get_max_lnk_width(struct slot *slot,
+int pciehp_get_max_lnk_width(struct slot *slot,
                                 enum pcie_link_width *value)
 {
        struct controller *ctrl = slot->ctrl;
@@ -759,7 +753,7 @@ static int hpc_get_max_lnk_width(struct slot *slot,
        return retval;
 }
 
-static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
+int pciehp_get_cur_link_speed(struct slot *slot, enum pci_bus_speed *value)
 {
        struct controller *ctrl = slot->ctrl;
        enum pcie_link_speed lnk_speed = PCI_SPEED_UNKNOWN;
@@ -791,7 +785,7 @@ static int hpc_get_cur_lnk_speed(struct slot *slot, enum pci_bus_speed *value)
        return retval;
 }
 
-static int hpc_get_cur_lnk_width(struct slot *slot,
+int pciehp_get_cur_lnk_width(struct slot *slot,
                                 enum pcie_link_width *value)
 {
        struct controller *ctrl = slot->ctrl;
@@ -842,30 +836,6 @@ static int hpc_get_cur_lnk_width(struct slot *slot,
        return retval;
 }
 
-static void pcie_release_ctrl(struct controller *ctrl);
-static struct hpc_ops pciehp_hpc_ops = {
-       .power_on_slot                  = hpc_power_on_slot,
-       .power_off_slot                 = hpc_power_off_slot,
-       .set_attention_status           = hpc_set_attention_status,
-       .get_power_status               = hpc_get_power_status,
-       .get_attention_status           = hpc_get_attention_status,
-       .get_latch_status               = hpc_get_latch_status,
-       .get_adapter_status             = hpc_get_adapter_status,
-
-       .get_max_bus_speed              = hpc_get_max_lnk_speed,
-       .get_cur_bus_speed              = hpc_get_cur_lnk_speed,
-       .get_max_lnk_width              = hpc_get_max_lnk_width,
-       .get_cur_lnk_width              = hpc_get_cur_lnk_width,
-
-       .query_power_fault              = hpc_query_power_fault,
-       .green_led_on                   = hpc_set_green_led_on,
-       .green_led_off                  = hpc_set_green_led_off,
-       .green_led_blink                = hpc_set_green_led_blink,
-
-       .release_ctlr                   = pcie_release_ctrl,
-       .check_lnk_status               = hpc_check_lnk_status,
-};
-
 int pcie_enable_notification(struct controller *ctrl)
 {
        u16 cmd, mask;
@@ -930,23 +900,16 @@ static int pcie_init_slot(struct controller *ctrl)
        if (!slot)
                return -ENOMEM;
 
-       slot->hp_slot = 0;
        slot->ctrl = ctrl;
-       slot->bus = ctrl->pci_dev->subordinate->number;
-       slot->device = ctrl->slot_device_offset + slot->hp_slot;
-       slot->hpc_ops = ctrl->hpc_ops;
-       slot->number = ctrl->first_slot;
        mutex_init(&slot->lock);
        INIT_DELAYED_WORK(&slot->work, pciehp_queue_pushbutton_work);
-       list_add(&slot->slot_list, &ctrl->slot_list);
+       ctrl->slot = slot;
        return 0;
 }
 
 static void pcie_cleanup_slot(struct controller *ctrl)
 {
-       struct slot *slot;
-       slot = list_first_entry(&ctrl->slot_list, struct slot, slot_list);
-       list_del(&slot->slot_list);
+       struct slot *slot = ctrl->slot;
        cancel_delayed_work(&slot->work);
        flush_scheduled_work();
        flush_workqueue(pciehp_wq);
@@ -957,7 +920,7 @@ static inline void dbg_ctrl(struct controller *ctrl)
 {
        int i;
        u16 reg16;
-       struct pci_dev *pdev = ctrl->pci_dev;
+       struct pci_dev *pdev = ctrl->pcie->port;
 
        if (!pciehp_debug)
                return;
@@ -980,7 +943,7 @@ static inline void dbg_ctrl(struct controller *ctrl)
                          (unsigned long long)pci_resource_start(pdev, i));
        }
        ctrl_info(ctrl, "Slot Capabilities      : 0x%08x\n", ctrl->slot_cap);
-       ctrl_info(ctrl, "  Physical Slot Number : %d\n", ctrl->first_slot);
+       ctrl_info(ctrl, "  Physical Slot Number : %d\n", PSN(ctrl));
        ctrl_info(ctrl, "  Attention Button     : %3s\n",
                  ATTN_BUTTN(ctrl) ? "yes" : "no");
        ctrl_info(ctrl, "  Power Controller     : %3s\n",
@@ -1014,10 +977,7 @@ struct controller *pcie_init(struct pcie_device *dev)
                dev_err(&dev->device, "%s: Out of memory\n", __func__);
                goto abort;
        }
-       INIT_LIST_HEAD(&ctrl->slot_list);
-
        ctrl->pcie = dev;
-       ctrl->pci_dev = pdev;
        ctrl->cap_base = pci_find_capability(pdev, PCI_CAP_ID_EXP);
        if (!ctrl->cap_base) {
                ctrl_err(ctrl, "Cannot find PCI Express capability\n");
@@ -1029,11 +989,6 @@ struct controller *pcie_init(struct pcie_device *dev)
        }
 
        ctrl->slot_cap = slot_cap;
-       ctrl->first_slot = slot_cap >> 19;
-       ctrl->slot_device_offset = 0;
-       ctrl->num_slots = 1;
-       ctrl->hpc_ops = &pciehp_hpc_ops;
-       mutex_init(&ctrl->crit_sect);
        mutex_init(&ctrl->ctrl_lock);
        init_waitqueue_head(&ctrl->queue);
        dbg_ctrl(ctrl);
@@ -1089,7 +1044,7 @@ abort:
        return NULL;
 }
 
-void pcie_release_ctrl(struct controller *ctrl)
+void pciehp_release_ctrl(struct controller *ctrl)
 {
        pcie_shutdown_notification(ctrl);
        pcie_cleanup_slot(ctrl);
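
The hunks above are the heart of this refactor: pciehp has exactly one controller implementation, so the hpc_ops function-pointer table and its hpc_* wrappers are deleted and callers invoke the exported pciehp_* helpers directly. Reduced to a sketch (a hypothetical caller, assuming the driver's pciehp.h declarations; not from the patch):

    static int slot_power_is_on(struct slot *p_slot)
    {
            u8 getstatus = 0;

            /* formerly: p_slot->hpc_ops->get_power_status(p_slot, &getstatus) */
            if (pciehp_get_power_status(p_slot, &getstatus))
                    return 0;       /* read failed: report powered off */
            return getstatus;
    }

The same simplification removes the per-controller slot list: with one slot per PCIe port, pcie_isr reaches it as ctrl->slot instead of calling pciehp_find_slot().
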
index 02e24d6..2173310 100644
@@ -63,27 +63,27 @@ static int __ref pciehp_add_bridge(struct pci_dev *dev)
 int pciehp_configure_device(struct slot *p_slot)
 {
        struct pci_dev *dev;
-       struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate;
+       struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
        int num, fn;
        struct controller *ctrl = p_slot->ctrl;
 
-       dev = pci_get_slot(parent, PCI_DEVFN(p_slot->device, 0));
+       dev = pci_get_slot(parent, PCI_DEVFN(0, 0));
        if (dev) {
                ctrl_err(ctrl, "Device %s already exists "
-                        "at %04x:%02x:%02x, cannot hot-add\n", pci_name(dev),
-                        pci_domain_nr(parent), p_slot->bus, p_slot->device);
+                        "at %04x:%02x:00, cannot hot-add\n", pci_name(dev),
+                        pci_domain_nr(parent), parent->number);
                pci_dev_put(dev);
                return -EINVAL;
        }
 
-       num = pci_scan_slot(parent, PCI_DEVFN(p_slot->device, 0));
+       num = pci_scan_slot(parent, PCI_DEVFN(0, 0));
        if (num == 0) {
                ctrl_err(ctrl, "No new device found\n");
                return -ENODEV;
        }
 
        for (fn = 0; fn < 8; fn++) {
-               dev = pci_get_slot(parent, PCI_DEVFN(p_slot->device, fn));
+               dev = pci_get_slot(parent, PCI_DEVFN(0, fn));
                if (!dev)
                        continue;
                if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
@@ -111,19 +111,18 @@ int pciehp_unconfigure_device(struct slot *p_slot)
        int j;
        u8 bctl = 0;
        u8 presence = 0;
-       struct pci_bus *parent = p_slot->ctrl->pci_dev->subordinate;
+       struct pci_bus *parent = p_slot->ctrl->pcie->port->subordinate;
        u16 command;
        struct controller *ctrl = p_slot->ctrl;
 
-       ctrl_dbg(ctrl, "%s: domain:bus:dev = %04x:%02x:%02x\n",
-                __func__, pci_domain_nr(parent), p_slot->bus, p_slot->device);
-       ret = p_slot->hpc_ops->get_adapter_status(p_slot, &presence);
+       ctrl_dbg(ctrl, "%s: domain:bus:dev = %04x:%02x:00\n",
+                __func__, pci_domain_nr(parent), parent->number);
+       ret = pciehp_get_adapter_status(p_slot, &presence);
        if (ret)
                presence = 0;
 
        for (j = 0; j < 8; j++) {
-               struct pci_dev* temp = pci_get_slot(parent,
-                               (p_slot->device << 3) | j);
+               struct pci_dev* temp = pci_get_slot(parent, PCI_DEVFN(0, j));
                if (!temp)
                        continue;
                if ((temp->class >> 16) == PCI_BASE_CLASS_DISPLAY) {
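
With a single slot per port, the hot-added device is always device 0 on the port's secondary bus, so the slot/device arithmetic collapses to PCI_DEVFN(0, fn). The enumeration pattern used above, as a standalone sketch:

    #include <linux/pci.h>

    static void scan_slot_functions(struct pci_bus *parent)
    {
            int fn;

            for (fn = 0; fn < 8; fn++) {
                    struct pci_dev *dev = pci_get_slot(parent, PCI_DEVFN(0, fn));

                    if (!dev)
                            continue;
                    /* ... per-function configuration would go here ... */
                    pci_dev_put(dev);   /* balance pci_get_slot's reference */
            }
    }
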
index 10c0e62..2ce8f9c 100644
@@ -318,6 +318,8 @@ static int __init aer_service_init(void)
 {
        if (pcie_aer_disable)
                return -ENXIO;
+       if (!pci_msi_enabled())
+               return -ENXIO;
        return pcie_port_service_register(&aerdriver);
 }
 
index f289ca9..745402e 100644
@@ -303,9 +303,6 @@ static void pcie_get_aspm_reg(struct pci_dev *pdev,
        pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
        pci_read_config_dword(pdev, pos + PCI_EXP_LNKCAP, &reg32);
        info->support = (reg32 & PCI_EXP_LNKCAP_ASPMS) >> 10;
-       /* 00b and 10b are defined as "Reserved". */
-       if (info->support == PCIE_LINK_STATE_L1)
-               info->support = 0;
        info->latency_encoding_l0s = (reg32 & PCI_EXP_LNKCAP_L0SEL) >> 12;
        info->latency_encoding_l1  = (reg32 & PCI_EXP_LNKCAP_L1EL) >> 15;
        pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, &reg16);
index 047394d..3247828 100644
@@ -71,6 +71,7 @@ pxa2xx-obj-$(CONFIG_MACH_ARMCORE)             += pxa2xx_cm_x2xx_cs.o
 pxa2xx-obj-$(CONFIG_ARCH_VIPER)                        += pxa2xx_viper.o
 pxa2xx-obj-$(CONFIG_TRIZEPS_PCMCIA)            += pxa2xx_trizeps4.o
 pxa2xx-obj-$(CONFIG_MACH_PALMTX)               += pxa2xx_palmtx.o
+pxa2xx-obj-$(CONFIG_MACH_PALMTC)               += pxa2xx_palmtc.o
 pxa2xx-obj-$(CONFIG_MACH_PALMLD)               += pxa2xx_palmld.o
 pxa2xx-obj-$(CONFIG_MACH_E740)                 += pxa2xx_e740.o
 pxa2xx-obj-$(CONFIG_MACH_STARGATE2)            += pxa2xx_stargate2.o
index c49a726..87e22ef 100644
@@ -300,25 +300,29 @@ static int pxa2xx_drv_pcmcia_remove(struct platform_device *dev)
        return soc_common_drv_pcmcia_remove(&dev->dev);
 }
 
-static int pxa2xx_drv_pcmcia_suspend(struct platform_device *dev, pm_message_t state)
+static int pxa2xx_drv_pcmcia_suspend(struct device *dev)
 {
-       return pcmcia_socket_dev_suspend(&dev->dev, state);
+       return pcmcia_socket_dev_suspend(dev, PMSG_SUSPEND);
 }
 
-static int pxa2xx_drv_pcmcia_resume(struct platform_device *dev)
+static int pxa2xx_drv_pcmcia_resume(struct device *dev)
 {
-       pxa2xx_configure_sockets(&dev->dev);
-       return pcmcia_socket_dev_resume(&dev->dev);
+       pxa2xx_configure_sockets(dev);
+       return pcmcia_socket_dev_resume(dev);
 }
 
+static struct dev_pm_ops  pxa2xx_drv_pcmcia_pm_ops = {
+       .suspend        = pxa2xx_drv_pcmcia_suspend,
+       .resume         = pxa2xx_drv_pcmcia_resume,
+};
+
 static struct platform_driver pxa2xx_pcmcia_driver = {
        .probe          = pxa2xx_drv_pcmcia_probe,
        .remove         = pxa2xx_drv_pcmcia_remove,
-       .suspend        = pxa2xx_drv_pcmcia_suspend,
-       .resume         = pxa2xx_drv_pcmcia_resume,
        .driver         = {
                .name   = "pxa2xx-pcmcia",
                .owner  = THIS_MODULE,
+               .pm     = &pxa2xx_drv_pcmcia_pm_ops,
        },
 };
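
This hunk is the first of many in this merge that convert a platform driver from the legacy suspend/resume entry points to struct dev_pm_ops; the rtc, serial, spi, USB host, backlight and framebuffer hunks below all follow the same shape. A minimal sketch of the conversion (driver name "foo" and its private struct are hypothetical):

    #include <linux/platform_device.h>

    struct foo_priv;                        /* opaque driver state */

    static int foo_suspend(struct device *dev)
    {
            /* was: platform_get_drvdata(pdev) on a struct platform_device */
            struct foo_priv *priv = dev_get_drvdata(dev);
            /* ... quiesce the hardware using priv ... */
            return 0;
    }

    static int foo_resume(struct device *dev)
    {
            struct foo_priv *priv = dev_get_drvdata(dev);
            /* ... bring the hardware back up ... */
            return 0;
    }

    static struct dev_pm_ops foo_pm_ops = {
            .suspend        = foo_suspend,
            .resume         = foo_resume,
    };

    static struct platform_driver foo_driver = {
            .driver = {
                    .name   = "foo",
                    .owner  = THIS_MODULE,
                    .pm     = &foo_pm_ops,  /* replaces .suspend/.resume */
            },
    };
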
 
diff --git a/drivers/pcmcia/pxa2xx_palmtc.c b/drivers/pcmcia/pxa2xx_palmtc.c
new file mode 100644
index 0000000..3a8993e
--- /dev/null
@@ -0,0 +1,230 @@
+/*
+ * linux/drivers/pcmcia/pxa2xx_palmtc.c
+ *
+ * Driver for Palm Tungsten|C PCMCIA
+ *
+ * Copyright (C) 2008 Alex Osborne <ato@meshy.org>
+ * Copyright (C) 2009 Marek Vasut <marek.vasut@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/delay.h>
+
+#include <asm/mach-types.h>
+#include <mach/palmtc.h>
+#include "soc_common.h"
+
+static int palmtc_pcmcia_hw_init(struct soc_pcmcia_socket *skt)
+{
+       int ret;
+
+       ret = gpio_request(GPIO_NR_PALMTC_PCMCIA_POWER1, "PCMCIA PWR1");
+       if (ret)
+               goto err1;
+       ret = gpio_direction_output(GPIO_NR_PALMTC_PCMCIA_POWER1, 0);
+       if (ret)
+               goto err2;
+
+       ret = gpio_request(GPIO_NR_PALMTC_PCMCIA_POWER2, "PCMCIA PWR2");
+       if (ret)
+               goto err2;
+       ret = gpio_direction_output(GPIO_NR_PALMTC_PCMCIA_POWER2, 0);
+       if (ret)
+               goto err3;
+
+       ret = gpio_request(GPIO_NR_PALMTC_PCMCIA_POWER3, "PCMCIA PWR3");
+       if (ret)
+               goto err3;
+       ret = gpio_direction_output(GPIO_NR_PALMTC_PCMCIA_POWER3, 0);
+       if (ret)
+               goto err4;
+
+       ret = gpio_request(GPIO_NR_PALMTC_PCMCIA_RESET, "PCMCIA RST");
+       if (ret)
+               goto err4;
+       ret = gpio_direction_output(GPIO_NR_PALMTC_PCMCIA_RESET, 1);
+       if (ret)
+               goto err5;
+
+       ret = gpio_request(GPIO_NR_PALMTC_PCMCIA_READY, "PCMCIA RDY");
+       if (ret)
+               goto err5;
+       ret = gpio_direction_input(GPIO_NR_PALMTC_PCMCIA_READY);
+       if (ret)
+               goto err6;
+
+       ret = gpio_request(GPIO_NR_PALMTC_PCMCIA_PWRREADY, "PCMCIA PWRRDY");
+       if (ret)
+               goto err6;
+       ret = gpio_direction_input(GPIO_NR_PALMTC_PCMCIA_PWRREADY);
+       if (ret)
+               goto err7;
+
+       skt->irq = IRQ_GPIO(GPIO_NR_PALMTC_PCMCIA_READY);
+       return 0;
+
+err7:
+       gpio_free(GPIO_NR_PALMTC_PCMCIA_PWRREADY);
+err6:
+       gpio_free(GPIO_NR_PALMTC_PCMCIA_READY);
+err5:
+       gpio_free(GPIO_NR_PALMTC_PCMCIA_RESET);
+err4:
+       gpio_free(GPIO_NR_PALMTC_PCMCIA_POWER3);
+err3:
+       gpio_free(GPIO_NR_PALMTC_PCMCIA_POWER2);
+err2:
+       gpio_free(GPIO_NR_PALMTC_PCMCIA_POWER1);
+err1:
+       return ret;
+}
+
+static void palmtc_pcmcia_hw_shutdown(struct soc_pcmcia_socket *skt)
+{
+       gpio_free(GPIO_NR_PALMTC_PCMCIA_PWRREADY);
+       gpio_free(GPIO_NR_PALMTC_PCMCIA_READY);
+       gpio_free(GPIO_NR_PALMTC_PCMCIA_RESET);
+       gpio_free(GPIO_NR_PALMTC_PCMCIA_POWER3);
+       gpio_free(GPIO_NR_PALMTC_PCMCIA_POWER2);
+       gpio_free(GPIO_NR_PALMTC_PCMCIA_POWER1);
+}
+
+static void palmtc_pcmcia_socket_state(struct soc_pcmcia_socket *skt,
+                                       struct pcmcia_state *state)
+{
+       state->detect = 1; /* always inserted */
+       state->ready  = !!gpio_get_value(GPIO_NR_PALMTC_PCMCIA_READY);
+       state->bvd1   = 1;
+       state->bvd2   = 1;
+       state->wrprot = 0;
+       state->vs_3v  = 1;
+       state->vs_Xv  = 0;
+}
+
+static int palmtc_wifi_powerdown(void)
+{
+       gpio_set_value(GPIO_NR_PALMTC_PCMCIA_RESET, 1);
+       gpio_set_value(GPIO_NR_PALMTC_PCMCIA_POWER2, 0);
+       mdelay(40);
+       gpio_set_value(GPIO_NR_PALMTC_PCMCIA_POWER1, 0);
+       return 0;
+}
+
+static int palmtc_wifi_powerup(void)
+{
+       int timeout = 50;
+
+       gpio_set_value(GPIO_NR_PALMTC_PCMCIA_POWER3, 1);
+       mdelay(50);
+
+       /* Power up the card, 1.8V first, after a while 3.3V */
+       gpio_set_value(GPIO_NR_PALMTC_PCMCIA_POWER1, 1);
+       mdelay(100);
+       gpio_set_value(GPIO_NR_PALMTC_PCMCIA_POWER2, 1);
+
+       /* Wait till the card is ready */
+       while (!gpio_get_value(GPIO_NR_PALMTC_PCMCIA_PWRREADY) &&
+               timeout) {
+               mdelay(1);
+               timeout--;
+       }
+
+       /* Power down the WiFi in case of error */
+       if (!timeout) {
+               palmtc_wifi_powerdown();
+               return 1;
+       }
+
+       /* Reset the card */
+       gpio_set_value(GPIO_NR_PALMTC_PCMCIA_RESET, 1);
+       mdelay(20);
+       gpio_set_value(GPIO_NR_PALMTC_PCMCIA_RESET, 0);
+       mdelay(25);
+
+       gpio_set_value(GPIO_NR_PALMTC_PCMCIA_POWER3, 0);
+
+       return 0;
+}
+
+static int palmtc_pcmcia_configure_socket(struct soc_pcmcia_socket *skt,
+                                       const socket_state_t *state)
+{
+       int ret = 1;
+
+       if (state->Vcc == 0)
+               ret = palmtc_wifi_powerdown();
+       else if (state->Vcc == 33)
+               ret = palmtc_wifi_powerup();
+
+       return ret;
+}
+
+static void palmtc_pcmcia_socket_init(struct soc_pcmcia_socket *skt)
+{
+}
+
+static void palmtc_pcmcia_socket_suspend(struct soc_pcmcia_socket *skt)
+{
+}
+
+static struct pcmcia_low_level palmtc_pcmcia_ops = {
+       .owner                  = THIS_MODULE,
+
+       .first                  = 0,
+       .nr                     = 1,
+
+       .hw_init                = palmtc_pcmcia_hw_init,
+       .hw_shutdown            = palmtc_pcmcia_hw_shutdown,
+
+       .socket_state           = palmtc_pcmcia_socket_state,
+       .configure_socket       = palmtc_pcmcia_configure_socket,
+
+       .socket_init            = palmtc_pcmcia_socket_init,
+       .socket_suspend         = palmtc_pcmcia_socket_suspend,
+};
+
+static struct platform_device *palmtc_pcmcia_device;
+
+static int __init palmtc_pcmcia_init(void)
+{
+       int ret;
+
+       if (!machine_is_palmtc())
+               return -ENODEV;
+
+       palmtc_pcmcia_device = platform_device_alloc("pxa2xx-pcmcia", -1);
+       if (!palmtc_pcmcia_device)
+               return -ENOMEM;
+
+       ret = platform_device_add_data(palmtc_pcmcia_device, &palmtc_pcmcia_ops,
+                                       sizeof(palmtc_pcmcia_ops));
+
+       if (!ret)
+               ret = platform_device_add(palmtc_pcmcia_device);
+
+       if (ret)
+               platform_device_put(palmtc_pcmcia_device);
+
+       return ret;
+}
+
+static void __exit palmtc_pcmcia_exit(void)
+{
+       platform_device_unregister(palmtc_pcmcia_device);
+}
+
+module_init(palmtc_pcmcia_init);
+module_exit(palmtc_pcmcia_exit);
+
+MODULE_AUTHOR("Alex Osborne <ato@meshy.org>,"
+           " Marek Vasut <marek.vasut@gmail.com>");
+MODULE_DESCRIPTION("PCMCIA support for Palm Tungsten|C");
+MODULE_ALIAS("platform:pxa2xx-pcmcia");
+MODULE_LICENSE("GPL");
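
One detail of palmtc_pcmcia_configure_socket above: socket_state_t carries Vcc in tenths of a volt, so the two cases handled are "power off" (0) and "3.3 V" (33); anything else is reported as a failure. The same dispatch with the units spelled out (illustrative rewrite relying on the driver's includes, not part of the driver):

    static int configure_vcc(const socket_state_t *state)
    {
            switch (state->Vcc) {           /* tenths of a volt */
            case 0:                         /* 0.0 V: cut power */
                    return palmtc_wifi_powerdown();
            case 33:                        /* 3.3 V: run the power-up sequence */
                    return palmtc_wifi_powerup();
            default:
                    return 1;               /* unsupported voltage */
            }
    }
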
index bb8cc05..747ca19 100644
@@ -438,34 +438,37 @@ static int __exit pxa_rtc_remove(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_PM
-static int pxa_rtc_suspend(struct platform_device *pdev, pm_message_t state)
+static int pxa_rtc_suspend(struct device *dev)
 {
-       struct pxa_rtc *pxa_rtc = platform_get_drvdata(pdev);
+       struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
 
-       if (device_may_wakeup(&pdev->dev))
+       if (device_may_wakeup(dev))
                enable_irq_wake(pxa_rtc->irq_Alrm);
        return 0;
 }
 
-static int pxa_rtc_resume(struct platform_device *pdev)
+static int pxa_rtc_resume(struct device *dev)
 {
-       struct pxa_rtc *pxa_rtc = platform_get_drvdata(pdev);
+       struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
 
-       if (device_may_wakeup(&pdev->dev))
+       if (device_may_wakeup(dev))
                disable_irq_wake(pxa_rtc->irq_Alrm);
        return 0;
 }
-#else
-#define pxa_rtc_suspend        NULL
-#define pxa_rtc_resume NULL
+
+static struct dev_pm_ops pxa_rtc_pm_ops = {
+       .suspend        = pxa_rtc_suspend,
+       .resume         = pxa_rtc_resume,
+};
 #endif
 
 static struct platform_driver pxa_rtc_driver = {
        .remove         = __exit_p(pxa_rtc_remove),
-       .suspend        = pxa_rtc_suspend,
-       .resume         = pxa_rtc_resume,
        .driver         = {
-               .name           = "pxa-rtc",
+               .name   = "pxa-rtc",
+#ifdef CONFIG_PM
+               .pm     = &pxa_rtc_pm_ops,
+#endif
        },
 };
 
index 021b292..29f98a7 100644
@@ -393,31 +393,34 @@ static int sa1100_rtc_remove(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_PM
-static int sa1100_rtc_suspend(struct platform_device *pdev, pm_message_t state)
+static int sa1100_rtc_suspend(struct device *dev)
 {
-       if (device_may_wakeup(&pdev->dev))
+       if (device_may_wakeup(dev))
                enable_irq_wake(IRQ_RTCAlrm);
        return 0;
 }
 
-static int sa1100_rtc_resume(struct platform_device *pdev)
+static int sa1100_rtc_resume(struct device *dev)
 {
-       if (device_may_wakeup(&pdev->dev))
+       if (device_may_wakeup(dev))
                disable_irq_wake(IRQ_RTCAlrm);
        return 0;
 }
-#else
-#define sa1100_rtc_suspend     NULL
-#define sa1100_rtc_resume      NULL
+
+static struct dev_pm_ops sa1100_rtc_pm_ops = {
+       .suspend        = sa1100_rtc_suspend,
+       .resume         = sa1100_rtc_resume,
+};
 #endif
 
 static struct platform_driver sa1100_rtc_driver = {
        .probe          = sa1100_rtc_probe,
        .remove         = sa1100_rtc_remove,
-       .suspend        = sa1100_rtc_suspend,
-       .resume         = sa1100_rtc_resume,
        .driver         = {
-               .name           = "sa1100-rtc",
+               .name   = "sa1100-rtc",
+#ifdef CONFIG_PM
+               .pm     = &sa1100_rtc_pm_ops,
+#endif
        },
 };
 
index 8d349b2..300cea7 100644
@@ -649,7 +649,7 @@ static int cpm_uart_tx_pump(struct uart_port *port)
        u8 *p;
        int count;
        struct uart_cpm_port *pinfo = (struct uart_cpm_port *)port;
-       struct circ_buf *xmit = &port->info->xmit;
+       struct circ_buf *xmit = &port->state->xmit;
 
        /* Handle xon/xoff */
        if (port->x_char) {
index 7be52fe..31f1723 100644
@@ -18,6 +18,7 @@ static char *serial_version = "$Revision: 1.25 $";
 #include <linux/tty.h>
 #include <linux/tty_flip.h>
 #include <linux/major.h>
+#include <linux/smp_lock.h>
 #include <linux/string.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
index 6443b7f..b8629d7 100644
@@ -726,9 +726,10 @@ static struct uart_driver serial_pxa_reg = {
        .cons           = PXA_CONSOLE,
 };
 
-static int serial_pxa_suspend(struct platform_device *dev, pm_message_t state)
+#ifdef CONFIG_PM
+static int serial_pxa_suspend(struct device *dev)
 {
-        struct uart_pxa_port *sport = platform_get_drvdata(dev);
+        struct uart_pxa_port *sport = dev_get_drvdata(dev);
 
         if (sport)
                 uart_suspend_port(&serial_pxa_reg, &sport->port);
@@ -736,9 +737,9 @@ static int serial_pxa_suspend(struct platform_device *dev, pm_message_t state)
         return 0;
 }
 
-static int serial_pxa_resume(struct platform_device *dev)
+static int serial_pxa_resume(struct device *dev)
 {
-        struct uart_pxa_port *sport = platform_get_drvdata(dev);
+        struct uart_pxa_port *sport = dev_get_drvdata(dev);
 
         if (sport)
                 uart_resume_port(&serial_pxa_reg, &sport->port);
@@ -746,6 +747,12 @@ static int serial_pxa_resume(struct platform_device *dev)
         return 0;
 }
 
+static struct dev_pm_ops serial_pxa_pm_ops = {
+       .suspend        = serial_pxa_suspend,
+       .resume         = serial_pxa_resume,
+};
+#endif
+
 static int serial_pxa_probe(struct platform_device *dev)
 {
        struct uart_pxa_port *sport;
@@ -825,11 +832,12 @@ static struct platform_driver serial_pxa_driver = {
         .probe          = serial_pxa_probe,
         .remove         = serial_pxa_remove,
 
-       .suspend        = serial_pxa_suspend,
-       .resume         = serial_pxa_resume,
        .driver         = {
                .name   = "pxa2xx-uart",
                .owner  = THIS_MODULE,
+#ifdef CONFIG_PM
+               .pm     = &serial_pxa_pm_ops,
+#endif
        },
 };
 
index c0f950a..958a3ff 100644
@@ -532,7 +532,7 @@ static void restore_state(struct pl022 *pl022)
        GEN_MASK_BITS(SSP_DATA_BITS_12, SSP_CR0_MASK_DSS, 0)    | \
        GEN_MASK_BITS(SSP_MICROWIRE_CHANNEL_FULL_DUPLEX, SSP_CR0_MASK_HALFDUP, 5) | \
        GEN_MASK_BITS(SSP_CLK_POL_IDLE_LOW, SSP_CR0_MASK_SPO, 6) | \
-       GEN_MASK_BITS(SSP_CLK_FALLING_EDGE, SSP_CR0_MASK_SPH, 7) | \
+       GEN_MASK_BITS(SSP_CLK_SECOND_EDGE, SSP_CR0_MASK_SPH, 7) | \
        GEN_MASK_BITS(NMDK_SSP_DEFAULT_CLKRATE, SSP_CR0_MASK_SCR, 8) | \
        GEN_MASK_BITS(SSP_BITS_8, SSP_CR0_MASK_CSS, 16) | \
        GEN_MASK_BITS(SSP_INTERFACE_MOTOROLA_SPI, SSP_CR0_MASK_FRF, 21) \
@@ -1247,8 +1247,8 @@ static int verify_controller_parameters(struct pl022 *pl022,
                return -EINVAL;
        }
        if (chip_info->iface == SSP_INTERFACE_MOTOROLA_SPI) {
-               if ((chip_info->clk_phase != SSP_CLK_RISING_EDGE)
-                   && (chip_info->clk_phase != SSP_CLK_FALLING_EDGE)) {
+               if ((chip_info->clk_phase != SSP_CLK_FIRST_EDGE)
+                   && (chip_info->clk_phase != SSP_CLK_SECOND_EDGE)) {
                        dev_err(chip_info->dev,
                                "Clock Phase is configured incorrectly\n");
                        return -EINVAL;
@@ -1485,7 +1485,7 @@ static int pl022_setup(struct spi_device *spi)
                chip_info->data_size = SSP_DATA_BITS_12;
                chip_info->rx_lev_trig = SSP_RX_1_OR_MORE_ELEM;
                chip_info->tx_lev_trig = SSP_TX_1_OR_MORE_EMPTY_LOC;
-               chip_info->clk_phase = SSP_CLK_FALLING_EDGE;
+               chip_info->clk_phase = SSP_CLK_SECOND_EDGE;
                chip_info->clk_pol = SSP_CLK_POL_IDLE_LOW;
                chip_info->ctrl_len = SSP_BITS_8;
                chip_info->wait_state = SSP_MWIRE_WAIT_ZERO;
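
The renames in this hunk (SSP_CLK_RISING_EDGE/SSP_CLK_FALLING_EDGE becoming SSP_CLK_FIRST_EDGE/SSP_CLK_SECOND_EDGE) exist because which physical edge comes first depends on the clock's idle polarity, so naming the phase by transition order is the only unambiguous choice. Mapping this onto common SPI terminology (my reading, not stated in the driver):

    /*
     * SSP_CLK_FIRST_EDGE   ~ CPHA = 0: sample on the 1st clock transition
     * SSP_CLK_SECOND_EDGE  ~ CPHA = 1: sample on the 2nd clock transition
     *
     * With SSP_CLK_POL_IDLE_LOW (CPOL = 0) the first transition is rising
     * and the second falling; with an idle-high clock the physical edges
     * swap, which is why "FALLING_EDGE" was misleading for half the modes.
     */
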
index 31dd56f..c8c2b69 100644
@@ -1668,10 +1668,9 @@ static void pxa2xx_spi_shutdown(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_PM
-
-static int pxa2xx_spi_suspend(struct platform_device *pdev, pm_message_t state)
+static int pxa2xx_spi_suspend(struct device *dev)
 {
-       struct driver_data *drv_data = platform_get_drvdata(pdev);
+       struct driver_data *drv_data = dev_get_drvdata(dev);
        struct ssp_device *ssp = drv_data->ssp;
        int status = 0;
 
@@ -1684,9 +1683,9 @@ static int pxa2xx_spi_suspend(struct platform_device *pdev, pm_message_t state)
        return 0;
 }
 
-static int pxa2xx_spi_resume(struct platform_device *pdev)
+static int pxa2xx_spi_resume(struct device *dev)
 {
-       struct driver_data *drv_data = platform_get_drvdata(pdev);
+       struct driver_data *drv_data = dev_get_drvdata(dev);
        struct ssp_device *ssp = drv_data->ssp;
        int status = 0;
 
@@ -1703,26 +1702,29 @@ static int pxa2xx_spi_resume(struct platform_device *pdev)
        /* Start the queue running */
        status = start_queue(drv_data);
        if (status != 0) {
-               dev_err(&pdev->dev, "problem starting queue (%d)\n", status);
+               dev_err(dev, "problem starting queue (%d)\n", status);
                return status;
        }
 
        return 0;
 }
-#else
-#define pxa2xx_spi_suspend NULL
-#define pxa2xx_spi_resume NULL
-#endif /* CONFIG_PM */
+
+static struct dev_pm_ops pxa2xx_spi_pm_ops = {
+       .suspend        = pxa2xx_spi_suspend,
+       .resume         = pxa2xx_spi_resume,
+};
+#endif
 
 static struct platform_driver driver = {
        .driver = {
-               .name = "pxa2xx-spi",
-               .owner = THIS_MODULE,
+               .name   = "pxa2xx-spi",
+               .owner  = THIS_MODULE,
+#ifdef CONFIG_PM
+               .pm     = &pxa2xx_spi_pm_ops,
+#endif
        },
        .remove = pxa2xx_spi_remove,
        .shutdown = pxa2xx_spi_shutdown,
-       .suspend = pxa2xx_spi_suspend,
-       .resume = pxa2xx_spi_resume,
 };
 
 static int __init pxa2xx_spi_init(void)
index d14ea84..1301caa 100644
@@ -32,8 +32,3 @@ endif
 
 EXTRA_CFLAGS += -Idrivers/media/dvb/frontends
 EXTRA_CFLAGS += -Idrivers/media/dvb/dvb-core
-
-# Ubuntu 8.04 has CONFIG_SND undefined, so include lum sound/config.h too
-ifeq ($(CONFIG_SND),)
-EXTRA_CFLAGS += -include sound/config.h
-endif
index ebd7237..2407508 100644
@@ -22,7 +22,6 @@ config USB_ARCH_HAS_HCD
        default y if PCMCIA && !M32R                    # sl811_cs
        default y if ARM                                # SL-811
        default y if SUPERH                             # r8a66597-hcd
-       default y if MICROBLAZE
        default PCI
 
 # many non-PCI SOC chips embed OHCI
index b5294a9..f1c0620 100644
@@ -481,38 +481,47 @@ static int ohci_hcd_pxa27x_drv_remove(struct platform_device *pdev)
        return 0;
 }
 
-#ifdef CONFIG_PM
-static int ohci_hcd_pxa27x_drv_suspend(struct platform_device *pdev, pm_message_t state)
+#ifdef CONFIG_PM
+static int ohci_hcd_pxa27x_drv_suspend(struct device *dev)
 {
-       struct usb_hcd *hcd = platform_get_drvdata(pdev);
+       struct usb_hcd *hcd = dev_get_drvdata(dev);
        struct pxa27x_ohci *ohci = to_pxa27x_ohci(hcd);
 
        if (time_before(jiffies, ohci->ohci.next_statechange))
                msleep(5);
        ohci->ohci.next_statechange = jiffies;
 
-       pxa27x_stop_hc(ohci, &pdev->dev);
+       pxa27x_stop_hc(ohci, dev);
        hcd->state = HC_STATE_SUSPENDED;
 
        return 0;
 }
 
-static int ohci_hcd_pxa27x_drv_resume(struct platform_device *pdev)
+static int ohci_hcd_pxa27x_drv_resume(struct device *dev)
 {
-       struct usb_hcd *hcd = platform_get_drvdata(pdev);
+       struct usb_hcd *hcd = dev_get_drvdata(dev);
        struct pxa27x_ohci *ohci = to_pxa27x_ohci(hcd);
+       struct pxaohci_platform_data *inf = dev->platform_data;
        int status;
 
        if (time_before(jiffies, ohci->ohci.next_statechange))
                msleep(5);
        ohci->ohci.next_statechange = jiffies;
 
-       if ((status = pxa27x_start_hc(ohci, &pdev->dev)) < 0)
+       if ((status = pxa27x_start_hc(ohci, dev)) < 0)
                return status;
 
+       /* Select Power Management Mode */
+       pxa27x_ohci_select_pmm(ohci, inf->port_mode);
+
        ohci_finish_controller_resume(hcd);
        return 0;
 }
+
+static struct dev_pm_ops ohci_hcd_pxa27x_pm_ops = {
+       .suspend        = ohci_hcd_pxa27x_drv_suspend,
+       .resume         = ohci_hcd_pxa27x_drv_resume,
+};
 #endif
 
 /* work with hotplug and coldplug */
@@ -522,13 +531,12 @@ static struct platform_driver ohci_hcd_pxa27x_driver = {
        .probe          = ohci_hcd_pxa27x_drv_probe,
        .remove         = ohci_hcd_pxa27x_drv_remove,
        .shutdown       = usb_hcd_platform_shutdown,
-#ifdef CONFIG_PM
-       .suspend        = ohci_hcd_pxa27x_drv_suspend,
-       .resume         = ohci_hcd_pxa27x_drv_resume,
-#endif
        .driver         = {
                .name   = "pxa27x-ohci",
                .owner  = THIS_MODULE,
+#ifdef CONFIG_PM
+               .pm     = &ohci_hcd_pxa27x_pm_ops,
+#endif
        },
 };
 
index 68fa0e4..8c075b2 100644
@@ -912,6 +912,7 @@ static void sierra_release(struct usb_serial *serial)
        }
 }
 
+#ifdef CONFIG_PM
 static void stop_read_write_urbs(struct usb_serial *serial)
 {
        int i, j;
@@ -988,6 +989,10 @@ static int sierra_resume(struct usb_serial *serial)
 
        return ec ? -EIO : 0;
 }
+#else
+#define sierra_suspend NULL
+#define sierra_resume NULL
+#endif
 
 static struct usb_serial_driver sierra_device = {
        .driver = {
index 93bb434..701a108 100644
@@ -154,34 +154,38 @@ static int da903x_backlight_remove(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_PM
-static int da903x_backlight_suspend(struct platform_device *pdev,
-                                pm_message_t state)
+static int da903x_backlight_suspend(struct device *dev)
 {
+       struct platform_device *pdev = to_platform_device(dev);
        struct backlight_device *bl = platform_get_drvdata(pdev);
        return da903x_backlight_set(bl, 0);
 }
 
-static int da903x_backlight_resume(struct platform_device *pdev)
+static int da903x_backlight_resume(struct device *dev)
 {
+       struct platform_device *pdev = to_platform_device(dev);
        struct backlight_device *bl = platform_get_drvdata(pdev);
 
        backlight_update_status(bl);
        return 0;
 }
-#else
-#define da903x_backlight_suspend       NULL
-#define da903x_backlight_resume                NULL
+
+static struct dev_pm_ops da903x_backlight_pm_ops = {
+       .suspend        = da903x_backlight_suspend,
+       .resume         = da903x_backlight_resume,
+};
 #endif
 
 static struct platform_driver da903x_backlight_driver = {
        .driver         = {
                .name   = "da903x-backlight",
                .owner  = THIS_MODULE,
+#ifdef CONFIG_PM
+               .pm     = &da903x_backlight_pm_ops,
+#endif
        },
        .probe          = da903x_backlight_probe,
        .remove         = da903x_backlight_remove,
-       .suspend        = da903x_backlight_suspend,
-       .resume         = da903x_backlight_resume,
 };
 
 static int __init da903x_backlight_init(void)
index 6506117..1820c4a 100644
@@ -1638,24 +1638,26 @@ pxafb_freq_policy(struct notifier_block *nb, unsigned long val, void *data)
  * Power management hooks.  Note that we won't be called from IRQ context,
  * unlike the blank functions above, so we may sleep.
  */
-static int pxafb_suspend(struct platform_device *dev, pm_message_t state)
+static int pxafb_suspend(struct device *dev)
 {
-       struct pxafb_info *fbi = platform_get_drvdata(dev);
+       struct pxafb_info *fbi = dev_get_drvdata(dev);
 
        set_ctrlr_state(fbi, C_DISABLE_PM);
        return 0;
 }
 
-static int pxafb_resume(struct platform_device *dev)
+static int pxafb_resume(struct device *dev)
 {
-       struct pxafb_info *fbi = platform_get_drvdata(dev);
+       struct pxafb_info *fbi = dev_get_drvdata(dev);
 
        set_ctrlr_state(fbi, C_ENABLE_PM);
        return 0;
 }
-#else
-#define pxafb_suspend  NULL
-#define pxafb_resume   NULL
+
+static struct dev_pm_ops pxafb_pm_ops = {
+       .suspend        = pxafb_suspend,
+       .resume         = pxafb_resume,
+};
 #endif
 
 static int __devinit pxafb_init_video_memory(struct pxafb_info *fbi)
@@ -2081,6 +2083,9 @@ static int __devinit pxafb_probe(struct platform_device *dev)
                goto failed;
        }
 
+       if (cpu_is_pxa3xx() && inf->acceleration_enabled)
+               fbi->fb.fix.accel = FB_ACCEL_PXA3XX;
+
        fbi->backlight_power = inf->pxafb_backlight_power;
        fbi->lcd_power = inf->pxafb_lcd_power;
 
@@ -2091,14 +2096,14 @@ static int __devinit pxafb_probe(struct platform_device *dev)
                goto failed_fbi;
        }
 
-       r = request_mem_region(r->start, r->end - r->start + 1, dev->name);
+       r = request_mem_region(r->start, resource_size(r), dev->name);
        if (r == NULL) {
                dev_err(&dev->dev, "failed to request I/O memory\n");
                ret = -EBUSY;
                goto failed_fbi;
        }
 
-       fbi->mmio_base = ioremap(r->start, r->end - r->start + 1);
+       fbi->mmio_base = ioremap(r->start, resource_size(r));
        if (fbi->mmio_base == NULL) {
                dev_err(&dev->dev, "failed to map I/O memory\n");
                ret = -EBUSY;
@@ -2197,7 +2202,7 @@ failed_free_dma:
 failed_free_io:
        iounmap(fbi->mmio_base);
 failed_free_res:
-       release_mem_region(r->start, r->end - r->start + 1);
+       release_mem_region(r->start, resource_size(r));
 failed_fbi:
        clk_put(fbi->clk);
        platform_set_drvdata(dev, NULL);
@@ -2237,7 +2242,7 @@ static int __devexit pxafb_remove(struct platform_device *dev)
        iounmap(fbi->mmio_base);
 
        r = platform_get_resource(dev, IORESOURCE_MEM, 0);
-       release_mem_region(r->start, r->end - r->start + 1);
+       release_mem_region(r->start, resource_size(r));
 
        clk_put(fbi->clk);
        kfree(fbi);
@@ -2248,11 +2253,12 @@ static int __devexit pxafb_remove(struct platform_device *dev)
 static struct platform_driver pxafb_driver = {
        .probe          = pxafb_probe,
        .remove         = __devexit_p(pxafb_remove),
-       .suspend        = pxafb_suspend,
-       .resume         = pxafb_resume,
        .driver         = {
                .owner  = THIS_MODULE,
                .name   = "pxa2xx-fb",
+#ifdef CONFIG_PM
+               .pm     = &pxafb_pm_ops,
+#endif
        },
 };
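
The request_mem_region/ioremap/release_mem_region changes above, like the vlynq hunk below, replace the open-coded length calculation with resource_size(). The helper (paraphrased from <linux/ioport.h>) is one line:

    static inline resource_size_t resource_size(const struct resource *res)
    {
            return res->end - res->start + 1;   /* resource ranges are inclusive */
    }

Note that the vlynq code below had dropped the "+ 1", so its conversion also fixes a mapping length that was one byte short.
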
 
index ba3d71f..9554ad5 100644
@@ -702,7 +702,7 @@ static int vlynq_probe(struct platform_device *pdev)
        dev->mem_start = mem_res->start;
        dev->mem_end = mem_res->end;
 
-       len = regs_res->end - regs_res->start;
+       len = resource_size(regs_res);
        if (!request_mem_region(regs_res->start, len, dev_name(&dev->dev))) {
                printk(KERN_ERR "%s: Can't request vlynq registers\n",
                       dev_name(&dev->dev));
index 798cb07..3f57ce4 100644
@@ -19,9 +19,6 @@ static int
 adfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh,
               int create)
 {
-       if (block < 0)
-               goto abort_negative;
-
        if (!create) {
                if (block >= inode->i_blocks)
                        goto abort_toobig;
@@ -34,10 +31,6 @@ adfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh,
        /* don't support allocation of blocks yet */
        return -EIO;
 
-abort_negative:
-       adfs_error(inode->i_sb, "block %d < 0", block);
-       return -EIO;
-
 abort_toobig:
        return 0;
 }
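
The deleted check was dead code: adfs_get_block receives the block number as sector_t, which is unsigned, so "block < 0" can never be true (and compilers typically warn that the comparison is always false). In miniature:

    #include <linux/types.h>

    static int block_is_negative(sector_t block)
    {
            return block < 0;   /* always 0: sector_t is unsigned */
    }
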
index 9fe1b1b..96d394b 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -18,7 +18,7 @@
 /* Taken over from the old code... */
 
 /* POSIX UID/GID verification for setting inode attributes. */
-int inode_change_ok(struct inode *inode, struct iattr *attr)
+int inode_change_ok(const struct inode *inode, struct iattr *attr)
 {
        int retval = -EPERM;
        unsigned int ia_valid = attr->ia_valid;
@@ -60,9 +60,51 @@ fine:
 error:
        return retval;
 }
-
 EXPORT_SYMBOL(inode_change_ok);
 
+/**
+ * inode_newsize_ok - may this inode be truncated to a given size
+ * @inode:     the inode to be truncated
+ * @offset:    the new size to assign to the inode
+ * Returns:    0 on success, negative errno on failure
+ *
+ * inode_newsize_ok will check filesystem limits and ulimits to check that the
+ * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ
+ * when necessary. Caller must not proceed with inode size change if failure is
+ * returned. @inode must be a file (not directory), with appropriate
+ * permissions to allow truncate (inode_newsize_ok does NOT check these
+ * conditions).
+ *
+ * inode_newsize_ok must be called with i_mutex held.
+ */
+int inode_newsize_ok(const struct inode *inode, loff_t offset)
+{
+       if (inode->i_size < offset) {
+               unsigned long limit;
+
+               limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+               if (limit != RLIM_INFINITY && offset > limit)
+                       goto out_sig;
+               if (offset > inode->i_sb->s_maxbytes)
+                       goto out_big;
+       } else {
+               /*
+                * truncation of in-use swapfiles is disallowed - it would
+                * cause subsequent swapout to scribble on the now-freed
+                * blocks.
+                */
+               if (IS_SWAPFILE(inode))
+                       return -ETXTBSY;
+       }
+
+       return 0;
+out_sig:
+       send_sig(SIGXFSZ, current, 0);
+out_big:
+       return -EFBIG;
+}
+EXPORT_SYMBOL(inode_newsize_ok);
+
 int inode_setattr(struct inode * inode, struct iattr * attr)
 {
        unsigned int ia_valid = attr->ia_valid;
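
A sketch of how a filesystem's setattr path might use the new helper (the caller is hypothetical; inode_change_ok and inode_newsize_ok are the functions above):

    #include <linux/fs.h>

    static int example_setattr(struct inode *inode, struct iattr *attr)
    {
            int error = inode_change_ok(inode, attr);
            if (error)
                    return error;

            if (attr->ia_valid & ATTR_SIZE) {
                    /* enforces RLIMIT_FSIZE and s_maxbytes, raising SIGXFSZ */
                    error = inode_newsize_ok(inode, attr->ia_size);
                    if (error)
                            return error;
            }
            return inode_setattr(inode, attr);
    }
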
index dd376c1..33baf27 100644
@@ -737,12 +737,7 @@ befs_put_super(struct super_block *sb)
 {
        kfree(BEFS_SB(sb)->mount_opts.iocharset);
        BEFS_SB(sb)->mount_opts.iocharset = NULL;
-
-       if (BEFS_SB(sb)->nls) {
-               unload_nls(BEFS_SB(sb)->nls);
-               BEFS_SB(sb)->nls = NULL;
-       }
-
+       unload_nls(BEFS_SB(sb)->nls);
        kfree(sb->s_fs_info);
        sb->s_fs_info = NULL;
 }
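
Dropping the NULL check here is safe only because unload_nls() itself became NULL-tolerant; to a first approximation the helper now reads (paraphrased, the real definition lives in fs/nls):

    void unload_nls(struct nls_table *nls)
    {
            if (nls)                        /* NULL-safe: callers need no guard */
                    module_put(nls->owner);
    }
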
index 442d94f..b9b3bb5 100644
@@ -1711,42 +1711,52 @@ struct elf_note_info {
        int numnote;
 };
 
-static int fill_note_info(struct elfhdr *elf, int phdrs,
-                         struct elf_note_info *info,
-                         long signr, struct pt_regs *regs)
+static int elf_note_info_init(struct elf_note_info *info)
 {
-#define        NUM_NOTES       6
-       struct list_head *t;
-
-       info->notes = NULL;
-       info->prstatus = NULL;
-       info->psinfo = NULL;
-       info->fpu = NULL;
-#ifdef ELF_CORE_COPY_XFPREGS
-       info->xfpu = NULL;
-#endif
+       memset(info, 0, sizeof(*info));
        INIT_LIST_HEAD(&info->thread_list);
 
-       info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
-                             GFP_KERNEL);
+       /* Allocate space for six ELF notes */
+       info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
        if (!info->notes)
                return 0;
        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
        if (!info->psinfo)
-               return 0;
+               goto notes_free;
        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
        if (!info->prstatus)
-               return 0;
+               goto psinfo_free;
        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
        if (!info->fpu)
-               return 0;
+               goto prstatus_free;
 #ifdef ELF_CORE_COPY_XFPREGS
        info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
        if (!info->xfpu)
-               return 0;
+               goto fpu_free;
+#endif
+       return 1;
+#ifdef ELF_CORE_COPY_XFPREGS
+ fpu_free:
+       kfree(info->fpu);
 #endif
+ prstatus_free:
+       kfree(info->prstatus);
+ psinfo_free:
+       kfree(info->psinfo);
+ notes_free:
+       kfree(info->notes);
+       return 0;
+}
+
+static int fill_note_info(struct elfhdr *elf, int phdrs,
+                         struct elf_note_info *info,
+                         long signr, struct pt_regs *regs)
+{
+       struct list_head *t;
+
+       if (!elf_note_info_init(info))
+               return 0;
 
-       info->thread_status_size = 0;
        if (signr) {
                struct core_thread *ct;
                struct elf_thread_status *ets;
@@ -1806,8 +1816,6 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 #endif
 
        return 1;
-
-#undef NUM_NOTES
 }
 
 static size_t get_note_info_size(struct elf_note_info *info)
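
elf_note_info_init above is a textbook reverse-order cleanup ladder: each failing allocation jumps to a label that frees exactly what was allocated before it. Stripped to a skeleton (names hypothetical):

    #include <linux/slab.h>

    static int init_three(void **a, void **b, void **c)
    {
            *a = kmalloc(16, GFP_KERNEL);
            if (!*a)
                    return 0;
            *b = kmalloc(16, GFP_KERNEL);
            if (!*b)
                    goto free_a;
            *c = kmalloc(16, GFP_KERNEL);
            if (!*c)
                    goto free_b;
            return 1;               /* same 1 = ok, 0 = fail convention */

    free_b:
            kfree(*b);
    free_a:
            kfree(*a);
            return 0;
    }
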
index 7628547..38502c6 100644
@@ -283,20 +283,23 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
        }
 
        stack_size = exec_params.stack_size;
-       if (stack_size < interp_params.stack_size)
-               stack_size = interp_params.stack_size;
-
        if (exec_params.flags & ELF_FDPIC_FLAG_EXEC_STACK)
                executable_stack = EXSTACK_ENABLE_X;
        else if (exec_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK)
                executable_stack = EXSTACK_DISABLE_X;
-       else if (interp_params.flags & ELF_FDPIC_FLAG_EXEC_STACK)
-               executable_stack = EXSTACK_ENABLE_X;
-       else if (interp_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK)
-               executable_stack = EXSTACK_DISABLE_X;
        else
                executable_stack = EXSTACK_DEFAULT;
 
+       if (stack_size == 0) {
+               stack_size = interp_params.stack_size;
+               if (interp_params.flags & ELF_FDPIC_FLAG_EXEC_STACK)
+                       executable_stack = EXSTACK_ENABLE_X;
+               else if (interp_params.flags & ELF_FDPIC_FLAG_NOEXEC_STACK)
+                       executable_stack = EXSTACK_DISABLE_X;
+               else
+                       executable_stack = EXSTACK_DEFAULT;
+       }
+
        retval = -ENOEXEC;
        if (stack_size == 0)
                goto error;
index e92f229..a279665 100644
@@ -278,8 +278,6 @@ static int decompress_exec(
                ret = bprm->file->f_op->read(bprm->file, buf, LBUFSIZE, &fpos);
                if (ret <= 0)
                        break;
-               if (ret >= (unsigned long) -4096)
-                       break;
                len -= ret;
 
                strm.next_in = buf;
@@ -335,7 +333,7 @@ calc_reloc(unsigned long r, struct lib_info *p, int curid, int internalp)
                                        "(%d != %d)", (unsigned) r, curid, id);
                        goto failed;
                } else if ( ! p->lib_list[id].loaded &&
-                               load_flat_shared_library(id, p) > (unsigned long) -4096) {
+                               IS_ERR_VALUE(load_flat_shared_library(id, p))) {
                        printk("BINFMT_FLAT: failed to load library %d", id);
                        goto failed;
                }
@@ -545,7 +543,7 @@ static int load_flat_file(struct linux_binprm * bprm,
                textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC,
                                  MAP_PRIVATE|MAP_EXECUTABLE, 0);
                up_write(&current->mm->mmap_sem);
-               if (!textpos  || textpos >= (unsigned long) -4096) {
+               if (!textpos || IS_ERR_VALUE(textpos)) {
                        if (!textpos)
                                textpos = (unsigned long) -ENOMEM;
                        printk("Unable to mmap process text, errno %d\n", (int)-textpos);
@@ -560,7 +558,7 @@ static int load_flat_file(struct linux_binprm * bprm,
                        PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
                up_write(&current->mm->mmap_sem);
 
-               if (realdatastart == 0 || realdatastart >= (unsigned long)-4096) {
+               if (realdatastart == 0 || IS_ERR_VALUE(realdatastart)) {
                        if (!realdatastart)
                                realdatastart = (unsigned long) -ENOMEM;
                        printk("Unable to allocate RAM for process data, errno %d\n",
@@ -587,7 +585,7 @@ static int load_flat_file(struct linux_binprm * bprm,
                        result = bprm->file->f_op->read(bprm->file, (char *) datapos,
                                        data_len + (relocs * sizeof(unsigned long)), &fpos);
                }
-               if (result >= (unsigned long)-4096) {
+               if (IS_ERR_VALUE(result)) {
                        printk("Unable to read data+bss, errno %d\n", (int)-result);
                        do_munmap(current->mm, textpos, text_len);
                        do_munmap(current->mm, realdatastart, data_len + extra);
@@ -607,7 +605,7 @@ static int load_flat_file(struct linux_binprm * bprm,
                        PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
                up_write(&current->mm->mmap_sem);
 
-               if (!textpos  || textpos >= (unsigned long) -4096) {
+               if (!textpos || IS_ERR_VALUE(textpos)) {
                        if (!textpos)
                                textpos = (unsigned long) -ENOMEM;
                        printk("Unable to allocate RAM for process text/data, errno %d\n",
@@ -641,7 +639,7 @@ static int load_flat_file(struct linux_binprm * bprm,
                        fpos = 0;
                        result = bprm->file->f_op->read(bprm->file,
                                        (char *) textpos, text_len, &fpos);
-                       if (result < (unsigned long) -4096)
+                       if (!IS_ERR_VALUE(result))
                                result = decompress_exec(bprm, text_len, (char *) datapos,
                                                 data_len + (relocs * sizeof(unsigned long)), 0);
                }
@@ -651,13 +649,13 @@ static int load_flat_file(struct linux_binprm * bprm,
                        fpos = 0;
                        result = bprm->file->f_op->read(bprm->file,
                                        (char *) textpos, text_len, &fpos);
-                       if (result < (unsigned long) -4096) {
+                       if (!IS_ERR_VALUE(result)) {
                                fpos = ntohl(hdr->data_start);
                                result = bprm->file->f_op->read(bprm->file, (char *) datapos,
                                        data_len + (relocs * sizeof(unsigned long)), &fpos);
                        }
                }
-               if (result >= (unsigned long)-4096) {
+               if (IS_ERR_VALUE(result)) {
                        printk("Unable to read code+data+bss, errno %d\n",(int)-result);
                        do_munmap(current->mm, textpos, text_len + data_len + extra +
                                MAX_SHARED_LIBS * sizeof(unsigned long));
@@ -835,7 +833,7 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
 
        res = prepare_binprm(&bprm);
 
-       if (res <= (unsigned long)-4096)
+       if (!IS_ERR_VALUE(res))
                res = load_flat_file(&bprm, libs, id, NULL);
 
        abort_creds(bprm.cred);
@@ -880,7 +878,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
        stack_len += FLAT_DATA_ALIGN - 1;  /* reserve for upcoming alignment */
        
        res = load_flat_file(bprm, &libinfo, 0, &stack_len);
-       if (res > (unsigned long)-4096)
+       if (IS_ERR_VALUE(res))
                return res;
        
        /* Update data segment pointers for all libraries */
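
Every conversion in this file replaces a hand-rolled comparison against (unsigned long)-4096 with IS_ERR_VALUE(), which encodes the kernel convention that the top of the unsigned range holds negated errno values. Paraphrased from <linux/err.h>:

    #define MAX_ERRNO       4095
    #define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)

Besides being readable, the macro tightens the boundary: it treats -1 through -4095 as errors, whereas the open-coded tests also swept in -4096, which is not a valid errno.
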
index 5d1ed50..9cf4b92 100644
@@ -216,8 +216,6 @@ EXPORT_SYMBOL(fsync_bdev);
  * freeze_bdev  --  lock a filesystem and force it into a consistent state
  * @bdev:      blockdevice to lock
  *
- * This takes the block device bd_mount_sem to make sure no new mounts
- * happen on bdev until thaw_bdev() is called.
  * If a superblock is found on this device, we take the s_umount semaphore
  * on it to make sure nobody unmounts until the snapshot creation is done.
  * The reference counter (bd_fsfreeze_count) guarantees that only the last
@@ -232,46 +230,55 @@ struct super_block *freeze_bdev(struct block_device *bdev)
        int error = 0;
 
        mutex_lock(&bdev->bd_fsfreeze_mutex);
-       if (bdev->bd_fsfreeze_count > 0) {
-               bdev->bd_fsfreeze_count++;
+       if (++bdev->bd_fsfreeze_count > 1) {
+               /*
+                * We don't even need to grab a reference - the first call
+                * to freeze_bdev grab an active reference and only the last
+                * thaw_bdev drops it.
+                */
                sb = get_super(bdev);
+               drop_super(sb);
                mutex_unlock(&bdev->bd_fsfreeze_mutex);
                return sb;
        }
-       bdev->bd_fsfreeze_count++;
-
-       down(&bdev->bd_mount_sem);
-       sb = get_super(bdev);
-       if (sb && !(sb->s_flags & MS_RDONLY)) {
-               sb->s_frozen = SB_FREEZE_WRITE;
-               smp_wmb();
-
-               sync_filesystem(sb);
-
-               sb->s_frozen = SB_FREEZE_TRANS;
-               smp_wmb();
-
-               sync_blockdev(sb->s_bdev);
-
-               if (sb->s_op->freeze_fs) {
-                       error = sb->s_op->freeze_fs(sb);
-                       if (error) {
-                               printk(KERN_ERR
-                                       "VFS:Filesystem freeze failed\n");
-                               sb->s_frozen = SB_UNFROZEN;
-                               drop_super(sb);
-                               up(&bdev->bd_mount_sem);
-                               bdev->bd_fsfreeze_count--;
-                               mutex_unlock(&bdev->bd_fsfreeze_mutex);
-                               return ERR_PTR(error);
-                       }
+
+       sb = get_active_super(bdev);
+       if (!sb)
+               goto out;
+       if (sb->s_flags & MS_RDONLY) {
+               deactivate_locked_super(sb);
+               mutex_unlock(&bdev->bd_fsfreeze_mutex);
+               return sb;
+       }
+
+       sb->s_frozen = SB_FREEZE_WRITE;
+       smp_wmb();
+
+       sync_filesystem(sb);
+
+       sb->s_frozen = SB_FREEZE_TRANS;
+       smp_wmb();
+
+       sync_blockdev(sb->s_bdev);
+
+       if (sb->s_op->freeze_fs) {
+               error = sb->s_op->freeze_fs(sb);
+               if (error) {
+                       printk(KERN_ERR
+                               "VFS:Filesystem freeze failed\n");
+                       sb->s_frozen = SB_UNFROZEN;
+                       deactivate_locked_super(sb);
+                       bdev->bd_fsfreeze_count--;
+                       mutex_unlock(&bdev->bd_fsfreeze_mutex);
+                       return ERR_PTR(error);
                }
        }
+       up_write(&sb->s_umount);
 
+ out:
        sync_blockdev(bdev);
        mutex_unlock(&bdev->bd_fsfreeze_mutex);
-
-       return sb;      /* thaw_bdev releases s->s_umount and bd_mount_sem */
+       return sb;      /* thaw_bdev releases s->s_umount */
 }
 EXPORT_SYMBOL(freeze_bdev);
 
@@ -284,44 +291,44 @@ EXPORT_SYMBOL(freeze_bdev);
  */
 int thaw_bdev(struct block_device *bdev, struct super_block *sb)
 {
-       int error = 0;
+       int error = -EINVAL;
 
        mutex_lock(&bdev->bd_fsfreeze_mutex);
-       if (!bdev->bd_fsfreeze_count) {
-               mutex_unlock(&bdev->bd_fsfreeze_mutex);
-               return -EINVAL;
-       }
-
-       bdev->bd_fsfreeze_count--;
-       if (bdev->bd_fsfreeze_count > 0) {
-               if (sb)
-                       drop_super(sb);
-               mutex_unlock(&bdev->bd_fsfreeze_mutex);
-               return 0;
-       }
-
-       if (sb) {
-               BUG_ON(sb->s_bdev != bdev);
-               if (!(sb->s_flags & MS_RDONLY)) {
-                       if (sb->s_op->unfreeze_fs) {
-                               error = sb->s_op->unfreeze_fs(sb);
-                               if (error) {
-                                       printk(KERN_ERR
-                                               "VFS:Filesystem thaw failed\n");
-                                       sb->s_frozen = SB_FREEZE_TRANS;
-                                       bdev->bd_fsfreeze_count++;
-                                       mutex_unlock(&bdev->bd_fsfreeze_mutex);
-                                       return error;
-                               }
-                       }
-                       sb->s_frozen = SB_UNFROZEN;
-                       smp_wmb();
-                       wake_up(&sb->s_wait_unfrozen);
+       if (!bdev->bd_fsfreeze_count)
+               goto out_unlock;
+
+       error = 0;
+       if (--bdev->bd_fsfreeze_count > 0)
+               goto out_unlock;
+
+       if (!sb)
+               goto out_unlock;
+
+       BUG_ON(sb->s_bdev != bdev);
+       down_write(&sb->s_umount);
+       if (sb->s_flags & MS_RDONLY)
+               goto out_deactivate;
+
+       if (sb->s_op->unfreeze_fs) {
+               error = sb->s_op->unfreeze_fs(sb);
+               if (error) {
+                       printk(KERN_ERR
+                               "VFS:Filesystem thaw failed\n");
+                       sb->s_frozen = SB_FREEZE_TRANS;
+                       bdev->bd_fsfreeze_count++;
+                       mutex_unlock(&bdev->bd_fsfreeze_mutex);
+                       return error;
                }
-               drop_super(sb);
        }
 
-       up(&bdev->bd_mount_sem);
+       sb->s_frozen = SB_UNFROZEN;
+       smp_wmb();
+       wake_up(&sb->s_wait_unfrozen);
+
+out_deactivate:
+       if (sb)
+               deactivate_locked_super(sb);
+out_unlock:
        mutex_unlock(&bdev->bd_fsfreeze_mutex);
        return 0;
 }
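
[Editorial sketch] The rewritten freeze_bdev()/thaw_bdev() pair above turns bd_fsfreeze_count into a pure nesting counter serialized by bd_fsfreeze_mutex: only the first freeze does the real work and only the last thaw undoes it, while nested callers just adjust the count. A stripped-down sketch of that counting discipline, with a pthread mutex and a hypothetical struct standing in for the kernel objects:

#include <pthread.h>

struct fake_bdev {
        pthread_mutex_t lock;   /* plays bd_fsfreeze_mutex */
        int freeze_count;       /* plays bd_fsfreeze_count */
};

static void do_freeze(struct fake_bdev *b) { (void)b; /* quiesce writes */ }
static void do_thaw(struct fake_bdev *b)   { (void)b; /* resume writes */ }

void freeze(struct fake_bdev *b)
{
        pthread_mutex_lock(&b->lock);
        if (++b->freeze_count == 1)     /* only the first caller freezes */
                do_freeze(b);
        pthread_mutex_unlock(&b->lock);
}

int thaw(struct fake_bdev *b)
{
        int error = -22;                /* -EINVAL, mirroring the code above */

        pthread_mutex_lock(&b->lock);
        if (!b->freeze_count)
                goto out_unlock;
        error = 0;
        if (--b->freeze_count == 0)     /* only the last caller thaws */
                do_thaw(b);
out_unlock:
        pthread_mutex_unlock(&b->lock);
        return error;
}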
@@ -430,7 +437,6 @@ static void init_once(void *foo)
 
        memset(bdev, 0, sizeof(*bdev));
        mutex_init(&bdev->bd_mutex);
-       sema_init(&bdev->bd_mount_sem, 1);
        INIT_LIST_HEAD(&bdev->bd_inodes);
        INIT_LIST_HEAD(&bdev->bd_list);
 #ifdef CONFIG_SYSFS
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 019e8af..282ca08 100644 (file)
@@ -48,6 +48,9 @@ struct btrfs_worker_thread {
        /* number of things on the pending list */
        atomic_t num_pending;
 
+       /* reference counter for this struct */
+       atomic_t refs;
+
        unsigned long sequence;
 
        /* protects the pending list. */
@@ -71,7 +74,12 @@ static void check_idle_worker(struct btrfs_worker_thread *worker)
                unsigned long flags;
                spin_lock_irqsave(&worker->workers->lock, flags);
                worker->idle = 1;
-               list_move(&worker->worker_list, &worker->workers->idle_list);
+
+               /* the list may be empty if the worker is just starting */
+               if (!list_empty(&worker->worker_list)) {
+                       list_move(&worker->worker_list,
+                                &worker->workers->idle_list);
+               }
                spin_unlock_irqrestore(&worker->workers->lock, flags);
        }
 }
@@ -87,23 +95,49 @@ static void check_busy_worker(struct btrfs_worker_thread *worker)
                unsigned long flags;
                spin_lock_irqsave(&worker->workers->lock, flags);
                worker->idle = 0;
-               list_move_tail(&worker->worker_list,
-                              &worker->workers->worker_list);
+
+               if (!list_empty(&worker->worker_list)) {
+                       list_move_tail(&worker->worker_list,
+                                     &worker->workers->worker_list);
+               }
                spin_unlock_irqrestore(&worker->workers->lock, flags);
        }
 }
 
-static noinline int run_ordered_completions(struct btrfs_workers *workers,
-                                           struct btrfs_work *work)
+static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
 {
+       struct btrfs_workers *workers = worker->workers;
        unsigned long flags;
 
+       rmb();
+       if (!workers->atomic_start_pending)
+               return;
+
+       spin_lock_irqsave(&workers->lock, flags);
+       if (!workers->atomic_start_pending)
+               goto out;
+
+       workers->atomic_start_pending = 0;
+       if (workers->num_workers >= workers->max_workers)
+               goto out;
+
+       spin_unlock_irqrestore(&workers->lock, flags);
+       btrfs_start_workers(workers, 1);
+       return;
+
+out:
+       spin_unlock_irqrestore(&workers->lock, flags);
+}
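
[Editorial sketch] check_pending_worker_creates() above is a double-checked flag: a lockless peek ordered by rmb() filters the common case, the flag is re-tested and cleared under workers->lock, and the lock is dropped again before calling into btrfs_start_workers(), which may sleep (the max_workers test is elided here). The same shape in miniature, with pthread primitives standing in for the kernel spinlock and barrier:

#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int start_pending;       /* set from a context that must not sleep */

static void start_one_worker(void) { /* may sleep; called unlocked */ }

void check_pending_creates(void)
{
        __sync_synchronize();           /* stands in for rmb() */
        if (!start_pending)             /* cheap lockless filter */
                return;

        pthread_mutex_lock(&lock);
        if (!start_pending) {           /* recheck: someone beat us to it */
                pthread_mutex_unlock(&lock);
                return;
        }
        start_pending = 0;
        pthread_mutex_unlock(&lock);    /* drop the lock before sleeping */
        start_one_worker();
}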
+
+static noinline int run_ordered_completions(struct btrfs_workers *workers,
+                                           struct btrfs_work *work)
+{
        if (!workers->ordered)
                return 0;
 
        set_bit(WORK_DONE_BIT, &work->flags);
 
-       spin_lock_irqsave(&workers->lock, flags);
+       spin_lock(&workers->order_lock);
 
        while (1) {
                if (!list_empty(&workers->prio_order_list)) {
@@ -126,45 +160,118 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers,
                if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
                        break;
 
-               spin_unlock_irqrestore(&workers->lock, flags);
+               spin_unlock(&workers->order_lock);
 
                work->ordered_func(work);
 
                /* now take the lock again and call the freeing code */
-               spin_lock_irqsave(&workers->lock, flags);
+               spin_lock(&workers->order_lock);
                list_del(&work->order_list);
                work->ordered_free(work);
        }
 
-       spin_unlock_irqrestore(&workers->lock, flags);
+       spin_unlock(&workers->order_lock);
        return 0;
 }
 
+static void put_worker(struct btrfs_worker_thread *worker)
+{
+       if (atomic_dec_and_test(&worker->refs))
+               kfree(worker);
+}
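
[Editorial sketch] put_worker() above introduces kref-style lifetime management: every holder pins the worker with atomic_inc(&worker->refs), and whoever drops the count to zero frees it, so a thread can be unlinked from the worker lists while another CPU still holds a pointer to it. A self-contained sketch with GCC builtins standing in for atomic_t:

#include <stdlib.h>

struct worker {
        int refs;               /* stands in for atomic_t refs */
        /* ... pending lists, task pointer, ... */
};

struct worker *worker_alloc(void)
{
        struct worker *w = calloc(1, sizeof(*w));

        if (w)
                w->refs = 1;    /* the list's initial reference */
        return w;
}

void get_worker(struct worker *w)
{
        __sync_fetch_and_add(&w->refs, 1);      /* atomic_inc() */
}

void put_worker(struct worker *w)
{
        /* atomic_dec_and_test(): only the final drop frees */
        if (__sync_sub_and_fetch(&w->refs, 1) == 0)
                free(w);
}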
+
+static int try_worker_shutdown(struct btrfs_worker_thread *worker)
+{
+       int freeit = 0;
+
+       spin_lock_irq(&worker->lock);
+       spin_lock(&worker->workers->lock);
+       if (worker->workers->num_workers > 1 &&
+           worker->idle &&
+           !worker->working &&
+           !list_empty(&worker->worker_list) &&
+           list_empty(&worker->prio_pending) &&
+           list_empty(&worker->pending) &&
+           atomic_read(&worker->num_pending) == 0) {
+               freeit = 1;
+               list_del_init(&worker->worker_list);
+               worker->workers->num_workers--;
+       }
+       spin_unlock(&worker->workers->lock);
+       spin_unlock_irq(&worker->lock);
+
+       if (freeit)
+               put_worker(worker);
+       return freeit;
+}
+
+static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker,
+                                       struct list_head *prio_head,
+                                       struct list_head *head)
+{
+       struct btrfs_work *work = NULL;
+       struct list_head *cur = NULL;
+
+       if (!list_empty(prio_head))
+               cur = prio_head->next;
+
+       smp_mb();
+       if (!list_empty(&worker->prio_pending))
+               goto refill;
+
+       if (!list_empty(head))
+               cur = head->next;
+
+       if (cur)
+               goto out;
+
+refill:
+       spin_lock_irq(&worker->lock);
+       list_splice_tail_init(&worker->prio_pending, prio_head);
+       list_splice_tail_init(&worker->pending, head);
+
+       if (!list_empty(prio_head))
+               cur = prio_head->next;
+       else if (!list_empty(head))
+               cur = head->next;
+       spin_unlock_irq(&worker->lock);
+
+       if (!cur)
+               goto out_fail;
+
+out:
+       work = list_entry(cur, struct btrfs_work, list);
+
+out_fail:
+       return work;
+}
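
[Editorial sketch] get_next_work() above trades lock traffic for batching: the worker consumes from the private prio_head/head lists without any locking and only takes worker->lock to splice the shared pending lists over in bulk once the private ones run dry (the smp_mb()-guarded peek lets freshly queued high-priority work force an early refill). The splice-then-consume idea reduced to a toy singly linked list:

#include <pthread.h>
#include <stddef.h>

struct item { struct item *next; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct item *shared;     /* producers push here, under the lock */
static struct item *local;      /* consumer-private, read without the lock */

struct item *get_next(void)
{
        struct item *it;

        if (!local) {                   /* private backlog ran dry... */
                pthread_mutex_lock(&lock);
                local = shared;         /* ...splice the shared list over */
                shared = NULL;
                pthread_mutex_unlock(&lock);
        }
        it = local;                     /* consume with no locking at all */
        if (it)
                local = it->next;
        return it;
}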
+
 /*
  * main loop for servicing work items
  */
 static int worker_loop(void *arg)
 {
        struct btrfs_worker_thread *worker = arg;
-       struct list_head *cur;
+       struct list_head head;
+       struct list_head prio_head;
        struct btrfs_work *work;
+
+       INIT_LIST_HEAD(&head);
+       INIT_LIST_HEAD(&prio_head);
+
        do {
-               spin_lock_irq(&worker->lock);
-again_locked:
+again:
                while (1) {
-                       if (!list_empty(&worker->prio_pending))
-                               cur = worker->prio_pending.next;
-                       else if (!list_empty(&worker->pending))
-                               cur = worker->pending.next;
-                       else
+
+                       work = get_next_work(worker, &prio_head, &head);
+                       if (!work)
                                break;
 
-                       work = list_entry(cur, struct btrfs_work, list);
                        list_del(&work->list);
                        clear_bit(WORK_QUEUED_BIT, &work->flags);
 
                        work->worker = worker;
-                       spin_unlock_irq(&worker->lock);
 
                        work->func(work);
 
@@ -175,9 +282,13 @@ again_locked:
                         */
                        run_ordered_completions(worker->workers, work);
 
-                       spin_lock_irq(&worker->lock);
-                       check_idle_worker(worker);
+                       check_pending_worker_creates(worker);
+
                }
+
+               spin_lock_irq(&worker->lock);
+               check_idle_worker(worker);
+
                if (freezing(current)) {
                        worker->working = 0;
                        spin_unlock_irq(&worker->lock);
@@ -216,8 +327,10 @@ again_locked:
                                spin_lock_irq(&worker->lock);
                                set_current_state(TASK_INTERRUPTIBLE);
                                if (!list_empty(&worker->pending) ||
-                                   !list_empty(&worker->prio_pending))
-                                       goto again_locked;
+                                   !list_empty(&worker->prio_pending)) {
+                                       spin_unlock_irq(&worker->lock);
+                                       goto again;
+                               }
 
                                /*
                                 * this makes sure we get a wakeup when someone
@@ -226,8 +339,13 @@ again_locked:
                                worker->working = 0;
                                spin_unlock_irq(&worker->lock);
 
-                               if (!kthread_should_stop())
-                                       schedule();
+                               if (!kthread_should_stop()) {
+                                       schedule_timeout(HZ * 120);
+                                       if (!worker->working &&
+                                           try_worker_shutdown(worker)) {
+                                               return 0;
+                                       }
+                               }
                        }
                        __set_current_state(TASK_RUNNING);
                }
@@ -242,16 +360,30 @@ int btrfs_stop_workers(struct btrfs_workers *workers)
 {
        struct list_head *cur;
        struct btrfs_worker_thread *worker;
+       int can_stop;
 
+       spin_lock_irq(&workers->lock);
        list_splice_init(&workers->idle_list, &workers->worker_list);
        while (!list_empty(&workers->worker_list)) {
                cur = workers->worker_list.next;
                worker = list_entry(cur, struct btrfs_worker_thread,
                                    worker_list);
-               kthread_stop(worker->task);
-               list_del(&worker->worker_list);
-               kfree(worker);
+
+               atomic_inc(&worker->refs);
+               workers->num_workers -= 1;
+               if (!list_empty(&worker->worker_list)) {
+                       list_del_init(&worker->worker_list);
+                       put_worker(worker);
+                       can_stop = 1;
+               } else
+                       can_stop = 0;
+               spin_unlock_irq(&workers->lock);
+               if (can_stop)
+                       kthread_stop(worker->task);
+               spin_lock_irq(&workers->lock);
+               put_worker(worker);
        }
+       spin_unlock_irq(&workers->lock);
        return 0;
 }
 
@@ -266,10 +398,13 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
        INIT_LIST_HEAD(&workers->order_list);
        INIT_LIST_HEAD(&workers->prio_order_list);
        spin_lock_init(&workers->lock);
+       spin_lock_init(&workers->order_lock);
        workers->max_workers = max;
        workers->idle_thresh = 32;
        workers->name = name;
        workers->ordered = 0;
+       workers->atomic_start_pending = 0;
+       workers->atomic_worker_start = 0;
 }
 
 /*
@@ -293,7 +428,9 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
                INIT_LIST_HEAD(&worker->prio_pending);
                INIT_LIST_HEAD(&worker->worker_list);
                spin_lock_init(&worker->lock);
+
                atomic_set(&worker->num_pending, 0);
+               atomic_set(&worker->refs, 1);
                worker->workers = workers;
                worker->task = kthread_run(worker_loop, worker,
                                           "btrfs-%s-%d", workers->name,
@@ -303,7 +440,6 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
                        kfree(worker);
                        goto fail;
                }
-
                spin_lock_irq(&workers->lock);
                list_add_tail(&worker->worker_list, &workers->idle_list);
                worker->idle = 1;
@@ -350,7 +486,6 @@ static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
         */
        next = workers->worker_list.next;
        worker = list_entry(next, struct btrfs_worker_thread, worker_list);
-       atomic_inc(&worker->num_pending);
        worker->sequence++;
 
        if (worker->sequence % workers->idle_thresh == 0)
@@ -367,28 +502,18 @@ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
 {
        struct btrfs_worker_thread *worker;
        unsigned long flags;
+       struct list_head *fallback;
 
 again:
        spin_lock_irqsave(&workers->lock, flags);
        worker = next_worker(workers);
-       spin_unlock_irqrestore(&workers->lock, flags);
 
        if (!worker) {
-               spin_lock_irqsave(&workers->lock, flags);
                if (workers->num_workers >= workers->max_workers) {
-                       struct list_head *fallback = NULL;
-                       /*
-                        * we have failed to find any workers, just
-                        * return the force one
-                        */
-                       if (!list_empty(&workers->worker_list))
-                               fallback = workers->worker_list.next;
-                       if (!list_empty(&workers->idle_list))
-                               fallback = workers->idle_list.next;
-                       BUG_ON(!fallback);
-                       worker = list_entry(fallback,
-                                 struct btrfs_worker_thread, worker_list);
-                       spin_unlock_irqrestore(&workers->lock, flags);
+                       goto fallback;
+               } else if (workers->atomic_worker_start) {
+                       workers->atomic_start_pending = 1;
+                       goto fallback;
                } else {
                        spin_unlock_irqrestore(&workers->lock, flags);
                        /* we're below the limit, start another worker */
@@ -396,6 +521,28 @@ again:
                        goto again;
                }
        }
+       goto found;
+
+fallback:
+       fallback = NULL;
+       /*
+        * we have failed to find any workers, just
+        * return the first one we can find.
+        */
+       if (!list_empty(&workers->worker_list))
+               fallback = workers->worker_list.next;
+       if (!list_empty(&workers->idle_list))
+               fallback = workers->idle_list.next;
+       BUG_ON(!fallback);
+       worker = list_entry(fallback,
+                 struct btrfs_worker_thread, worker_list);
+found:
+       /*
+        * this makes sure the worker doesn't exit before it is placed
+        * onto a busy/idle list
+        */
+       atomic_inc(&worker->num_pending);
+       spin_unlock_irqrestore(&workers->lock, flags);
        return worker;
 }
 
@@ -427,7 +574,7 @@ int btrfs_requeue_work(struct btrfs_work *work)
                spin_lock(&worker->workers->lock);
                worker->idle = 0;
                list_move_tail(&worker->worker_list,
-                              &worker->workers->worker_list);
+                             &worker->workers->worker_list);
                spin_unlock(&worker->workers->lock);
        }
        if (!worker->working) {
@@ -435,9 +582,9 @@ int btrfs_requeue_work(struct btrfs_work *work)
                worker->working = 1;
        }
 
-       spin_unlock_irqrestore(&worker->lock, flags);
        if (wake)
                wake_up_process(worker->task);
+       spin_unlock_irqrestore(&worker->lock, flags);
 out:
 
        return 0;
@@ -463,14 +610,18 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
 
        worker = find_worker(workers);
        if (workers->ordered) {
-               spin_lock_irqsave(&workers->lock, flags);
+               /*
+                * you're not allowed to do ordered queues from an
+                * interrupt handler
+                */
+               spin_lock(&workers->order_lock);
                if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
                        list_add_tail(&work->order_list,
                                      &workers->prio_order_list);
                } else {
                        list_add_tail(&work->order_list, &workers->order_list);
                }
-               spin_unlock_irqrestore(&workers->lock, flags);
+               spin_unlock(&workers->order_lock);
        } else {
                INIT_LIST_HEAD(&work->order_list);
        }
@@ -481,7 +632,6 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
                list_add_tail(&work->list, &worker->prio_pending);
        else
                list_add_tail(&work->list, &worker->pending);
-       atomic_inc(&worker->num_pending);
        check_busy_worker(worker);
 
        /*
@@ -492,10 +642,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
                wake = 1;
        worker->working = 1;
 
-       spin_unlock_irqrestore(&worker->lock, flags);
-
        if (wake)
                wake_up_process(worker->task);
+       spin_unlock_irqrestore(&worker->lock, flags);
+
 out:
        return 0;
 }
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 1b511c1..fc089b9 100644 (file)
@@ -73,6 +73,15 @@ struct btrfs_workers {
        /* force completions in the order they were queued */
        int ordered;
 
+       /* more workers required, but in an interrupt handler */
+       int atomic_start_pending;
+
+       /*
+        * are we allowed to sleep while starting workers or are we required
+        * to start them at a later time?
+        */
+       int atomic_worker_start;
+
        /* list with all the work threads.  The workers on the idle thread
         * may be actively servicing jobs, but they haven't yet hit the
         * idle thresh limit above.
@@ -90,6 +99,9 @@ struct btrfs_workers {
        /* lock for finding the next worker thread to queue on */
        spinlock_t lock;
 
+       /* lock for the ordered lists */
+       spinlock_t order_lock;
+
        /* extra name for this worker, used for current->name */
        char *name;
 };
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index ea1ea0a..82ee56b 100644 (file)
@@ -138,6 +138,7 @@ struct btrfs_inode {
         * of these.
         */
        unsigned ordered_data_close:1;
+       unsigned dummy_inode:1;
 
        struct inode vfs_inode;
 };
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 9d8ba4d..a11a320 100644 (file)
@@ -506,10 +506,10 @@ static noinline int add_ra_bio_pages(struct inode *inode,
                 */
                set_page_extent_mapped(page);
                lock_extent(tree, last_offset, end, GFP_NOFS);
-               spin_lock(&em_tree->lock);
+               read_lock(&em_tree->lock);
                em = lookup_extent_mapping(em_tree, last_offset,
                                           PAGE_CACHE_SIZE);
-               spin_unlock(&em_tree->lock);
+               read_unlock(&em_tree->lock);
 
                if (!em || last_offset < em->start ||
                    (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
@@ -593,11 +593,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        em_tree = &BTRFS_I(inode)->extent_tree;
 
        /* we need the actual starting offset of this extent in the file */
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree,
                                   page_offset(bio->bi_io_vec->bv_page),
                                   PAGE_CACHE_SIZE);
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
 
        compressed_len = em->block_len;
        cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
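
[Editorial sketch] Both lookups above move em_tree->lock from spin_lock() to read_lock(): extent map lookups are read-mostly, so converting the tree lock to an rwlock lets readers run concurrently while insertions (see the write_lock() calls in the disk-io.c hunks further down) keep exclusive access. The split in miniature, with a pthread rwlock standing in for the kernel rwlock_t:

#include <pthread.h>

static pthread_rwlock_t em_lock = PTHREAD_RWLOCK_INITIALIZER;
static int mapping;     /* stands in for the extent map tree */

int lookup_mapping(void)
{
        int v;

        pthread_rwlock_rdlock(&em_lock);        /* read_lock(): shared */
        v = mapping;
        pthread_rwlock_unlock(&em_lock);
        return v;
}

void add_mapping(int v)
{
        pthread_rwlock_wrlock(&em_lock);        /* write_lock(): exclusive */
        mapping = v;
        pthread_rwlock_unlock(&em_lock);
}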
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 3fdcc05..ec96f3a 100644 (file)
@@ -2853,6 +2853,12 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
        int split;
        int num_doubles = 0;
 
+       l = path->nodes[0];
+       slot = path->slots[0];
+       if (extend && data_size + btrfs_item_size_nr(l, slot) +
+           sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root))
+               return -EOVERFLOW;
+
        /* first try to make some room by pushing left and right */
        if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
                wret = push_leaf_right(trans, root, path, data_size, 0);
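
[Editorial sketch] The new test at the top of split_leaf() above fails fast with -EOVERFLOW when an item being extended could not fit in a leaf even after splitting, rather than splitting fruitlessly first. The arithmetic as a standalone check; both constants are illustrative only (the real BTRFS_LEAF_DATA_SIZE() depends on the block size):

#include <stdio.h>

#define LEAF_DATA_SIZE  3995    /* hypothetical usable bytes per leaf */
#define ITEM_HEADER     25      /* hypothetical sizeof(struct btrfs_item) */

/* Mirrors: data_size + btrfs_item_size_nr(l, slot) + header > capacity */
static int extend_would_overflow(unsigned int data_size,
                                 unsigned int cur_item_size)
{
        return data_size + cur_item_size + ITEM_HEADER > LEAF_DATA_SIZE;
}

int main(void)
{
        printf("%d\n", extend_would_overflow(4000, 100)); /* 1: -EOVERFLOW */
        printf("%d\n", extend_would_overflow(100, 100));  /* 0: a split helps */
        return 0;
}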
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 837435c..80599b4 100644 (file)
@@ -114,6 +114,10 @@ struct btrfs_ordered_sum;
  */
 #define BTRFS_DEV_ITEMS_OBJECTID 1ULL
 
+#define BTRFS_BTREE_INODE_OBJECTID 1
+
+#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
+
 /*
  * we can actually store much bigger names, but lets not confuse the rest
  * of linux
@@ -670,6 +674,7 @@ struct btrfs_space_info {
        u64 bytes_reserved;     /* total bytes the allocator has reserved for
                                   current allocations */
        u64 bytes_readonly;     /* total bytes that are read only */
+       u64 bytes_super;        /* total bytes reserved for the super blocks */
 
        /* delalloc accounting */
        u64 bytes_delalloc;     /* number of bytes reserved for allocation,
@@ -726,6 +731,15 @@ enum btrfs_caching_type {
        BTRFS_CACHE_FINISHED    = 2,
 };
 
+struct btrfs_caching_control {
+       struct list_head list;
+       struct mutex mutex;
+       wait_queue_head_t wait;
+       struct btrfs_block_group_cache *block_group;
+       u64 progress;
+       atomic_t count;
+};
+
 struct btrfs_block_group_cache {
        struct btrfs_key key;
        struct btrfs_block_group_item item;
@@ -733,6 +747,7 @@ struct btrfs_block_group_cache {
        spinlock_t lock;
        u64 pinned;
        u64 reserved;
+       u64 bytes_super;
        u64 flags;
        u64 sectorsize;
        int extents_thresh;
@@ -742,8 +757,9 @@ struct btrfs_block_group_cache {
        int dirty;
 
        /* cache tracking stuff */
-       wait_queue_head_t caching_q;
        int cached;
+       struct btrfs_caching_control *caching_ctl;
+       u64 last_byte_to_unpin;
 
        struct btrfs_space_info *space_info;
 
@@ -782,13 +798,16 @@ struct btrfs_fs_info {
 
        /* the log root tree is a directory of all the other log roots */
        struct btrfs_root *log_root_tree;
+
+       spinlock_t fs_roots_radix_lock;
        struct radix_tree_root fs_roots_radix;
 
        /* block group cache stuff */
        spinlock_t block_group_cache_lock;
        struct rb_root block_group_cache_tree;
 
-       struct extent_io_tree pinned_extents;
+       struct extent_io_tree freed_extents[2];
+       struct extent_io_tree *pinned_extents;
 
        /* logical->physical extent mapping */
        struct btrfs_mapping_tree mapping_tree;
@@ -822,11 +841,7 @@ struct btrfs_fs_info {
        struct mutex transaction_kthread_mutex;
        struct mutex cleaner_mutex;
        struct mutex chunk_mutex;
-       struct mutex drop_mutex;
        struct mutex volume_mutex;
-       struct mutex tree_reloc_mutex;
-       struct rw_semaphore extent_commit_sem;
-
        /*
         * this protects the ordered operations list only while we are
         * processing all of the entries on it.  This way we make
@@ -835,10 +850,16 @@ struct btrfs_fs_info {
         * before jumping into the main commit.
         */
        struct mutex ordered_operations_mutex;
+       struct rw_semaphore extent_commit_sem;
+
+       struct rw_semaphore subvol_sem;
+
+       struct srcu_struct subvol_srcu;
 
        struct list_head trans_list;
        struct list_head hashers;
        struct list_head dead_roots;
+       struct list_head caching_block_groups;
 
        atomic_t nr_async_submits;
        atomic_t async_submit_draining;
@@ -996,10 +1017,12 @@ struct btrfs_root {
        u32 stripesize;
 
        u32 type;
-       u64 highest_inode;
-       u64 last_inode_alloc;
+
+       u64 highest_objectid;
        int ref_cows;
        int track_dirty;
+       int in_radix;
+
        u64 defrag_trans_start;
        struct btrfs_key defrag_progress;
        struct btrfs_key defrag_max;
@@ -1920,8 +1943,8 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root, unsigned long count);
 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
-int btrfs_update_pinned_extents(struct btrfs_root *root,
-                               u64 bytenr, u64 num, int pin);
+int btrfs_pin_extent(struct btrfs_root *root,
+                    u64 bytenr, u64 num, int reserved);
 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
                        struct btrfs_root *root, struct extent_buffer *leaf);
 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
@@ -1971,9 +1994,10 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
                      u64 root_objectid, u64 owner, u64 offset);
 
 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
+int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root);
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
-                              struct btrfs_root *root,
-                              struct extent_io_tree *unpin);
+                              struct btrfs_root *root);
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root,
                         u64 bytenr, u64 num_bytes, u64 parent,
@@ -1984,6 +2008,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
 int btrfs_free_block_groups(struct btrfs_fs_info *info);
 int btrfs_read_block_groups(struct btrfs_root *root);
+int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr);
 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root, u64 bytes_used,
                           u64 type, u64 chunk_objectid, u64 chunk_offset,
@@ -2006,7 +2031,6 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
                                 u64 bytes);
 void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
                              u64 bytes);
-void btrfs_free_pinned_extents(struct btrfs_fs_info *info);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
                     int level, int *slot);
@@ -2100,12 +2124,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
                        struct extent_buffer *parent);
 /* root-item.c */
 int btrfs_find_root_ref(struct btrfs_root *tree_root,
-                  struct btrfs_path *path,
-                  u64 root_id, u64 ref_id);
+                       struct btrfs_path *path,
+                       u64 root_id, u64 ref_id);
 int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
                       struct btrfs_root *tree_root,
-                      u64 root_id, u8 type, u64 ref_id,
-                      u64 dirid, u64 sequence,
+                      u64 root_id, u64 ref_id, u64 dirid, u64 sequence,
+                      const char *name, int name_len);
+int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
+                      struct btrfs_root *tree_root,
+                      u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
                       const char *name, int name_len);
 int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                   struct btrfs_key *key);
@@ -2120,6 +2147,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
 int btrfs_search_root(struct btrfs_root *root, u64 search_start,
                      u64 *found_objectid);
 int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
+int btrfs_find_orphan_roots(struct btrfs_root *tree_root);
 int btrfs_set_root_node(struct btrfs_root_item *item,
                        struct extent_buffer *node);
 /* dir-item.c */
@@ -2138,6 +2166,10 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
                            struct btrfs_path *path, u64 dir,
                            u64 objectid, const char *name, int name_len,
                            int mod);
+struct btrfs_dir_item *
+btrfs_search_dir_index_item(struct btrfs_root *root,
+                           struct btrfs_path *path, u64 dirid,
+                           const char *name, int name_len);
 struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
                              struct btrfs_path *path,
                              const char *name, int name_len);
@@ -2160,6 +2192,7 @@ int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root, u64 offset);
 int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, u64 offset);
+int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset);
 
 /* inode-map.c */
 int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
@@ -2232,6 +2265,10 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
 int btrfs_add_link(struct btrfs_trans_handle *trans,
                   struct inode *parent_inode, struct inode *inode,
                   const char *name, int name_len, int add_backref, u64 index);
+int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+                       struct btrfs_root *root,
+                       struct inode *dir, u64 objectid,
+                       const char *name, int name_len);
 int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root,
                               struct inode *inode, u64 new_size,
@@ -2242,7 +2279,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end);
 int btrfs_writepages(struct address_space *mapping,
                     struct writeback_control *wbc);
 int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
-                            struct btrfs_root *new_root, struct dentry *dentry,
+                            struct btrfs_root *new_root,
                             u64 new_dirid, u64 alloc_hint);
 int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
                         size_t size, struct bio *bio, unsigned long bio_flags);
@@ -2258,6 +2295,7 @@ int btrfs_write_inode(struct inode *inode, int wait);
 void btrfs_dirty_inode(struct inode *inode);
 struct inode *btrfs_alloc_inode(struct super_block *sb);
 void btrfs_destroy_inode(struct inode *inode);
+void btrfs_drop_inode(struct inode *inode);
 int btrfs_init_cachep(void);
 void btrfs_destroy_cachep(void);
 long btrfs_ioctl_trans_end(struct file *file);
@@ -2275,6 +2313,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
 int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
 void btrfs_orphan_cleanup(struct btrfs_root *root);
 int btrfs_cont_expand(struct inode *inode, loff_t size);
+int btrfs_invalidate_inodes(struct btrfs_root *root);
+extern struct dentry_operations btrfs_dentry_operations;
 
 /* ioctl.c */
 long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
@@ -2290,7 +2330,7 @@ extern struct file_operations btrfs_file_operations;
 int btrfs_drop_extents(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, struct inode *inode,
                       u64 start, u64 end, u64 locked_end,
-                      u64 inline_limit, u64 *hint_block);
+                      u64 inline_limit, u64 *hint_block, int drop_cache);
 int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              struct inode *inode, u64 start, u64 end);
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 1d70236..f3a6075 100644 (file)
@@ -281,6 +281,53 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
        return btrfs_match_dir_item_name(root, path, name, name_len);
 }
 
+struct btrfs_dir_item *
+btrfs_search_dir_index_item(struct btrfs_root *root,
+                           struct btrfs_path *path, u64 dirid,
+                           const char *name, int name_len)
+{
+       struct extent_buffer *leaf;
+       struct btrfs_dir_item *di;
+       struct btrfs_key key;
+       u32 nritems;
+       int ret;
+
+       key.objectid = dirid;
+       key.type = BTRFS_DIR_INDEX_KEY;
+       key.offset = 0;
+
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       if (ret < 0)
+               return ERR_PTR(ret);
+
+       leaf = path->nodes[0];
+       nritems = btrfs_header_nritems(leaf);
+
+       while (1) {
+               if (path->slots[0] >= nritems) {
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret < 0)
+                               return ERR_PTR(ret);
+                       if (ret > 0)
+                               break;
+                       leaf = path->nodes[0];
+                       nritems = btrfs_header_nritems(leaf);
+                       continue;
+               }
+
+               btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+               if (key.objectid != dirid || key.type != BTRFS_DIR_INDEX_KEY)
+                       break;
+
+               di = btrfs_match_dir_item_name(root, path, name, name_len);
+               if (di)
+                       return di;
+
+               path->slots[0]++;
+       }
+       return NULL;
+}
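
[Editorial sketch] btrfs_search_dir_index_item() above is the standard cross-leaf scan: start at (dirid, DIR_INDEX, 0), walk the slots of the current leaf, let btrfs_next_leaf() pull in the next leaf when the slots run out, and stop once the key no longer belongs to the directory. The same cursor shape over a toy paged array (all names hypothetical):

#include <stdio.h>

#define SLOTS_PER_LEAF 4

struct leaf { int nritems; int keys[SLOTS_PER_LEAF]; };

/* Two toy "leaves" of a sorted, paged container. */
static struct leaf leaves[] = {
        { 4, { 10, 11, 12, 13 } },
        { 2, { 14, 15 } },
};
static const int nleaves = 2;

/* Walk slots, hopping leaves the way btrfs_next_leaf() does. */
static int scan_for(int want)
{
        int li = 0, slot = 0;

        while (1) {
                if (slot >= leaves[li].nritems) {       /* leaf exhausted */
                        if (++li >= nleaves)
                                return -1;              /* ran off the tree */
                        slot = 0;
                        continue;
                }
                if (leaves[li].keys[slot] > want)
                        return -1;                      /* key range passed */
                if (leaves[li].keys[slot] == want)
                        return li * SLOTS_PER_LEAF + slot;
                slot++;
        }
}

int main(void)
{
        printf("%d\n", scan_for(14));   /* 4: leaf 1, slot 0 */
        printf("%d\n", scan_for(9));    /* -1: not present */
        return 0;
}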
+
 struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root,
                                          struct btrfs_path *path, u64 dir,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6c41731..644e796 100644 (file)
@@ -41,6 +41,7 @@
 
 static struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
+static void free_fs_root(struct btrfs_root *root);
 
 static atomic_t btrfs_bdi_num = ATOMIC_INIT(0);
 
@@ -123,15 +124,15 @@ static struct extent_map *btree_get_extent(struct inode *inode,
        struct extent_map *em;
        int ret;
 
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, start, len);
        if (em) {
                em->bdev =
                        BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
-               spin_unlock(&em_tree->lock);
+               read_unlock(&em_tree->lock);
                goto out;
        }
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
 
        em = alloc_extent_map(GFP_NOFS);
        if (!em) {
@@ -144,7 +145,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
        em->block_start = 0;
        em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 
-       spin_lock(&em_tree->lock);
+       write_lock(&em_tree->lock);
        ret = add_extent_mapping(em_tree, em);
        if (ret == -EEXIST) {
                u64 failed_start = em->start;
@@ -163,7 +164,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
                free_extent_map(em);
                em = NULL;
        }
-       spin_unlock(&em_tree->lock);
+       write_unlock(&em_tree->lock);
 
        if (ret)
                em = ERR_PTR(ret);
@@ -895,8 +896,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
        root->fs_info = fs_info;
        root->objectid = objectid;
        root->last_trans = 0;
-       root->highest_inode = 0;
-       root->last_inode_alloc = 0;
+       root->highest_objectid = 0;
        root->name = NULL;
        root->in_sysfs = 0;
        root->inode_tree.rb_node = NULL;
@@ -952,14 +952,16 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
                     root, fs_info, objectid);
        ret = btrfs_find_last_root(tree_root, objectid,
                                   &root->root_item, &root->root_key);
+       if (ret > 0)
+               return -ENOENT;
        BUG_ON(ret);
 
        generation = btrfs_root_generation(&root->root_item);
        blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
        root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
                                     blocksize, generation);
-       root->commit_root = btrfs_root_node(root);
        BUG_ON(!root->node);
+       root->commit_root = btrfs_root_node(root);
        return 0;
 }
 
@@ -1095,7 +1097,6 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
        struct btrfs_fs_info *fs_info = tree_root->fs_info;
        struct btrfs_path *path;
        struct extent_buffer *l;
-       u64 highest_inode;
        u64 generation;
        u32 blocksize;
        int ret = 0;
@@ -1110,7 +1111,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
                        kfree(root);
                        return ERR_PTR(ret);
                }
-               goto insert;
+               goto out;
        }
 
        __setup_root(tree_root->nodesize, tree_root->leafsize,
@@ -1120,39 +1121,30 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
        path = btrfs_alloc_path();
        BUG_ON(!path);
        ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
-       if (ret != 0) {
-               if (ret > 0)
-                       ret = -ENOENT;
-               goto out;
+       if (ret == 0) {
+               l = path->nodes[0];
+               read_extent_buffer(l, &root->root_item,
+                               btrfs_item_ptr_offset(l, path->slots[0]),
+                               sizeof(root->root_item));
+               memcpy(&root->root_key, location, sizeof(*location));
        }
-       l = path->nodes[0];
-       read_extent_buffer(l, &root->root_item,
-              btrfs_item_ptr_offset(l, path->slots[0]),
-              sizeof(root->root_item));
-       memcpy(&root->root_key, location, sizeof(*location));
-       ret = 0;
-out:
-       btrfs_release_path(root, path);
        btrfs_free_path(path);
        if (ret) {
-               kfree(root);
+               if (ret > 0)
+                       ret = -ENOENT;
                return ERR_PTR(ret);
        }
+
        generation = btrfs_root_generation(&root->root_item);
        blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
        root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
                                     blocksize, generation);
        root->commit_root = btrfs_root_node(root);
        BUG_ON(!root->node);
-insert:
-       if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
+out:
+       if (location->objectid != BTRFS_TREE_LOG_OBJECTID)
                root->ref_cows = 1;
-               ret = btrfs_find_highest_inode(root, &highest_inode);
-               if (ret == 0) {
-                       root->highest_inode = highest_inode;
-                       root->last_inode_alloc = highest_inode;
-               }
-       }
+
        return root;
 }
 
@@ -1187,39 +1179,66 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
                return fs_info->dev_root;
        if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
                return fs_info->csum_root;
-
+again:
+       spin_lock(&fs_info->fs_roots_radix_lock);
        root = radix_tree_lookup(&fs_info->fs_roots_radix,
                                 (unsigned long)location->objectid);
+       spin_unlock(&fs_info->fs_roots_radix_lock);
        if (root)
                return root;
 
+       ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid);
+       if (ret == 0)
+               ret = -ENOENT;
+       if (ret < 0)
+               return ERR_PTR(ret);
+
        root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location);
        if (IS_ERR(root))
                return root;
 
+       WARN_ON(btrfs_root_refs(&root->root_item) == 0);
        set_anon_super(&root->anon_super, NULL);
 
+       ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+       if (ret)
+               goto fail;
+
+       spin_lock(&fs_info->fs_roots_radix_lock);
        ret = radix_tree_insert(&fs_info->fs_roots_radix,
                                (unsigned long)root->root_key.objectid,
                                root);
+       if (ret == 0)
+               root->in_radix = 1;
+       spin_unlock(&fs_info->fs_roots_radix_lock);
+       radix_tree_preload_end();
        if (ret) {
-               free_extent_buffer(root->node);
-               kfree(root);
-               return ERR_PTR(ret);
+               if (ret == -EEXIST) {
+                       free_fs_root(root);
+                       goto again;
+               }
+               goto fail;
        }
-       if (!(fs_info->sb->s_flags & MS_RDONLY)) {
-               ret = btrfs_find_dead_roots(fs_info->tree_root,
-                                           root->root_key.objectid);
-               BUG_ON(ret);
+
+       ret = btrfs_find_dead_roots(fs_info->tree_root,
+                                   root->root_key.objectid);
+       WARN_ON(ret);
+
+       if (!(fs_info->sb->s_flags & MS_RDONLY))
                btrfs_orphan_cleanup(root);
-       }
+
        return root;
+fail:
+       free_fs_root(root);
+       return ERR_PTR(ret);
 }
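
[Editorial sketch] btrfs_read_fs_root_no_name() above now follows the usual lookup/build/insert dance for caches: look up under fs_roots_radix_lock, and on a miss construct the root outside the lock, radix_tree_preload() so the insert cannot fail on allocation while the spinlock is held, and treat -EEXIST as "somebody raced us", freeing the local copy and retrying the lookup. The control flow reduced to a sketch, with a mutex and a one-entry cache standing in for the spinlock and the radix tree:

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t radix_lock = PTHREAD_MUTEX_INITIALIZER;
static void *slot;              /* one-entry stand-in for the radix tree */

static void *build_root(void) { return malloc(1); }    /* may sleep */

void *lookup_or_insert(void)
{
        void *root;

again:
        pthread_mutex_lock(&radix_lock);
        root = slot;                    /* fast path: already cached */
        pthread_mutex_unlock(&radix_lock);
        if (root)
                return root;

        root = build_root();            /* slow path, outside the lock */
        if (!root)
                return NULL;
        /* radix_tree_preload() would go here so the insert can't ENOMEM */
        pthread_mutex_lock(&radix_lock);
        if (slot) {                     /* -EEXIST: we lost the race */
                pthread_mutex_unlock(&radix_lock);
                free(root);             /* free_fs_root() in the original */
                goto again;
        }
        slot = root;
        pthread_mutex_unlock(&radix_lock);
        return root;
}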
 
 struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
                                      struct btrfs_key *location,
                                      const char *name, int namelen)
 {
+       return btrfs_read_fs_root_no_name(fs_info, location);
+#if 0
        struct btrfs_root *root;
        int ret;
 
@@ -1236,7 +1255,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
                kfree(root);
                return ERR_PTR(ret);
        }
-#if 0
+
        ret = btrfs_sysfs_add_root(root);
        if (ret) {
                free_extent_buffer(root->node);
@@ -1244,9 +1263,9 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
                kfree(root);
                return ERR_PTR(ret);
        }
-#endif
        root->in_sysfs = 1;
        return root;
+#endif
 }
 
 static int btrfs_congested_fn(void *congested_data, int bdi_bits)
@@ -1325,9 +1344,9 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
        offset = page_offset(page);
 
        em_tree = &BTRFS_I(inode)->extent_tree;
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
        if (!em) {
                __unplug_io_fn(bdi, page);
                return;
@@ -1360,8 +1379,10 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
 
        err = bdi_register(bdi, NULL, "btrfs-%d",
                                atomic_inc_return(&btrfs_bdi_num));
-       if (err)
+       if (err) {
+               bdi_destroy(bdi);
                return err;
+       }
 
        bdi->ra_pages   = default_backing_dev_info.ra_pages;
        bdi->unplug_io_fn       = btrfs_unplug_io_fn;
@@ -1451,9 +1472,12 @@ static int cleaner_kthread(void *arg)
                        break;
 
                vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
-               mutex_lock(&root->fs_info->cleaner_mutex);
-               btrfs_clean_old_snapshots(root);
-               mutex_unlock(&root->fs_info->cleaner_mutex);
+
+               if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
+                   mutex_trylock(&root->fs_info->cleaner_mutex)) {
+                       btrfs_clean_old_snapshots(root);
+                       mutex_unlock(&root->fs_info->cleaner_mutex);
+               }
 
                if (freezing(current)) {
                        refrigerator();
@@ -1558,15 +1582,36 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                err = -ENOMEM;
                goto fail;
        }
-       INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
+
+       ret = init_srcu_struct(&fs_info->subvol_srcu);
+       if (ret) {
+               err = ret;
+               goto fail;
+       }
+
+       ret = setup_bdi(fs_info, &fs_info->bdi);
+       if (ret) {
+               err = ret;
+               goto fail_srcu;
+       }
+
+       fs_info->btree_inode = new_inode(sb);
+       if (!fs_info->btree_inode) {
+               err = -ENOMEM;
+               goto fail_bdi;
+       }
+
+       INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
        INIT_LIST_HEAD(&fs_info->trans_list);
        INIT_LIST_HEAD(&fs_info->dead_roots);
        INIT_LIST_HEAD(&fs_info->hashers);
        INIT_LIST_HEAD(&fs_info->delalloc_inodes);
        INIT_LIST_HEAD(&fs_info->ordered_operations);
+       INIT_LIST_HEAD(&fs_info->caching_block_groups);
        spin_lock_init(&fs_info->delalloc_lock);
        spin_lock_init(&fs_info->new_trans_lock);
        spin_lock_init(&fs_info->ref_cache_lock);
+       spin_lock_init(&fs_info->fs_roots_radix_lock);
 
        init_completion(&fs_info->kobj_unregister);
        fs_info->tree_root = tree_root;
@@ -1585,11 +1630,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        fs_info->sb = sb;
        fs_info->max_extent = (u64)-1;
        fs_info->max_inline = 8192 * 1024;
-       if (setup_bdi(fs_info, &fs_info->bdi))
-               goto fail_bdi;
-       fs_info->btree_inode = new_inode(sb);
-       fs_info->btree_inode->i_ino = 1;
-       fs_info->btree_inode->i_nlink = 1;
        fs_info->metadata_ratio = 8;
 
        fs_info->thread_pool_size = min_t(unsigned long,
@@ -1602,6 +1642,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        sb->s_blocksize_bits = blksize_bits(4096);
        sb->s_bdi = &fs_info->bdi;
 
+       fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
+       fs_info->btree_inode->i_nlink = 1;
        /*
         * we set the i_size on the btree inode to the max possible int.
         * the real end of the address space is determined by all of
@@ -1620,28 +1662,32 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
        BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
 
+       BTRFS_I(fs_info->btree_inode)->root = tree_root;
+       memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
+              sizeof(struct btrfs_key));
+       BTRFS_I(fs_info->btree_inode)->dummy_inode = 1;
+       insert_inode_hash(fs_info->btree_inode);
+
        spin_lock_init(&fs_info->block_group_cache_lock);
        fs_info->block_group_cache_tree.rb_node = NULL;
 
-       extent_io_tree_init(&fs_info->pinned_extents,
+       extent_io_tree_init(&fs_info->freed_extents[0],
                             fs_info->btree_inode->i_mapping, GFP_NOFS);
+       extent_io_tree_init(&fs_info->freed_extents[1],
+                            fs_info->btree_inode->i_mapping, GFP_NOFS);
+       fs_info->pinned_extents = &fs_info->freed_extents[0];
        fs_info->do_barriers = 1;
 
-       BTRFS_I(fs_info->btree_inode)->root = tree_root;
-       memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
-              sizeof(struct btrfs_key));
-       insert_inode_hash(fs_info->btree_inode);
 
        mutex_init(&fs_info->trans_mutex);
        mutex_init(&fs_info->ordered_operations_mutex);
        mutex_init(&fs_info->tree_log_mutex);
-       mutex_init(&fs_info->drop_mutex);
        mutex_init(&fs_info->chunk_mutex);
        mutex_init(&fs_info->transaction_kthread_mutex);
        mutex_init(&fs_info->cleaner_mutex);
        mutex_init(&fs_info->volume_mutex);
-       mutex_init(&fs_info->tree_reloc_mutex);
        init_rwsem(&fs_info->extent_commit_sem);
+       init_rwsem(&fs_info->subvol_sem);
 
        btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
        btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -1700,7 +1746,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                err = -EINVAL;
                goto fail_iput;
        }
-
+printk("thread pool is %d\n", fs_info->thread_pool_size);
        /*
         * we need to start all the end_io workers up front because the
         * queue work function gets called at interrupt time, and so it
@@ -1745,20 +1791,22 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        fs_info->endio_workers.idle_thresh = 4;
        fs_info->endio_meta_workers.idle_thresh = 4;
 
-       fs_info->endio_write_workers.idle_thresh = 64;
-       fs_info->endio_meta_write_workers.idle_thresh = 64;
+       fs_info->endio_write_workers.idle_thresh = 2;
+       fs_info->endio_meta_write_workers.idle_thresh = 2;
+
+       fs_info->endio_workers.atomic_worker_start = 1;
+       fs_info->endio_meta_workers.atomic_worker_start = 1;
+       fs_info->endio_write_workers.atomic_worker_start = 1;
+       fs_info->endio_meta_write_workers.atomic_worker_start = 1;
 
        btrfs_start_workers(&fs_info->workers, 1);
        btrfs_start_workers(&fs_info->submit_workers, 1);
        btrfs_start_workers(&fs_info->delalloc_workers, 1);
        btrfs_start_workers(&fs_info->fixup_workers, 1);
-       btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
-       btrfs_start_workers(&fs_info->endio_meta_workers,
-                           fs_info->thread_pool_size);
-       btrfs_start_workers(&fs_info->endio_meta_write_workers,
-                           fs_info->thread_pool_size);
-       btrfs_start_workers(&fs_info->endio_write_workers,
-                           fs_info->thread_pool_size);
+       btrfs_start_workers(&fs_info->endio_workers, 1);
+       btrfs_start_workers(&fs_info->endio_meta_workers, 1);
+       btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
+       btrfs_start_workers(&fs_info->endio_write_workers, 1);
 
        fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
        fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -1918,6 +1966,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                }
        }
 
+       ret = btrfs_find_orphan_roots(tree_root);
+       BUG_ON(ret);
+
        if (!(sb->s_flags & MS_RDONLY)) {
                ret = btrfs_recover_relocation(tree_root);
                BUG_ON(ret);
@@ -1977,6 +2028,8 @@ fail_iput:
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
 fail_bdi:
        bdi_destroy(&fs_info->bdi);
+fail_srcu:
+       cleanup_srcu_struct(&fs_info->subvol_srcu);
 fail:
        kfree(extent_root);
        kfree(tree_root);
@@ -2236,20 +2289,29 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
 
 int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
 {
-       WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
+       spin_lock(&fs_info->fs_roots_radix_lock);
        radix_tree_delete(&fs_info->fs_roots_radix,
                          (unsigned long)root->root_key.objectid);
+       spin_unlock(&fs_info->fs_roots_radix_lock);
+
+       if (btrfs_root_refs(&root->root_item) == 0)
+               synchronize_srcu(&fs_info->subvol_srcu);
+
+       free_fs_root(root);
+       return 0;
+}
+
+static void free_fs_root(struct btrfs_root *root)
+{
+       WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
        if (root->anon_super.s_dev) {
                down_write(&root->anon_super.s_umount);
                kill_anon_super(&root->anon_super);
        }
-       if (root->node)
-               free_extent_buffer(root->node);
-       if (root->commit_root)
-               free_extent_buffer(root->commit_root);
+       free_extent_buffer(root->node);
+       free_extent_buffer(root->commit_root);
        kfree(root->name);
        kfree(root);
-       return 0;
 }
 
 static int del_fs_roots(struct btrfs_fs_info *fs_info)
@@ -2258,6 +2320,20 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info)
        struct btrfs_root *gang[8];
        int i;
 
+       while (!list_empty(&fs_info->dead_roots)) {
+               gang[0] = list_entry(fs_info->dead_roots.next,
+                                    struct btrfs_root, root_list);
+               list_del(&gang[0]->root_list);
+
+               if (gang[0]->in_radix) {
+                       btrfs_free_fs_root(fs_info, gang[0]);
+               } else {
+                       free_extent_buffer(gang[0]->node);
+                       free_extent_buffer(gang[0]->commit_root);
+                       kfree(gang[0]);
+               }
+       }
+
        while (1) {
                ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
                                             (void **)gang, 0,
@@ -2287,9 +2363,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
                root_objectid = gang[ret - 1]->root_key.objectid + 1;
                for (i = 0; i < ret; i++) {
                        root_objectid = gang[i]->root_key.objectid;
-                       ret = btrfs_find_dead_roots(fs_info->tree_root,
-                                                   root_objectid);
-                       BUG_ON(ret);
                        btrfs_orphan_cleanup(gang[i]);
                }
                root_objectid++;
@@ -2359,7 +2432,6 @@ int close_ctree(struct btrfs_root *root)
        free_extent_buffer(root->fs_info->csum_root->commit_root);
 
        btrfs_free_block_groups(root->fs_info);
-       btrfs_free_pinned_extents(root->fs_info);
 
        del_fs_roots(fs_info);
 
@@ -2378,6 +2450,7 @@ int close_ctree(struct btrfs_root *root)
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
 
        bdi_destroy(&fs_info->bdi);
+       cleanup_srcu_struct(&fs_info->subvol_srcu);
 
        kfree(fs_info->extent_root);
        kfree(fs_info->tree_root);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 9596b40..ba5c3fd 100644
@@ -28,7 +28,7 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
        len  = BTRFS_FID_SIZE_NON_CONNECTABLE;
        type = FILEID_BTRFS_WITHOUT_PARENT;
 
-       fid->objectid = BTRFS_I(inode)->location.objectid;
+       fid->objectid = inode->i_ino;
        fid->root_objectid = BTRFS_I(inode)->root->objectid;
        fid->gen = inode->i_generation;
 
@@ -60,34 +60,61 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
 }
 
 static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
-                                      u64 root_objectid, u32 generation)
+                                      u64 root_objectid, u32 generation,
+                                      int check_generation)
 {
+       struct btrfs_fs_info *fs_info = btrfs_sb(sb)->fs_info;
        struct btrfs_root *root;
+       struct dentry *dentry;
        struct inode *inode;
        struct btrfs_key key;
+       int index;
+       int err = 0;
+
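+       /*
+        * objectids below BTRFS_FIRST_FREE_OBJECTID are reserved for
+        * internal trees and can never name a regular inode
+        */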
+       if (objectid < BTRFS_FIRST_FREE_OBJECTID)
+               return ERR_PTR(-ESTALE);
 
        key.objectid = root_objectid;
        btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
        key.offset = (u64)-1;
 
-       root = btrfs_read_fs_root_no_name(btrfs_sb(sb)->fs_info, &key);
-       if (IS_ERR(root))
-               return ERR_CAST(root);
+       index = srcu_read_lock(&fs_info->subvol_srcu);
+
+       root = btrfs_read_fs_root_no_name(fs_info, &key);
+       if (IS_ERR(root)) {
+               err = PTR_ERR(root);
+               goto fail;
+       }
+
+       if (btrfs_root_refs(&root->root_item) == 0) {
+               err = -ENOENT;
+               goto fail;
+       }
 
        key.objectid = objectid;
        btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
        key.offset = 0;
 
        inode = btrfs_iget(sb, &key, root);
-       if (IS_ERR(inode))
-               return (void *)inode;
+       if (IS_ERR(inode)) {
+               err = PTR_ERR(inode);
+               goto fail;
+       }
+
+       srcu_read_unlock(&fs_info->subvol_srcu, index);
 
-       if (generation != inode->i_generation) {
+       if (check_generation && generation != inode->i_generation) {
                iput(inode);
                return ERR_PTR(-ESTALE);
        }
 
-       return d_obtain_alias(inode);
+       dentry = d_obtain_alias(inode);
+       if (!IS_ERR(dentry))
+               dentry->d_op = &btrfs_dentry_operations;
+       return dentry;
+fail:
+       srcu_read_unlock(&fs_info->subvol_srcu, index);
+       return ERR_PTR(err);
 }
 
 static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
@@ -111,7 +138,7 @@ static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
        objectid = fid->parent_objectid;
        generation = fid->parent_gen;
 
-       return btrfs_get_dentry(sb, objectid, root_objectid, generation);
+       return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
 }
 
 static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
@@ -133,66 +160,76 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
        root_objectid = fid->root_objectid;
        generation = fid->gen;
 
-       return btrfs_get_dentry(sb, objectid, root_objectid, generation);
+       return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
 }
 
 static struct dentry *btrfs_get_parent(struct dentry *child)
 {
        struct inode *dir = child->d_inode;
+       struct dentry *dentry;
        struct btrfs_root *root = BTRFS_I(dir)->root;
-       struct btrfs_key key;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
-       int slot;
-       u64 objectid;
+       struct btrfs_root_ref *ref;
+       struct btrfs_key key;
+       struct btrfs_key found_key;
        int ret;
 
        path = btrfs_alloc_path();
 
-       key.objectid = dir->i_ino;
-       btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
-       key.offset = (u64)-1;
+       if (dir->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
+               key.objectid = root->root_key.objectid;
+               key.type = BTRFS_ROOT_BACKREF_KEY;
+               key.offset = (u64)-1;
+               root = root->fs_info->tree_root;
+       } else {
+               key.objectid = dir->i_ino;
+               key.type = BTRFS_INODE_REF_KEY;
+               key.offset = (u64)-1;
+       }
 
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-       if (ret < 0) {
-               /* Error */
-               btrfs_free_path(path);
-               return ERR_PTR(ret);
+       if (ret < 0)
+               goto fail;
+
+       BUG_ON(ret == 0);
+       if (path->slots[0] == 0) {
+               ret = -ENOENT;
+               goto fail;
        }
+
+       path->slots[0]--;
        leaf = path->nodes[0];
-       slot = path->slots[0];
-       if (ret) {
-               /* btrfs_search_slot() returns the slot where we'd want to
-                  insert a backref for parent inode #0xFFFFFFFFFFFFFFFF.
-                  The _real_ backref, telling us what the parent inode
-                  _actually_ is, will be in the slot _before_ the one
-                  that btrfs_search_slot() returns. */
-               if (!slot) {
-                       /* Unless there is _no_ key in the tree before... */
-                       btrfs_free_path(path);
-                       return ERR_PTR(-EIO);
-               }
-               slot--;
+
+       btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+       if (found_key.objectid != key.objectid || found_key.type != key.type) {
+               ret = -ENOENT;
+               goto fail;
        }
 
-       btrfs_item_key_to_cpu(leaf, &key, slot);
+       if (found_key.type == BTRFS_ROOT_BACKREF_KEY) {
+               ref = btrfs_item_ptr(leaf, path->slots[0],
+                                    struct btrfs_root_ref);
+               key.objectid = btrfs_root_ref_dirid(leaf, ref);
+       } else {
+               key.objectid = found_key.offset;
+       }
        btrfs_free_path(path);
 
-       if (key.objectid != dir->i_ino || key.type != BTRFS_INODE_REF_KEY)
-               return ERR_PTR(-EINVAL);
-
-       objectid = key.offset;
-
-       /* If we are already at the root of a subvol, return the real root */
-       if (objectid == dir->i_ino)
-               return dget(dir->i_sb->s_root);
+       if (found_key.type == BTRFS_ROOT_BACKREF_KEY) {
+               return btrfs_get_dentry(root->fs_info->sb, key.objectid,
+                                       found_key.offset, 0, 0);
+       }
 
-       /* Build a new key for the inode item */
-       key.objectid = objectid;
-       btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+       key.type = BTRFS_INODE_ITEM_KEY;
        key.offset = 0;
-
-       return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root));
+       dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root));
+       if (!IS_ERR(dentry))
+               dentry->d_op = &btrfs_dentry_operations;
+       return dentry;
+fail:
+       btrfs_free_path(path);
+       return ERR_PTR(ret);
 }
 
 const struct export_operations btrfs_export_ops = {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 535f85b..993f93f 100644
 #include "locking.h"
 #include "free-space-cache.h"
 
-static int update_reserved_extents(struct btrfs_root *root,
-                                  u64 bytenr, u64 num, int reserve);
 static int update_block_group(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              u64 bytenr, u64 num_bytes, int alloc,
                              int mark_free);
+static int update_reserved_extents(struct btrfs_block_group_cache *cache,
+                                  u64 num_bytes, int reserve);
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                u64 bytenr, u64 num_bytes, u64 parent,
@@ -57,10 +57,17 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
                                     u64 parent, u64 root_objectid,
                                     u64 flags, struct btrfs_disk_key *key,
                                     int level, struct btrfs_key *ins);
-
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
                          struct btrfs_root *extent_root, u64 alloc_bytes,
                          u64 flags, int force);
+static int pin_down_bytes(struct btrfs_trans_handle *trans,
+                         struct btrfs_root *root,
+                         struct btrfs_path *path,
+                         u64 bytenr, u64 num_bytes,
+                         int is_data, int reserved,
+                         struct extent_buffer **must_clean);
+static int find_next_key(struct btrfs_path *path, int level,
+                        struct btrfs_key *key);
 
 static noinline int
 block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -153,34 +160,34 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
        return ret;
 }
 
-/*
- * We always set EXTENT_LOCKED for the super mirror extents so we don't
- * overwrite them, so those bits need to be unset.  Also, if we are unmounting
- * with pinned extents still sitting there because we had a block group caching,
- * we need to clear those now, since we are done.
- */
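+/*
+ * mark a range in both freed_extents trees with EXTENT_UPTODATE so the
+ * caching thread never reports it as free space
+ */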
-void btrfs_free_pinned_extents(struct btrfs_fs_info *info)
+static int add_excluded_extent(struct btrfs_root *root,
+                              u64 start, u64 num_bytes)
 {
-       u64 start, end, last = 0;
-       int ret;
+       u64 end = start + num_bytes - 1;
+       set_extent_bits(&root->fs_info->freed_extents[0],
+                       start, end, EXTENT_UPTODATE, GFP_NOFS);
+       set_extent_bits(&root->fs_info->freed_extents[1],
+                       start, end, EXTENT_UPTODATE, GFP_NOFS);
+       return 0;
+}
 
-       while (1) {
-               ret = find_first_extent_bit(&info->pinned_extents, last,
-                                           &start, &end,
-                                           EXTENT_LOCKED|EXTENT_DIRTY);
-               if (ret)
-                       break;
+static void free_excluded_extents(struct btrfs_root *root,
+                                 struct btrfs_block_group_cache *cache)
+{
+       u64 start, end;
 
-               clear_extent_bits(&info->pinned_extents, start, end,
-                                 EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS);
-               last = end+1;
-       }
+       start = cache->key.objectid;
+       end = start + cache->key.offset - 1;
+
+       clear_extent_bits(&root->fs_info->freed_extents[0],
+                         start, end, EXTENT_UPTODATE, GFP_NOFS);
+       clear_extent_bits(&root->fs_info->freed_extents[1],
+                         start, end, EXTENT_UPTODATE, GFP_NOFS);
 }
 
-static int remove_sb_from_cache(struct btrfs_root *root,
-                               struct btrfs_block_group_cache *cache)
+static int exclude_super_stripes(struct btrfs_root *root,
+                                struct btrfs_block_group_cache *cache)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
        u64 bytenr;
        u64 *logical;
        int stripe_len;
@@ -192,17 +199,42 @@ static int remove_sb_from_cache(struct btrfs_root *root,
                                       cache->key.objectid, bytenr,
                                       0, &logical, &nr, &stripe_len);
                BUG_ON(ret);
+
                while (nr--) {
-                       try_lock_extent(&fs_info->pinned_extents,
-                                       logical[nr],
-                                       logical[nr] + stripe_len - 1, GFP_NOFS);
+                       cache->bytes_super += stripe_len;
+                       ret = add_excluded_extent(root, logical[nr],
+                                                 stripe_len);
+                       BUG_ON(ret);
                }
+
                kfree(logical);
        }
-
        return 0;
 }
 
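+/*
+ * return the block group's caching control with an extra reference held,
+ * or NULL if caching is not currently in progress
+ */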
+static struct btrfs_caching_control *
+get_caching_control(struct btrfs_block_group_cache *cache)
+{
+       struct btrfs_caching_control *ctl;
+
+       spin_lock(&cache->lock);
+       if (cache->cached != BTRFS_CACHE_STARTED) {
+               spin_unlock(&cache->lock);
+               return NULL;
+       }
+
+       ctl = cache->caching_ctl;
+       atomic_inc(&ctl->count);
+       spin_unlock(&cache->lock);
+       return ctl;
+}
+
+static void put_caching_control(struct btrfs_caching_control *ctl)
+{
+       if (atomic_dec_and_test(&ctl->count))
+               kfree(ctl);
+}
+
 /*
  * this is only called by cache_block_group; since we could have freed
  * extents, we need to check the pinned_extents for any extents that can't
  * be used yet
@@ -215,9 +247,9 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
        int ret;
 
        while (start < end) {
-               ret = find_first_extent_bit(&info->pinned_extents, start,
+               ret = find_first_extent_bit(info->pinned_extents, start,
                                            &extent_start, &extent_end,
-                                           EXTENT_DIRTY|EXTENT_LOCKED);
+                                           EXTENT_DIRTY | EXTENT_UPTODATE);
                if (ret)
                        break;
 
@@ -249,22 +281,27 @@ static int caching_kthread(void *data)
 {
        struct btrfs_block_group_cache *block_group = data;
        struct btrfs_fs_info *fs_info = block_group->fs_info;
-       u64 last = 0;
+       struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
+       struct btrfs_root *extent_root = fs_info->extent_root;
        struct btrfs_path *path;
-       int ret = 0;
-       struct btrfs_key key;
        struct extent_buffer *leaf;
-       int slot;
+       struct btrfs_key key;
        u64 total_found = 0;
-
-       BUG_ON(!fs_info);
+       u64 last = 0;
+       u32 nritems;
+       int ret = 0;
 
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
 
-       atomic_inc(&block_group->space_info->caching_threads);
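+       /*
+        * carve the superblock mirrors out of this block group and
+        * account them as bytes_super before publishing free space
+        */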
+       exclude_super_stripes(extent_root, block_group);
+       spin_lock(&block_group->space_info->lock);
+       block_group->space_info->bytes_super += block_group->bytes_super;
+       spin_unlock(&block_group->space_info->lock);
+
        last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
+
        /*
         * We don't want to deadlock with somebody trying to allocate a new
         * extent for the extent root while also trying to search the extent
@@ -277,74 +314,64 @@ static int caching_kthread(void *data)
 
        key.objectid = last;
        key.offset = 0;
-       btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+       key.type = BTRFS_EXTENT_ITEM_KEY;
 again:
+       mutex_lock(&caching_ctl->mutex);
        /* need to make sure the commit_root doesn't disappear */
        down_read(&fs_info->extent_commit_sem);
 
-       ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
+       ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
        if (ret < 0)
                goto err;
 
+       leaf = path->nodes[0];
+       nritems = btrfs_header_nritems(leaf);
+
        while (1) {
                smp_mb();
-               if (block_group->fs_info->closing > 1) {
+               if (fs_info->closing > 1) {
                        last = (u64)-1;
                        break;
                }
 
-               leaf = path->nodes[0];
-               slot = path->slots[0];
-               if (slot >= btrfs_header_nritems(leaf)) {
-                       ret = btrfs_next_leaf(fs_info->extent_root, path);
-                       if (ret < 0)
-                               goto err;
-                       else if (ret)
+               if (path->slots[0] < nritems) {
+                       btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+               } else {
+                       ret = find_next_key(path, 0, &key);
+                       if (ret)
                                break;
 
-                       if (need_resched() ||
-                           btrfs_transaction_in_commit(fs_info)) {
-                               leaf = path->nodes[0];
-
-                               /* this shouldn't happen, but if the
-                                * leaf is empty just move on.
-                                */
-                               if (btrfs_header_nritems(leaf) == 0)
-                                       break;
-                               /*
-                                * we need to copy the key out so that
-                                * we are sure the next search advances
-                                * us forward in the btree.
-                                */
-                               btrfs_item_key_to_cpu(leaf, &key, 0);
-                               btrfs_release_path(fs_info->extent_root, path);
-                               up_read(&fs_info->extent_commit_sem);
+                       caching_ctl->progress = last;
+                       btrfs_release_path(extent_root, path);
+                       up_read(&fs_info->extent_commit_sem);
+                       mutex_unlock(&caching_ctl->mutex);
+                       if (btrfs_transaction_in_commit(fs_info))
                                schedule_timeout(1);
-                               goto again;
-                       }
+                       else
+                               cond_resched();
+                       goto again;
+               }
 
+               if (key.objectid < block_group->key.objectid) {
+                       path->slots[0]++;
                        continue;
                }
-               btrfs_item_key_to_cpu(leaf, &key, slot);
-               if (key.objectid < block_group->key.objectid)
-                       goto next;
 
                if (key.objectid >= block_group->key.objectid +
                    block_group->key.offset)
                        break;
 
-               if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
+               if (key.type == BTRFS_EXTENT_ITEM_KEY) {
                        total_found += add_new_free_space(block_group,
                                                          fs_info, last,
                                                          key.objectid);
                        last = key.objectid + key.offset;
-               }
 
-               if (total_found > (1024 * 1024 * 2)) {
-                       total_found = 0;
-                       wake_up(&block_group->caching_q);
+                       if (total_found > (1024 * 1024 * 2)) {
+                               total_found = 0;
+                               wake_up(&caching_ctl->wait);
+                       }
                }
-next:
                path->slots[0]++;
        }
        ret = 0;
@@ -352,33 +379,65 @@ next:
        total_found += add_new_free_space(block_group, fs_info, last,
                                          block_group->key.objectid +
                                          block_group->key.offset);
+       caching_ctl->progress = (u64)-1;
 
        spin_lock(&block_group->lock);
+       block_group->caching_ctl = NULL;
        block_group->cached = BTRFS_CACHE_FINISHED;
        spin_unlock(&block_group->lock);
 
 err:
        btrfs_free_path(path);
        up_read(&fs_info->extent_commit_sem);
-       atomic_dec(&block_group->space_info->caching_threads);
-       wake_up(&block_group->caching_q);
 
+       free_excluded_extents(extent_root, block_group);
+
+       mutex_unlock(&caching_ctl->mutex);
+       wake_up(&caching_ctl->wait);
+
+       put_caching_control(caching_ctl);
+       atomic_dec(&block_group->space_info->caching_threads);
        return 0;
 }
 
 static int cache_block_group(struct btrfs_block_group_cache *cache)
 {
+       struct btrfs_fs_info *fs_info = cache->fs_info;
+       struct btrfs_caching_control *caching_ctl;
        struct task_struct *tsk;
        int ret = 0;
 
+       smp_mb();
+       if (cache->cached != BTRFS_CACHE_NO)
+               return 0;
+
+       caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
+       BUG_ON(!caching_ctl);
+
+       INIT_LIST_HEAD(&caching_ctl->list);
+       mutex_init(&caching_ctl->mutex);
+       init_waitqueue_head(&caching_ctl->wait);
+       caching_ctl->block_group = cache;
+       caching_ctl->progress = cache->key.objectid;
+       /* one for caching kthread, one for caching block group list */
+       atomic_set(&caching_ctl->count, 2);
+
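+       /* recheck under the lock; another caller may have won the race */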
        spin_lock(&cache->lock);
        if (cache->cached != BTRFS_CACHE_NO) {
                spin_unlock(&cache->lock);
-               return ret;
+               kfree(caching_ctl);
+               return 0;
        }
+       cache->caching_ctl = caching_ctl;
        cache->cached = BTRFS_CACHE_STARTED;
        spin_unlock(&cache->lock);
 
+       down_write(&fs_info->extent_commit_sem);
+       list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
+       up_write(&fs_info->extent_commit_sem);
+
+       atomic_inc(&cache->space_info->caching_threads);
+
        tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu",
                          cache->key.objectid);
        if (IS_ERR(tsk)) {
@@ -1657,7 +1716,6 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
                                                 parent, ref_root, flags,
                                                 ref->objectid, ref->offset,
                                                 &ins, node->ref_mod);
-               update_reserved_extents(root, ins.objectid, ins.offset, 0);
        } else if (node->action == BTRFS_ADD_DELAYED_REF) {
                ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
                                             node->num_bytes, parent,
@@ -1783,7 +1841,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
                                                extent_op->flags_to_set,
                                                &extent_op->key,
                                                ref->level, &ins);
-               update_reserved_extents(root, ins.objectid, ins.offset, 0);
        } else if (node->action == BTRFS_ADD_DELAYED_REF) {
                ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
                                             node->num_bytes, parent, ref_root,
@@ -1818,16 +1875,32 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
                BUG_ON(extent_op);
                head = btrfs_delayed_node_to_head(node);
                if (insert_reserved) {
+                       int mark_free = 0;
+                       struct extent_buffer *must_clean = NULL;
+
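+                       /*
+                        * this extent was never inserted into the extent
+                        * tree; pin it down, or free the reservation right
+                        * away if pin_down_bytes says it can be discarded
+                        */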
+                       ret = pin_down_bytes(trans, root, NULL,
+                                            node->bytenr, node->num_bytes,
+                                            head->is_data, 1, &must_clean);
+                       if (ret > 0)
+                               mark_free = 1;
+
+                       if (must_clean) {
+                               clean_tree_block(NULL, root, must_clean);
+                               btrfs_tree_unlock(must_clean);
+                               free_extent_buffer(must_clean);
+                       }
                        if (head->is_data) {
                                ret = btrfs_del_csums(trans, root,
                                                      node->bytenr,
                                                      node->num_bytes);
                                BUG_ON(ret);
                        }
-                       btrfs_update_pinned_extents(root, node->bytenr,
-                                                   node->num_bytes, 1);
-                       update_reserved_extents(root, node->bytenr,
-                                               node->num_bytes, 0);
+                       if (mark_free) {
+                               ret = btrfs_free_reserved_extent(root,
+                                                       node->bytenr,
+                                                       node->num_bytes);
+                               BUG_ON(ret);
+                       }
                }
                mutex_unlock(&head->mutex);
                return 0;
@@ -2706,6 +2779,8 @@ int btrfs_check_metadata_free_space(struct btrfs_root *root)
        /* get the space info for where the metadata will live */
        alloc_target = btrfs_get_alloc_profile(root, 0);
        meta_sinfo = __find_space_info(info, alloc_target);
+       if (!meta_sinfo)
+               goto alloc;
 
 again:
        spin_lock(&meta_sinfo->lock);
@@ -2717,12 +2792,13 @@ again:
        do_div(thresh, 100);
 
        if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
-           meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) {
+           meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
+           meta_sinfo->bytes_super > thresh) {
                struct btrfs_trans_handle *trans;
                if (!meta_sinfo->full) {
                        meta_sinfo->force_alloc = 1;
                        spin_unlock(&meta_sinfo->lock);
-
+alloc:
                        trans = btrfs_start_transaction(root, 1);
                        if (!trans)
                                return -ENOMEM;
@@ -2730,6 +2806,10 @@ again:
                        ret = do_chunk_alloc(trans, root->fs_info->extent_root,
                                             2 * 1024 * 1024, alloc_target, 0);
                        btrfs_end_transaction(trans, root);
+                       if (!meta_sinfo) {
+                               meta_sinfo = __find_space_info(info,
+                                                              alloc_target);
+                       }
                        goto again;
                }
                spin_unlock(&meta_sinfo->lock);
@@ -2765,13 +2845,16 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
        bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
 
        data_sinfo = BTRFS_I(inode)->space_info;
+       if (!data_sinfo)
+               goto alloc;
+
 again:
        /* make sure we have enough space to handle the data first */
        spin_lock(&data_sinfo->lock);
        if (data_sinfo->total_bytes - data_sinfo->bytes_used -
            data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
            data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
-           data_sinfo->bytes_may_use < bytes) {
+           data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) {
                struct btrfs_trans_handle *trans;
 
                /*
@@ -2783,7 +2866,7 @@ again:
 
                        data_sinfo->force_alloc = 1;
                        spin_unlock(&data_sinfo->lock);
-
+alloc:
                        alloc_target = btrfs_get_alloc_profile(root, 1);
                        trans = btrfs_start_transaction(root, 1);
                        if (!trans)
@@ -2795,6 +2878,11 @@ again:
                        btrfs_end_transaction(trans, root);
                        if (ret)
                                return ret;
+
+                       if (!data_sinfo) {
+                               btrfs_set_inode_space_info(root, inode);
+                               data_sinfo = BTRFS_I(inode)->space_info;
+                       }
                        goto again;
                }
                spin_unlock(&data_sinfo->lock);
@@ -3009,10 +3097,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                num_bytes = min(total, cache->key.offset - byte_in_group);
                if (alloc) {
                        old_val += num_bytes;
+                       btrfs_set_block_group_used(&cache->item, old_val);
+                       cache->reserved -= num_bytes;
                        cache->space_info->bytes_used += num_bytes;
+                       cache->space_info->bytes_reserved -= num_bytes;
                        if (cache->ro)
                                cache->space_info->bytes_readonly -= num_bytes;
-                       btrfs_set_block_group_used(&cache->item, old_val);
                        spin_unlock(&cache->lock);
                        spin_unlock(&cache->space_info->lock);
                } else {
@@ -3057,127 +3147,136 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
        return bytenr;
 }
 
-int btrfs_update_pinned_extents(struct btrfs_root *root,
-                               u64 bytenr, u64 num, int pin)
+/*
+ * this function must be called within transaction
+ */
+int btrfs_pin_extent(struct btrfs_root *root,
+                    u64 bytenr, u64 num_bytes, int reserved)
 {
-       u64 len;
-       struct btrfs_block_group_cache *cache;
        struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_block_group_cache *cache;
 
-       if (pin)
-               set_extent_dirty(&fs_info->pinned_extents,
-                               bytenr, bytenr + num - 1, GFP_NOFS);
-
-       while (num > 0) {
-               cache = btrfs_lookup_block_group(fs_info, bytenr);
-               BUG_ON(!cache);
-               len = min(num, cache->key.offset -
-                         (bytenr - cache->key.objectid));
-               if (pin) {
-                       spin_lock(&cache->space_info->lock);
-                       spin_lock(&cache->lock);
-                       cache->pinned += len;
-                       cache->space_info->bytes_pinned += len;
-                       spin_unlock(&cache->lock);
-                       spin_unlock(&cache->space_info->lock);
-                       fs_info->total_pinned += len;
-               } else {
-                       int unpin = 0;
+       cache = btrfs_lookup_block_group(fs_info, bytenr);
+       BUG_ON(!cache);
 
-                       /*
-                        * in order to not race with the block group caching, we
-                        * only want to unpin the extent if we are cached.  If
-                        * we aren't cached, we want to start async caching this
-                        * block group so we can free the extent the next time
-                        * around.
-                        */
-                       spin_lock(&cache->space_info->lock);
-                       spin_lock(&cache->lock);
-                       unpin = (cache->cached == BTRFS_CACHE_FINISHED);
-                       if (likely(unpin)) {
-                               cache->pinned -= len;
-                               cache->space_info->bytes_pinned -= len;
-                               fs_info->total_pinned -= len;
-                       }
-                       spin_unlock(&cache->lock);
-                       spin_unlock(&cache->space_info->lock);
+       spin_lock(&cache->space_info->lock);
+       spin_lock(&cache->lock);
+       cache->pinned += num_bytes;
+       cache->space_info->bytes_pinned += num_bytes;
+       if (reserved) {
+               cache->reserved -= num_bytes;
+               cache->space_info->bytes_reserved -= num_bytes;
+       }
+       spin_unlock(&cache->lock);
+       spin_unlock(&cache->space_info->lock);
 
-                       if (likely(unpin))
-                               clear_extent_dirty(&fs_info->pinned_extents,
-                                                  bytenr, bytenr + len -1,
-                                                  GFP_NOFS);
-                       else
-                               cache_block_group(cache);
+       btrfs_put_block_group(cache);
 
-                       if (unpin)
-                               btrfs_add_free_space(cache, bytenr, len);
-               }
-               btrfs_put_block_group(cache);
-               bytenr += len;
-               num -= len;
+       set_extent_dirty(fs_info->pinned_extents,
+                        bytenr, bytenr + num_bytes - 1, GFP_NOFS);
+       return 0;
+}
+
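+/*
+ * adjust the reserved-byte counters of a block group and its space_info;
+ * both the group and space_info locks are taken here
+ */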
+static int update_reserved_extents(struct btrfs_block_group_cache *cache,
+                                  u64 num_bytes, int reserve)
+{
+       spin_lock(&cache->space_info->lock);
+       spin_lock(&cache->lock);
+       if (reserve) {
+               cache->reserved += num_bytes;
+               cache->space_info->bytes_reserved += num_bytes;
+       } else {
+               cache->reserved -= num_bytes;
+               cache->space_info->bytes_reserved -= num_bytes;
        }
+       spin_unlock(&cache->lock);
+       spin_unlock(&cache->space_info->lock);
        return 0;
 }
 
-static int update_reserved_extents(struct btrfs_root *root,
-                                  u64 bytenr, u64 num, int reserve)
+int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root)
 {
-       u64 len;
-       struct btrfs_block_group_cache *cache;
        struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_caching_control *next;
+       struct btrfs_caching_control *caching_ctl;
+       struct btrfs_block_group_cache *cache;
 
-       while (num > 0) {
-               cache = btrfs_lookup_block_group(fs_info, bytenr);
-               BUG_ON(!cache);
-               len = min(num, cache->key.offset -
-                         (bytenr - cache->key.objectid));
+       down_write(&fs_info->extent_commit_sem);
 
-               spin_lock(&cache->space_info->lock);
-               spin_lock(&cache->lock);
-               if (reserve) {
-                       cache->reserved += len;
-                       cache->space_info->bytes_reserved += len;
+       list_for_each_entry_safe(caching_ctl, next,
+                                &fs_info->caching_block_groups, list) {
+               cache = caching_ctl->block_group;
+               if (block_group_cache_done(cache)) {
+                       cache->last_byte_to_unpin = (u64)-1;
+                       list_del_init(&caching_ctl->list);
+                       put_caching_control(caching_ctl);
                } else {
-                       cache->reserved -= len;
-                       cache->space_info->bytes_reserved -= len;
+                       cache->last_byte_to_unpin = caching_ctl->progress;
                }
-               spin_unlock(&cache->lock);
-               spin_unlock(&cache->space_info->lock);
-               btrfs_put_block_group(cache);
-               bytenr += len;
-               num -= len;
        }
+
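+       /*
+        * flip which freed_extents tree collects newly pinned extents;
+        * the other tree holds the extents pinned by the committing
+        * transaction and is drained in btrfs_finish_extent_commit
+        */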
+       if (fs_info->pinned_extents == &fs_info->freed_extents[0])
+               fs_info->pinned_extents = &fs_info->freed_extents[1];
+       else
+               fs_info->pinned_extents = &fs_info->freed_extents[0];
+
+       up_write(&fs_info->extent_commit_sem);
        return 0;
 }
 
-int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
+static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
 {
-       u64 last = 0;
-       u64 start;
-       u64 end;
-       struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
-       int ret;
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_block_group_cache *cache = NULL;
+       u64 len;
 
-       while (1) {
-               ret = find_first_extent_bit(pinned_extents, last,
-                                           &start, &end, EXTENT_DIRTY);
-               if (ret)
-                       break;
+       while (start <= end) {
+               if (!cache ||
+                   start >= cache->key.objectid + cache->key.offset) {
+                       if (cache)
+                               btrfs_put_block_group(cache);
+                       cache = btrfs_lookup_block_group(fs_info, start);
+                       BUG_ON(!cache);
+               }
+
+               len = cache->key.objectid + cache->key.offset - start;
+               len = min(len, end + 1 - start);
+
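+               /*
+                * only space the caching thread has already scanned goes
+                * back into the free space cache here; the caching thread
+                * picks up the rest when it gets past this point
+                */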
+               if (start < cache->last_byte_to_unpin) {
+                       len = min(len, cache->last_byte_to_unpin - start);
+                       btrfs_add_free_space(cache, start, len);
+               }
+
+               spin_lock(&cache->space_info->lock);
+               spin_lock(&cache->lock);
+               cache->pinned -= len;
+               cache->space_info->bytes_pinned -= len;
+               spin_unlock(&cache->lock);
+               spin_unlock(&cache->space_info->lock);
 
-               set_extent_dirty(copy, start, end, GFP_NOFS);
-               last = end + 1;
+               start += len;
        }
+
+       if (cache)
+               btrfs_put_block_group(cache);
        return 0;
 }
 
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
-                              struct btrfs_root *root,
-                              struct extent_io_tree *unpin)
+                              struct btrfs_root *root)
 {
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct extent_io_tree *unpin;
        u64 start;
        u64 end;
        int ret;
 
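+       /*
+        * pinned_extents already points at the tree for the new
+        * transaction, so the other tree holds the extents pinned by
+        * the transaction that just committed
+        */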
+       if (fs_info->pinned_extents == &fs_info->freed_extents[0])
+               unpin = &fs_info->freed_extents[1];
+       else
+               unpin = &fs_info->freed_extents[0];
+
        while (1) {
                ret = find_first_extent_bit(unpin, 0, &start, &end,
                                            EXTENT_DIRTY);
@@ -3186,10 +3285,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 
                ret = btrfs_discard_extent(root, start, end + 1 - start);
 
-               /* unlocks the pinned mutex */
-               btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
                clear_extent_dirty(unpin, start, end, GFP_NOFS);
-
+               unpin_extent_range(root, start, end);
                cond_resched();
        }
 
@@ -3199,7 +3296,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 static int pin_down_bytes(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root,
                          struct btrfs_path *path,
-                         u64 bytenr, u64 num_bytes, int is_data,
+                         u64 bytenr, u64 num_bytes,
+                         int is_data, int reserved,
                          struct extent_buffer **must_clean)
 {
        int err = 0;
@@ -3231,15 +3329,15 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans,
        }
        free_extent_buffer(buf);
 pinit:
-       btrfs_set_path_blocking(path);
+       if (path)
+               btrfs_set_path_blocking(path);
        /* unlocks the pinned mutex */
-       btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
+       btrfs_pin_extent(root, bytenr, num_bytes, reserved);
 
        BUG_ON(err < 0);
        return 0;
 }
 
-
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                u64 bytenr, u64 num_bytes, u64 parent,
@@ -3413,7 +3511,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                }
 
                ret = pin_down_bytes(trans, root, path, bytenr,
-                                    num_bytes, is_data, &must_clean);
+                                    num_bytes, is_data, 0, &must_clean);
                if (ret > 0)
                        mark_free = 1;
                BUG_ON(ret < 0);
@@ -3544,8 +3642,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
        if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
                WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
                /* unlocks the pinned mutex */
-               btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
-               update_reserved_extents(root, bytenr, num_bytes, 0);
+               btrfs_pin_extent(root, bytenr, num_bytes, 1);
                ret = 0;
        } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
                ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
@@ -3585,19 +3682,33 @@ static noinline int
 wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
                                u64 num_bytes)
 {
+       struct btrfs_caching_control *caching_ctl;
        DEFINE_WAIT(wait);
 
-       prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE);
-
-       if (block_group_cache_done(cache)) {
-               finish_wait(&cache->caching_q, &wait);
+       caching_ctl = get_caching_control(cache);
+       if (!caching_ctl)
                return 0;
-       }
-       schedule();
-       finish_wait(&cache->caching_q, &wait);
 
-       wait_event(cache->caching_q, block_group_cache_done(cache) ||
+       wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
                   (cache->free_space >= num_bytes));
+
+       put_caching_control(caching_ctl);
+       return 0;
+}
+
+static noinline int
+wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
+{
+       struct btrfs_caching_control *caching_ctl;
+       DEFINE_WAIT(wait);
+
+       caching_ctl = get_caching_control(cache);
+       if (!caching_ctl)
+               return 0;
+
+       wait_event(caching_ctl->wait, block_group_cache_done(cache));
+
+       put_caching_control(caching_ctl);
        return 0;
 }
 
@@ -3635,6 +3746,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
        int last_ptr_loop = 0;
        int loop = 0;
        bool found_uncached_bg = false;
+       bool failed_cluster_refill = false;
 
        WARN_ON(num_bytes < root->sectorsize);
        btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -3732,7 +3844,16 @@ have_block_group:
                if (unlikely(block_group->ro))
                        goto loop;
 
-               if (last_ptr) {
+               /*
+                * Try the cluster allocator first, unless we are on
+                * LOOP_NO_EMPTY_SIZE.  By that point we have already tried
+                * the cluster allocator plenty of times and found nothing,
+                * so the free space is likely too fragmented for clustering
+                * to help; skip it and let the allocator find whatever
+                * block it can.
+                */
+               if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) {
                        /*
                         * the refill lock keeps out other
                         * people trying to start a new cluster
@@ -3807,9 +3928,11 @@ refill_cluster:
                                        spin_unlock(&last_ptr->refill_lock);
                                        goto checks;
                                }
-                       } else if (!cached && loop > LOOP_CACHING_NOWAIT) {
+                       } else if (!cached && loop > LOOP_CACHING_NOWAIT
+                                  && !failed_cluster_refill) {
                                spin_unlock(&last_ptr->refill_lock);
 
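+                               /*
+                                * wait once for the caching thread to make
+                                * progress and retry the cluster refill; a
+                                * second failure falls through to a normal
+                                * search of this block group
+                                */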
+                               failed_cluster_refill = true;
                                wait_block_group_cache_progress(block_group,
                                       num_bytes + empty_cluster + empty_size);
                                goto have_block_group;
@@ -3821,13 +3944,9 @@ refill_cluster:
                         * cluster.  Free the cluster we've been trying
                         * to use, and go to the next block group
                         */
-                       if (loop < LOOP_NO_EMPTY_SIZE) {
-                               btrfs_return_cluster_to_free_space(NULL,
-                                                                  last_ptr);
-                               spin_unlock(&last_ptr->refill_lock);
-                               goto loop;
-                       }
+                       btrfs_return_cluster_to_free_space(NULL, last_ptr);
                        spin_unlock(&last_ptr->refill_lock);
+                       goto loop;
                }
 
                offset = btrfs_find_space_for_alloc(block_group, search_start,
@@ -3881,9 +4000,12 @@ checks:
                                             search_start - offset);
                BUG_ON(offset > search_start);
 
+               update_reserved_extents(block_group, num_bytes, 1);
+
                /* we are all good, lets return */
                break;
 loop:
+               failed_cluster_refill = false;
                btrfs_put_block_group(block_group);
        }
        up_read(&space_info->groups_sem);
@@ -3973,12 +4095,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
        up_read(&info->groups_sem);
 }
 
-static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
-                                 struct btrfs_root *root,
-                                 u64 num_bytes, u64 min_alloc_size,
-                                 u64 empty_size, u64 hint_byte,
-                                 u64 search_end, struct btrfs_key *ins,
-                                 u64 data)
+int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+                        struct btrfs_root *root,
+                        u64 num_bytes, u64 min_alloc_size,
+                        u64 empty_size, u64 hint_byte,
+                        u64 search_end, struct btrfs_key *ins,
+                        u64 data)
 {
        int ret;
        u64 search_start = 0;
@@ -4044,25 +4166,8 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
        ret = btrfs_discard_extent(root, start, len);
 
        btrfs_add_free_space(cache, start, len);
+       update_reserved_extents(cache, len, 0);
        btrfs_put_block_group(cache);
-       update_reserved_extents(root, start, len, 0);
-
-       return ret;
-}
-
-int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
-                                 struct btrfs_root *root,
-                                 u64 num_bytes, u64 min_alloc_size,
-                                 u64 empty_size, u64 hint_byte,
-                                 u64 search_end, struct btrfs_key *ins,
-                                 u64 data)
-{
-       int ret;
-       ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
-                                    empty_size, hint_byte, search_end, ins,
-                                    data);
-       if (!ret)
-               update_reserved_extents(root, ins->objectid, ins->offset, 1);
 
        return ret;
 }
@@ -4223,15 +4328,46 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
 {
        int ret;
        struct btrfs_block_group_cache *block_group;
+       struct btrfs_caching_control *caching_ctl;
+       u64 start = ins->objectid;
+       u64 num_bytes = ins->offset;
 
        block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
        cache_block_group(block_group);
-       wait_event(block_group->caching_q,
-                  block_group_cache_done(block_group));
+       caching_ctl = get_caching_control(block_group);
 
-       ret = btrfs_remove_free_space(block_group, ins->objectid,
-                                     ins->offset);
-       BUG_ON(ret);
+       if (!caching_ctl) {
+               BUG_ON(!block_group_cache_done(block_group));
+               ret = btrfs_remove_free_space(block_group, start, num_bytes);
+               BUG_ON(ret);
+       } else {
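+               /*
+                * caching is still in progress: space behind the scan
+                * position is already in the free space cache and must be
+                * removed, while space ahead of it is excluded so the
+                * caching thread will skip it
+                */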
+               mutex_lock(&caching_ctl->mutex);
+
+               if (start >= caching_ctl->progress) {
+                       ret = add_excluded_extent(root, start, num_bytes);
+                       BUG_ON(ret);
+               } else if (start + num_bytes <= caching_ctl->progress) {
+                       ret = btrfs_remove_free_space(block_group,
+                                                     start, num_bytes);
+                       BUG_ON(ret);
+               } else {
+                       num_bytes = caching_ctl->progress - start;
+                       ret = btrfs_remove_free_space(block_group,
+                                                     start, num_bytes);
+                       BUG_ON(ret);
+
+                       start = caching_ctl->progress;
+                       num_bytes = ins->objectid + ins->offset -
+                                   caching_ctl->progress;
+                       ret = add_excluded_extent(root, start, num_bytes);
+                       BUG_ON(ret);
+               }
+
+               mutex_unlock(&caching_ctl->mutex);
+               put_caching_control(caching_ctl);
+       }
+
+       update_reserved_extents(block_group, ins->offset, 1);
        btrfs_put_block_group(block_group);
        ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
                                         0, owner, offset, ins, 1);
@@ -4255,9 +4391,9 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
        int ret;
        u64 flags = 0;
 
-       ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
-                                    empty_size, hint_byte, search_end,
-                                    ins, 0);
+       ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
+                                  empty_size, hint_byte, search_end,
+                                  ins, 0);
        if (ret)
                return ret;
 
@@ -4268,7 +4404,6 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
        } else
                BUG_ON(parent > 0);
 
-       update_reserved_extents(root, ins->objectid, ins->offset, 1);
        if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
                struct btrfs_delayed_extent_op *extent_op;
                extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
@@ -4347,452 +4482,99 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
        return buf;
 }
 
-#if 0
-int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
-                       struct btrfs_root *root, struct extent_buffer *leaf)
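+/*
+ * state carried while walking a tree for snapshot deletion: per-level
+ * reference counts and flags, the current stage (DROP_REFERENCE or
+ * UPDATE_BACKREF) and the readahead window
+ */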
+struct walk_control {
+       u64 refs[BTRFS_MAX_LEVEL];
+       u64 flags[BTRFS_MAX_LEVEL];
+       struct btrfs_key update_progress;
+       int stage;
+       int level;
+       int shared_level;
+       int update_ref;
+       int keep_locks;
+       int reada_slot;
+       int reada_count;
+};
+
+#define DROP_REFERENCE 1
+#define UPDATE_BACKREF 2
+
+static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
+                                    struct btrfs_root *root,
+                                    struct walk_control *wc,
+                                    struct btrfs_path *path)
 {
-       u64 disk_bytenr;
-       u64 num_bytes;
-       struct btrfs_key key;
-       struct btrfs_file_extent_item *fi;
+       u64 bytenr;
+       u64 generation;
+       u64 refs;
+       u64 last = 0;
        u32 nritems;
-       int i;
+       u32 blocksize;
+       struct btrfs_key key;
+       struct extent_buffer *eb;
        int ret;
+       int slot;
+       int nread = 0;
 
-       BUG_ON(!btrfs_is_leaf(leaf));
-       nritems = btrfs_header_nritems(leaf);
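+       /*
+        * shrink the readahead window if the previous one was not fully
+        * consumed, grow it otherwise
+        */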
+       if (path->slots[wc->level] < wc->reada_slot) {
+               wc->reada_count = wc->reada_count * 2 / 3;
+               wc->reada_count = max(wc->reada_count, 2);
+       } else {
+               wc->reada_count = wc->reada_count * 3 / 2;
+               wc->reada_count = min_t(int, wc->reada_count,
+                                       BTRFS_NODEPTRS_PER_BLOCK(root));
+       }
 
-       for (i = 0; i < nritems; i++) {
-               cond_resched();
-               btrfs_item_key_to_cpu(leaf, &key, i);
+       eb = path->nodes[wc->level];
+       nritems = btrfs_header_nritems(eb);
+       blocksize = btrfs_level_size(root, wc->level - 1);
 
-               /* only extents have references, skip everything else */
-               if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
-                       continue;
-
-               fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
-
-               /* inline extents live in the btree, they don't have refs */
-               if (btrfs_file_extent_type(leaf, fi) ==
-                   BTRFS_FILE_EXTENT_INLINE)
-                       continue;
-
-               disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
-
-               /* holes don't have refs */
-               if (disk_bytenr == 0)
-                       continue;
-
-               num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
-               ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes,
-                                       leaf->start, 0, key.objectid, 0);
-               BUG_ON(ret);
-       }
-       return 0;
-}
-
-static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
-                                       struct btrfs_root *root,
-                                       struct btrfs_leaf_ref *ref)
-{
-       int i;
-       int ret;
-       struct btrfs_extent_info *info;
-       struct refsort *sorted;
-
-       if (ref->nritems == 0)
-               return 0;
-
-       sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS);
-       for (i = 0; i < ref->nritems; i++) {
-               sorted[i].bytenr = ref->extents[i].bytenr;
-               sorted[i].slot = i;
-       }
-       sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL);
+       for (slot = path->slots[wc->level]; slot < nritems; slot++) {
+               if (nread >= wc->reada_count)
+                       break;
 
-       /*
-        * the items in the ref were sorted when the ref was inserted
-        * into the ref cache, so this is already in order
-        */
-       for (i = 0; i < ref->nritems; i++) {
-               info = ref->extents + sorted[i].slot;
-               ret = btrfs_free_extent(trans, root, info->bytenr,
-                                         info->num_bytes, ref->bytenr,
-                                         ref->owner, ref->generation,
-                                         info->objectid, 0);
-
-               atomic_inc(&root->fs_info->throttle_gen);
-               wake_up(&root->fs_info->transaction_throttle);
                cond_resched();
+               bytenr = btrfs_node_blockptr(eb, slot);
+               generation = btrfs_node_ptr_generation(eb, slot);
 
-               BUG_ON(ret);
-               info++;
-       }
-
-       kfree(sorted);
-       return 0;
-}
-
-
-static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans,
-                                    struct btrfs_root *root, u64 start,
-                                    u64 len, u32 *refs)
-{
-       int ret;
-
-       ret = btrfs_lookup_extent_refs(trans, root, start, len, refs);
-       BUG_ON(ret);
-
-#if 0 /* some debugging code in case we see problems here */
-       /* if the refs count is one, it won't get increased again.  But
-        * if the ref count is > 1, someone may be decreasing it at
-        * the same time we are.
-        */
-       if (*refs != 1) {
-               struct extent_buffer *eb = NULL;
-               eb = btrfs_find_create_tree_block(root, start, len);
-               if (eb)
-                       btrfs_tree_lock(eb);
-
-               mutex_lock(&root->fs_info->alloc_mutex);
-               ret = lookup_extent_ref(NULL, root, start, len, refs);
-               BUG_ON(ret);
-               mutex_unlock(&root->fs_info->alloc_mutex);
-
-               if (eb) {
-                       btrfs_tree_unlock(eb);
-                       free_extent_buffer(eb);
-               }
-               if (*refs == 1) {
-                       printk(KERN_ERR "btrfs block %llu went down to one "
-                              "during drop_snap\n", (unsigned long long)start);
-               }
-
-       }
-#endif
-
-       cond_resched();
-       return ret;
-}
-
+               if (slot == path->slots[wc->level])
+                       goto reada;
 
-/*
- * this is used while deleting old snapshots, and it drops the refs
- * on a whole subtree starting from a level 1 node.
- *
- * The idea is to sort all the leaf pointers, and then drop the
- * ref on all the leaves in order.  Most of the time the leaves
- * will have ref cache entries, so no leaf IOs will be required to
- * find the extents they have references on.
- *
- * For each leaf, any references it has are also dropped in order
- *
- * This ends up dropping the references in something close to optimal
- * order for reading and modifying the extent allocation tree.
- */
-static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans,
-                                       struct btrfs_root *root,
-                                       struct btrfs_path *path)
-{
-       u64 bytenr;
-       u64 root_owner;
-       u64 root_gen;
-       struct extent_buffer *eb = path->nodes[1];
-       struct extent_buffer *leaf;
-       struct btrfs_leaf_ref *ref;
-       struct refsort *sorted = NULL;
-       int nritems = btrfs_header_nritems(eb);
-       int ret;
-       int i;
-       int refi = 0;
-       int slot = path->slots[1];
-       u32 blocksize = btrfs_level_size(root, 0);
-       u32 refs;
-
-       if (nritems == 0)
-               goto out;
-
-       root_owner = btrfs_header_owner(eb);
-       root_gen = btrfs_header_generation(eb);
-       sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS);
-
-       /*
-        * step one, sort all the leaf pointers so we don't scribble
-        * randomly into the extent allocation tree
-        */
-       for (i = slot; i < nritems; i++) {
-               sorted[refi].bytenr = btrfs_node_blockptr(eb, i);
-               sorted[refi].slot = i;
-               refi++;
-       }
-
-       /*
-        * nritems won't be zero, but if we're picking up drop_snapshot
-        * after a crash, slot might be > 0, so double check things
-        * just in case.
-        */
-       if (refi == 0)
-               goto out;
-
-       sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
-
-       /*
-        * the first loop frees everything the leaves point to
-        */
-       for (i = 0; i < refi; i++) {
-               u64 ptr_gen;
-
-               bytenr = sorted[i].bytenr;
-
-               /*
-                * check the reference count on this leaf.  If it is > 1
-                * we just decrement it below and don't update any
-                * of the refs the leaf points to.
-                */
-               ret = drop_snap_lookup_refcount(trans, root, bytenr,
-                                               blocksize, &refs);
-               BUG_ON(ret);
-               if (refs != 1)
+               if (wc->stage == UPDATE_BACKREF &&
+                   generation <= root->root_key.offset)
                        continue;
 
-               ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot);
-
-               /*
-                * the leaf only had one reference, which means the
-                * only thing pointing to this leaf is the snapshot
-                * we're deleting.  It isn't possible for the reference
-                * count to increase again later
-                *
-                * The reference cache is checked for the leaf,
-                * and if found we'll be able to drop any refs held by
-                * the leaf without needing to read it in.
-                */
-               ref = btrfs_lookup_leaf_ref(root, bytenr);
-               if (ref && ref->generation != ptr_gen) {
-                       btrfs_free_leaf_ref(root, ref);
-                       ref = NULL;
-               }
-               if (ref) {
-                       ret = cache_drop_leaf_ref(trans, root, ref);
-                       BUG_ON(ret);
-                       btrfs_remove_leaf_ref(root, ref);
-                       btrfs_free_leaf_ref(root, ref);
-               } else {
-                       /*
-                        * the leaf wasn't in the reference cache, so
-                        * we have to read it.
-                        */
-                       leaf = read_tree_block(root, bytenr, blocksize,
-                                              ptr_gen);
-                       ret = btrfs_drop_leaf_ref(trans, root, leaf);
+               if (wc->stage == DROP_REFERENCE) {
+                       ret = btrfs_lookup_extent_info(trans, root,
+                                               bytenr, blocksize,
+                                               &refs, NULL);
                        BUG_ON(ret);
-                       free_extent_buffer(leaf);
-               }
-               atomic_inc(&root->fs_info->throttle_gen);
-               wake_up(&root->fs_info->transaction_throttle);
-               cond_resched();
-       }
-
-       /*
-        * run through the loop again to free the refs on the leaves.
-        * This is faster than doing it in the loop above because
-        * the leaves are likely to be clustered together.  We end up
-        * working in nice chunks on the extent allocation tree.
-        */
-       for (i = 0; i < refi; i++) {
-               bytenr = sorted[i].bytenr;
-               ret = btrfs_free_extent(trans, root, bytenr,
-                                       blocksize, eb->start,
-                                       root_owner, root_gen, 0, 1);
-               BUG_ON(ret);
+                       BUG_ON(refs == 0);
+                       if (refs == 1)
+                               goto reada;
 
-               atomic_inc(&root->fs_info->throttle_gen);
-               wake_up(&root->fs_info->transaction_throttle);
-               cond_resched();
-       }
-out:
-       kfree(sorted);
-
-       /*
-        * update the path to show we've processed the entire level 1
-        * node.  This will get saved into the root's drop_snapshot_progress
-        * field so these drops are not repeated again if this transaction
-        * commits.
-        */
-       path->slots[1] = nritems;
-       return 0;
-}
-
-/*
- * helper function for drop_snapshot, this walks down the tree dropping ref
- * counts as it goes.
- */
-static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
-                                  struct btrfs_root *root,
-                                  struct btrfs_path *path, int *level)
-{
-       u64 root_owner;
-       u64 root_gen;
-       u64 bytenr;
-       u64 ptr_gen;
-       struct extent_buffer *next;
-       struct extent_buffer *cur;
-       struct extent_buffer *parent;
-       u32 blocksize;
-       int ret;
-       u32 refs;
-
-       WARN_ON(*level < 0);
-       WARN_ON(*level >= BTRFS_MAX_LEVEL);
-       ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start,
-                               path->nodes[*level]->len, &refs);
-       BUG_ON(ret);
-       if (refs > 1)
-               goto out;
-
-       /*
-        * walk down to the last node level and free all the leaves
-        */
-       while (*level >= 0) {
-               WARN_ON(*level < 0);
-               WARN_ON(*level >= BTRFS_MAX_LEVEL);
-               cur = path->nodes[*level];
-
-               if (btrfs_header_level(cur) != *level)
-                       WARN_ON(1);
-
-               if (path->slots[*level] >=
-                   btrfs_header_nritems(cur))
-                       break;
-
-               /* the new code goes down to level 1 and does all the
-                * leaves pointed to that node in bulk.  So, this check
-                * for level 0 will always be false.
-                *
-                * But, the disk format allows the drop_snapshot_progress
-                * field in the root to leave things in a state where
-                * a leaf will need cleaning up here.  If someone crashes
-                * with the old code and then boots with the new code,
-                * we might find a leaf here.
-                */
-               if (*level == 0) {
-                       ret = btrfs_drop_leaf_ref(trans, root, cur);
-                       BUG_ON(ret);
-                       break;
+                       if (!wc->update_ref ||
+                           generation <= root->root_key.offset)
+                               continue;
+                       btrfs_node_key_to_cpu(eb, &key, slot);
+                       ret = btrfs_comp_cpu_keys(&key,
+                                                 &wc->update_progress);
+                       if (ret < 0)
+                               continue;
                }
-
-               /*
-                * once we get to level one, process the whole node
-                * at once, including everything below it.
-                */
-               if (*level == 1) {
-                       ret = drop_level_one_refs(trans, root, path);
-                       BUG_ON(ret);
+reada:
+               ret = readahead_tree_block(root, bytenr, blocksize,
+                                          generation);
+               if (ret)
                        break;
-               }
-
-               bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
-               ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
-               blocksize = btrfs_level_size(root, *level - 1);
-
-               ret = drop_snap_lookup_refcount(trans, root, bytenr,
-                                               blocksize, &refs);
-               BUG_ON(ret);
-
-               /*
-                * if there is more than one reference, we don't need
-                * to read that node to drop any references it has.  We
-                * just drop the ref we hold on that node and move on to the
-                * next slot in this level.
-                */
-               if (refs != 1) {
-                       parent = path->nodes[*level];
-                       root_owner = btrfs_header_owner(parent);
-                       root_gen = btrfs_header_generation(parent);
-                       path->slots[*level]++;
-
-                       ret = btrfs_free_extent(trans, root, bytenr,
-                                               blocksize, parent->start,
-                                               root_owner, root_gen,
-                                               *level - 1, 1);
-                       BUG_ON(ret);
-
-                       atomic_inc(&root->fs_info->throttle_gen);
-                       wake_up(&root->fs_info->transaction_throttle);
-                       cond_resched();
-
-                       continue;
-               }
-
-               /*
-                * we need to keep freeing things in the next level down.
-                * read the block and loop around to process it
-                */
-               next = read_tree_block(root, bytenr, blocksize, ptr_gen);
-               WARN_ON(*level <= 0);
-               if (path->nodes[*level-1])
-                       free_extent_buffer(path->nodes[*level-1]);
-               path->nodes[*level-1] = next;
-               *level = btrfs_header_level(next);
-               path->slots[*level] = 0;
-               cond_resched();
+               last = bytenr + blocksize;
+               nread++;
        }
-out:
-       WARN_ON(*level < 0);
-       WARN_ON(*level >= BTRFS_MAX_LEVEL);
-
-       if (path->nodes[*level] == root->node) {
-               parent = path->nodes[*level];
-               bytenr = path->nodes[*level]->start;
-       } else {
-               parent = path->nodes[*level + 1];
-               bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
-       }
-
-       blocksize = btrfs_level_size(root, *level);
-       root_owner = btrfs_header_owner(parent);
-       root_gen = btrfs_header_generation(parent);
-
-       /*
-        * cleanup and free the reference on the last node
-        * we processed
-        */
-       ret = btrfs_free_extent(trans, root, bytenr, blocksize,
-                                 parent->start, root_owner, root_gen,
-                                 *level, 1);
-       free_extent_buffer(path->nodes[*level]);
-       path->nodes[*level] = NULL;
-
-       *level += 1;
-       BUG_ON(ret);
-
-       cond_resched();
-       return 0;
+       wc->reada_slot = slot;
 }
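
The loop above only prefetches blocks the walk will actually read. In the DROP_REFERENCE stage that means: a child with a single reference is always descended into, while a shared child is read only when its back refs still need updating. A condensed restatement of that per-slot decision (the worth_reading flag is illustrative, not in the patch):

	/* condensed sketch of the per-slot decision in reada_walk_down() */
	if (refs == 1)
		worth_reading = 1;	/* sole owner: the walk descends here */
	else if (wc->update_ref && generation > root->root_key.offset &&
		 btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0)
		worth_reading = 1;	/* shared, but back refs need updating */
	else
		worth_reading = 0;	/* shared and untouched: never read */
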
-#endif
-
-struct walk_control {
-       u64 refs[BTRFS_MAX_LEVEL];
-       u64 flags[BTRFS_MAX_LEVEL];
-       struct btrfs_key update_progress;
-       int stage;
-       int level;
-       int shared_level;
-       int update_ref;
-       int keep_locks;
-};
-
-#define DROP_REFERENCE 1
-#define UPDATE_BACKREF 2
 
 /*
  * helper to process tree block while walking down the tree.
  *
- * when wc->stage == DROP_REFERENCE, this function checks
- * reference count of the block. if the block is shared and
- * we need update back refs for the subtree rooted at the
- * block, this function changes wc->stage to UPDATE_BACKREF
- *
  * when wc->stage == UPDATE_BACKREF, this function updates
  * back refs for pointers in the block.
  *
@@ -4805,7 +4587,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
 {
        int level = wc->level;
        struct extent_buffer *eb = path->nodes[level];
-       struct btrfs_key key;
        u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
        int ret;
 
@@ -4828,21 +4609,6 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
                BUG_ON(wc->refs[level] == 0);
        }
 
-       if (wc->stage == DROP_REFERENCE &&
-           wc->update_ref && wc->refs[level] > 1) {
-               BUG_ON(eb == root->node);
-               BUG_ON(path->slots[level] > 0);
-               if (level == 0)
-                       btrfs_item_key_to_cpu(eb, &key, path->slots[level]);
-               else
-                       btrfs_node_key_to_cpu(eb, &key, path->slots[level]);
-               if (btrfs_header_owner(eb) == root->root_key.objectid &&
-                   btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) {
-                       wc->stage = UPDATE_BACKREF;
-                       wc->shared_level = level;
-               }
-       }
-
        if (wc->stage == DROP_REFERENCE) {
                if (wc->refs[level] > 1)
                        return 1;
@@ -4878,6 +4644,123 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
        return 0;
 }
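
Of walk_down_proc() only the deletions are shown here; the retained DROP_REFERENCE early-out is what keeps walk_down_tree() from descending into shared subtrees. A restatement of the surviving lines:

	/* sketch: the check walk_down_proc() still performs */
	if (wc->stage == DROP_REFERENCE && wc->refs[level] > 1)
		return 1;	/* block is shared: stop walking down */
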
 
+/*
+ * helper to process tree block pointer.
+ *
+ * when wc->stage == DROP_REFERENCE, this function checks the
+ * reference count of the block pointed to. if the block is
+ * shared and we need to update back refs for the subtree
+ * rooted at the block, this function changes wc->stage to
+ * UPDATE_BACKREF. if the block is shared and there is no
+ * need to update back refs, this function drops the
+ * reference to the block.
+ *
+ * NOTE: return value 1 means we should stop walking down.
+ */
+static noinline int do_walk_down(struct btrfs_trans_handle *trans,
+                                struct btrfs_root *root,
+                                struct btrfs_path *path,
+                                struct walk_control *wc)
+{
+       u64 bytenr;
+       u64 generation;
+       u64 parent;
+       u32 blocksize;
+       struct btrfs_key key;
+       struct extent_buffer *next;
+       int level = wc->level;
+       int reada = 0;
+       int ret = 0;
+
+       generation = btrfs_node_ptr_generation(path->nodes[level],
+                                              path->slots[level]);
+       /*
+        * if the lower level block was created before the snapshot
+        * was created, we know there is no need to update back refs
+        * for the subtree
+        */
+       if (wc->stage == UPDATE_BACKREF &&
+           generation <= root->root_key.offset)
+               return 1;
+
+       bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
+       blocksize = btrfs_level_size(root, level - 1);
+
+       next = btrfs_find_tree_block(root, bytenr, blocksize);
+       if (!next) {
+               next = btrfs_find_create_tree_block(root, bytenr, blocksize);
+               reada = 1;
+       }
+       btrfs_tree_lock(next);
+       btrfs_set_lock_blocking(next);
+
+       if (wc->stage == DROP_REFERENCE) {
+               ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
+                                              &wc->refs[level - 1],
+                                              &wc->flags[level - 1]);
+               BUG_ON(ret);
+               BUG_ON(wc->refs[level - 1] == 0);
+
+               if (wc->refs[level - 1] > 1) {
+                       if (!wc->update_ref ||
+                           generation <= root->root_key.offset)
+                               goto skip;
+
+                       btrfs_node_key_to_cpu(path->nodes[level], &key,
+                                             path->slots[level]);
+                       ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
+                       if (ret < 0)
+                               goto skip;
+
+                       wc->stage = UPDATE_BACKREF;
+                       wc->shared_level = level - 1;
+               }
+       }
+
+       if (!btrfs_buffer_uptodate(next, generation)) {
+               btrfs_tree_unlock(next);
+               free_extent_buffer(next);
+               next = NULL;
+       }
+
+       if (!next) {
+               if (reada && level == 1)
+                       reada_walk_down(trans, root, wc, path);
+               next = read_tree_block(root, bytenr, blocksize, generation);
+               btrfs_tree_lock(next);
+               btrfs_set_lock_blocking(next);
+       }
+
+       level--;
+       BUG_ON(level != btrfs_header_level(next));
+       path->nodes[level] = next;
+       path->slots[level] = 0;
+       path->locks[level] = 1;
+       wc->level = level;
+       if (wc->level == 1)
+               wc->reada_slot = 0;
+       return 0;
+skip:
+       wc->refs[level - 1] = 0;
+       wc->flags[level - 1] = 0;
+
+       if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
+               parent = path->nodes[level]->start;
+       } else {
+               BUG_ON(root->root_key.objectid !=
+                      btrfs_header_owner(path->nodes[level]));
+               parent = 0;
+       }
+
+       ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
+                               root->root_key.objectid, level - 1, 0);
+       BUG_ON(ret);
+
+       btrfs_tree_unlock(next);
+       free_extent_buffer(next);
+       return 1;
+}
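
do_walk_down() is also where the two-stage walk flips over: when a shared child is met whose back refs still need updating, the stage switches to UPDATE_BACKREF and wc->shared_level records where to switch back (walk_up_proc() undoes it once the walk climbs past that level). The transition, pulled out of the function above:

	/* sketch: stage transition on a shared child (from do_walk_down) */
	if (wc->refs[level - 1] > 1 && wc->update_ref &&
	    generation > root->root_key.offset &&
	    btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) {
		wc->stage = UPDATE_BACKREF;
		wc->shared_level = level - 1;
	}
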
+
 /*
  * helper to process tree block while walking up the tree.
  *
@@ -4905,7 +4788,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                if (level < wc->shared_level)
                        goto out;
 
-               BUG_ON(wc->refs[level] <= 1);
                ret = find_next_key(path, level + 1, &wc->update_progress);
                if (ret > 0)
                        wc->update_ref = 0;
@@ -4936,8 +4818,6 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                                path->locks[level] = 0;
                                return 1;
                        }
-               } else {
-                       BUG_ON(level != 0);
                }
        }
 
@@ -4990,17 +4870,13 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
                                   struct btrfs_path *path,
                                   struct walk_control *wc)
 {
-       struct extent_buffer *next;
-       struct extent_buffer *cur;
-       u64 bytenr;
-       u64 ptr_gen;
-       u32 blocksize;
        int level = wc->level;
        int ret;
 
        while (level >= 0) {
-               cur = path->nodes[level];
-               BUG_ON(path->slots[level] >= btrfs_header_nritems(cur));
+               if (path->slots[level] >=
+                   btrfs_header_nritems(path->nodes[level]))
+                       break;
 
                ret = walk_down_proc(trans, root, path, wc);
                if (ret > 0)
@@ -5009,20 +4885,12 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
                if (level == 0)
                        break;
 
-               bytenr = btrfs_node_blockptr(cur, path->slots[level]);
-               blocksize = btrfs_level_size(root, level - 1);
-               ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]);
-
-               next = read_tree_block(root, bytenr, blocksize, ptr_gen);
-               btrfs_tree_lock(next);
-               btrfs_set_lock_blocking(next);
-
-               level--;
-               BUG_ON(level != btrfs_header_level(next));
-               path->nodes[level] = next;
-               path->slots[level] = 0;
-               path->locks[level] = 1;
-               wc->level = level;
+               ret = do_walk_down(trans, root, path, wc);
+               if (ret > 0) {
+                       path->slots[level]++;
+                       continue;
+               }
+               level = wc->level;
        }
        return 0;
 }
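
With do_walk_down() in place, the drive loop in btrfs_drop_snapshot() stays simple: walk down until a level is exhausted, then walk back up. A minimal sketch of that pairing, assuming the usual walk_up_tree() counterpart (not shown in this section):

	/* sketch: how btrfs_drop_snapshot() drives the walk */
	while (1) {
		ret = walk_down_tree(trans, root, path, wc);
		if (ret < 0)
			break;			/* error */
		ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
		if (ret)
			break;			/* < 0 error, > 0 reached the root */
	}
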
@@ -5112,9 +4980,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
                        err = ret;
                        goto out;
                }
-               btrfs_node_key_to_cpu(path->nodes[level], &key,
-                                     path->slots[level]);
-               WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key)));
+               WARN_ON(ret > 0);
 
                /*
                 * unlock our path, this is safe because only this
@@ -5149,6 +5015,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
        wc->stage = DROP_REFERENCE;
        wc->update_ref = update_ref;
        wc->keep_locks = 0;
+       wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
 
        while (1) {
                ret = walk_down_tree(trans, root, path, wc);
@@ -5201,9 +5068,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
        ret = btrfs_del_root(trans, tree_root, &root->root_key);
        BUG_ON(ret);
 
-       free_extent_buffer(root->node);
-       free_extent_buffer(root->commit_root);
-       kfree(root);
+       if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
+               ret = btrfs_find_last_root(tree_root, root->root_key.objectid,
+                                          NULL, NULL);
+               BUG_ON(ret < 0);
+               if (ret > 0) {
+                       ret = btrfs_del_orphan_item(trans, tree_root,
+                                                   root->root_key.objectid);
+                       BUG_ON(ret);
+               }
+       }
+
+       if (root->in_radix) {
+               btrfs_free_fs_root(tree_root->fs_info, root);
+       } else {
+               free_extent_buffer(root->node);
+               free_extent_buffer(root->commit_root);
+               kfree(root);
+       }
 out:
        btrfs_end_transaction(trans, tree_root);
        kfree(wc);
@@ -5255,6 +5137,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
        wc->stage = DROP_REFERENCE;
        wc->update_ref = 0;
        wc->keep_locks = 1;
+       wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
 
        while (1) {
                wret = walk_down_tree(trans, root, path, wc);
@@ -5397,9 +5280,9 @@ static noinline int relocate_data_extent(struct inode *reloc_inode,
        lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
        while (1) {
                int ret;
-               spin_lock(&em_tree->lock);
+               write_lock(&em_tree->lock);
                ret = add_extent_mapping(em_tree, em);
-               spin_unlock(&em_tree->lock);
+               write_unlock(&em_tree->lock);
                if (ret != -EEXIST) {
                        free_extent_map(em);
                        break;
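
The spin_lock/spin_unlock pair becomes write_lock/write_unlock because the extent map tree's lock is converted to a rwlock elsewhere in this merge: insertions like the one above take it exclusively, while lookups may now run concurrently. The matching reader side looks like this (a sketch of the lookup pattern, assuming the converted callers):

	/* sketch: shared-mode lookup against the new rwlock */
	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, start, len);
	read_unlock(&em_tree->lock);
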
@@ -6842,287 +6725,86 @@ int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
        return 0;
 }
 
-#if 0
-static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
-                                struct btrfs_root *root,
-                                u64 objectid, u64 size)
-{
-       struct btrfs_path *path;
-       struct btrfs_inode_item *item;
-       struct extent_buffer *leaf;
-       int ret;
-
-       path = btrfs_alloc_path();
-       if (!path)
-               return -ENOMEM;
-
-       path->leave_spinning = 1;
-       ret = btrfs_insert_empty_inode(trans, root, path, objectid);
-       if (ret)
-               goto out;
-
-       leaf = path->nodes[0];
-       item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
-       memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
-       btrfs_set_inode_generation(leaf, item, 1);
-       btrfs_set_inode_size(leaf, item, size);
-       btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
-       btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
-       btrfs_mark_buffer_dirty(leaf);
-       btrfs_release_path(root, path);
-out:
-       btrfs_free_path(path);
-       return ret;
-}
-
-static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
-                                       struct btrfs_block_group_cache *group)
+/*
+ * checks to see if its even possible to relocate this block group.
+ *
+ * @return - -1 if it's not a good idea to relocate this block group, 0 if its
+ * ok to go ahead and try.
+ */
+int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
 {
-       struct inode *inode = NULL;
-       struct btrfs_trans_handle *trans;
-       struct btrfs_root *root;
-       struct btrfs_key root_key;
-       u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
-       int err = 0;
+       struct btrfs_block_group_cache *block_group;
+       struct btrfs_space_info *space_info;
+       struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
+       struct btrfs_device *device;
+       int full = 0;
+       int ret = 0;
 
-       root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
-       root_key.type = BTRFS_ROOT_ITEM_KEY;
-       root_key.offset = (u64)-1;
-       root = btrfs_read_fs_root_no_name(fs_info, &root_key);
-       if (IS_ERR(root))
-               return ERR_CAST(root);
+       block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
 
-       trans = btrfs_start_transaction(root, 1);
-       BUG_ON(!trans);
+       /* odd, couldn't find the block group, leave it alone */
+       if (!block_group)
+               return -1;
 
-       err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
-       if (err)
+       /* no bytes used, we're good */
+       if (!btrfs_block_group_used(&block_group->item))
                goto out;
 
-       err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
-       BUG_ON(err);
-
-       err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
-                                      group->key.offset, 0, group->key.offset,
-                                      0, 0, 0);
-       BUG_ON(err);
-
-       inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
-       if (inode->i_state & I_NEW) {
-               BTRFS_I(inode)->root = root;
-               BTRFS_I(inode)->location.objectid = objectid;
-               BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
-               BTRFS_I(inode)->location.offset = 0;
-               btrfs_read_locked_inode(inode);
-               unlock_new_inode(inode);
-               BUG_ON(is_bad_inode(inode));
-       } else {
-               BUG_ON(1);
-       }
-       BTRFS_I(inode)->index_cnt = group->key.objectid;
-
-       err = btrfs_orphan_add(trans, inode);
-out:
-       btrfs_end_transaction(trans, root);
-       if (err) {
-               if (inode)
-                       iput(inode);
-               inode = ERR_PTR(err);
-       }
-       return inode;
-}
-
-int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
-{
-
-       struct btrfs_ordered_sum *sums;
-       struct btrfs_sector_sum *sector_sum;
-       struct btrfs_ordered_extent *ordered;
-       struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct list_head list;
-       size_t offset;
-       int ret;
-       u64 disk_bytenr;
-
-       INIT_LIST_HEAD(&list);
-
-       ordered = btrfs_lookup_ordered_extent(inode, file_pos);
-       BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
-
-       disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
-       ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
-                                      disk_bytenr + len - 1, &list);
-
-       while (!list_empty(&list)) {
-               sums = list_entry(list.next, struct btrfs_ordered_sum, list);
-               list_del_init(&sums->list);
-
-               sector_sum = sums->sums;
-               sums->bytenr = ordered->start;
+       space_info = block_group->space_info;
+       spin_lock(&space_info->lock);
 
-               offset = 0;
-               while (offset < sums->len) {
-                       sector_sum->bytenr += ordered->start - disk_bytenr;
-                       sector_sum++;
-                       offset += root->sectorsize;
-               }
+       full = space_info->full;
 
-               btrfs_add_ordered_sum(inode, ordered, sums);
+       /*
+        * if this is the last block group we have in this space, we can't
+        * relocate it unless we're able to allocate a new chunk below.
+        *
+        * Otherwise, we need to make sure we have room in the space to handle
+        * all of the extents from this block group.  If we can, we're good
+        */
+       if ((space_info->total_bytes != block_group->key.offset) &&
+          (space_info->bytes_used + space_info->bytes_reserved +
+           space_info->bytes_pinned + space_info->bytes_readonly +
+           btrfs_block_group_used(&block_group->item) <
+           space_info->total_bytes)) {
+               spin_unlock(&space_info->lock);
+               goto out;
        }
-       btrfs_put_ordered_extent(ordered);
-       return 0;
-}
-
-int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
-{
-       struct btrfs_trans_handle *trans;
-       struct btrfs_path *path;
-       struct btrfs_fs_info *info = root->fs_info;
-       struct extent_buffer *leaf;
-       struct inode *reloc_inode;
-       struct btrfs_block_group_cache *block_group;
-       struct btrfs_key key;
-       u64 skipped;
-       u64 cur_byte;
-       u64 total_found;
-       u32 nritems;
-       int ret;
-       int progress;
-       int pass = 0;
-
-       root = root->fs_info->extent_root;
-
-       block_group = btrfs_lookup_block_group(info, group_start);
-       BUG_ON(!block_group);
-
-       printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n",
-              (unsigned long long)block_group->key.objectid,
-              (unsigned long long)block_group->flags);
-
-       path = btrfs_alloc_path();
-       BUG_ON(!path);
-
-       reloc_inode = create_reloc_inode(info, block_group);
-       BUG_ON(IS_ERR(reloc_inode));
-
-       __alloc_chunk_for_shrink(root, block_group, 1);
-       set_block_group_readonly(block_group);
-
-       btrfs_start_delalloc_inodes(info->tree_root);
-       btrfs_wait_ordered_extents(info->tree_root, 0);
-again:
-       skipped = 0;
-       total_found = 0;
-       progress = 0;
-       key.objectid = block_group->key.objectid;
-       key.offset = 0;
-       key.type = 0;
-       cur_byte = key.objectid;
-
-       trans = btrfs_start_transaction(info->tree_root, 1);
-       btrfs_commit_transaction(trans, info->tree_root);
+       spin_unlock(&space_info->lock);
 
-       mutex_lock(&root->fs_info->cleaner_mutex);
-       btrfs_clean_old_snapshots(info->tree_root);
-       btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1);
-       mutex_unlock(&root->fs_info->cleaner_mutex);
+       /*
+        * ok we don't have enough space, but maybe we have free space on our
+        * devices to allocate new chunks for relocation, so loop through our
+        * alloc devices and guess if we have enough space.  However, if we
+        * were marked as full, then we know there aren't enough chunks, and we
+        * can just return.
+        */
+       ret = -1;
+       if (full)
+               goto out;
 
-       trans = btrfs_start_transaction(info->tree_root, 1);
-       btrfs_commit_transaction(trans, info->tree_root);
+       mutex_lock(&root->fs_info->chunk_mutex);
+       list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
+               u64 min_free = btrfs_block_group_used(&block_group->item);
+               u64 dev_offset, max_avail;
 
-       while (1) {
-               ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-               if (ret < 0)
-                       goto out;
-next:
-               leaf = path->nodes[0];
-               nritems = btrfs_header_nritems(leaf);
-               if (path->slots[0] >= nritems) {
-                       ret = btrfs_next_leaf(root, path);
-                       if (ret < 0)
-                               goto out;
-                       if (ret == 1) {
-                               ret = 0;
+               /*
+                * check to make sure we can actually find a chunk with enough
+                * space to fit our block group in.
+                */
+               if (device->total_bytes > device->bytes_used + min_free) {
+                       ret = find_free_dev_extent(NULL, device, min_free,
+                                                  &dev_offset, &max_avail);
+                       if (!ret)
                                break;
-                       }
-                       leaf = path->nodes[0];
-                       nritems = btrfs_header_nritems(leaf);
-               }
-
-               btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
-
-               if (key.objectid >= block_group->key.objectid +
-                   block_group->key.offset)
-                       break;
-
-               if (progress && need_resched()) {
-                       btrfs_release_path(root, path);
-                       cond_resched();
-                       progress = 0;
-                       continue;
+                       ret = -1;
                }
-               progress = 1;
-
-               if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY ||
-                   key.objectid + key.offset <= cur_byte) {
-                       path->slots[0]++;
-                       goto next;
-               }
-
-               total_found++;
-               cur_byte = key.objectid + key.offset;
-               btrfs_release_path(root, path);
-
-               __alloc_chunk_for_shrink(root, block_group, 0);
-               ret = relocate_one_extent(root, path, &key, block_group,
-                                         reloc_inode, pass);
-               BUG_ON(ret < 0);
-               if (ret > 0)
-                       skipped++;
-
-               key.objectid = cur_byte;
-               key.type = 0;
-               key.offset = 0;
        }
-
-       btrfs_release_path(root, path);
-
-       if (pass == 0) {
-               btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1);
-               invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1);
-       }
-
-       if (total_found > 0) {
-               printk(KERN_INFO "btrfs found %llu extents in pass %d\n",
-                      (unsigned long long)total_found, pass);
-               pass++;
-               if (total_found == skipped && pass > 2) {
-                       iput(reloc_inode);
-                       reloc_inode = create_reloc_inode(info, block_group);
-                       pass = 0;
-               }
-               goto again;
-       }
-
-       /* delete reloc_inode */
-       iput(reloc_inode);
-
-       /* unpin extents in this range */
-       trans = btrfs_start_transaction(info->tree_root, 1);
-       btrfs_commit_transaction(trans, info->tree_root);
-
-       spin_lock(&block_group->lock);
-       WARN_ON(block_group->pinned > 0);
-       WARN_ON(block_group->reserved > 0);
-       WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
-       spin_unlock(&block_group->lock);
-       btrfs_put_block_group(block_group);
-       ret = 0;
+       mutex_unlock(&root->fs_info->chunk_mutex);
 out:
-       btrfs_free_path(path);
+       btrfs_put_block_group(block_group);
        return ret;
 }
-#endif
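
Read as a predicate, the in-place test above says: unless this block group is the whole space_info, relocation can proceed when everything already accounted plus this group's own usage still fits. Restated (the fits variable is illustrative):

	/* sketch: the space check btrfs_can_relocate() applies */
	int fits = space_info->bytes_used + space_info->bytes_reserved +
		   space_info->bytes_pinned + space_info->bytes_readonly +
		   btrfs_block_group_used(&block_group->item) <
		   space_info->total_bytes;
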
 
 static int find_first_block_group(struct btrfs_root *root,
                struct btrfs_path *path, struct btrfs_key *key)
@@ -7165,8 +6847,18 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 {
        struct btrfs_block_group_cache *block_group;
        struct btrfs_space_info *space_info;
+       struct btrfs_caching_control *caching_ctl;
        struct rb_node *n;
 
+       down_write(&info->extent_commit_sem);
+       while (!list_empty(&info->caching_block_groups)) {
+               caching_ctl = list_entry(info->caching_block_groups.next,
+                                        struct btrfs_caching_control, list);
+               list_del(&caching_ctl->list);
+               put_caching_control(caching_ctl);
+       }
+       up_write(&info->extent_commit_sem);
+
        spin_lock(&info->block_group_cache_lock);
        while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
                block_group = rb_entry(n, struct btrfs_block_group_cache,
@@ -7180,8 +6872,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
                up_write(&block_group->space_info->groups_sem);
 
                if (block_group->cached == BTRFS_CACHE_STARTED)
-                       wait_event(block_group->caching_q,
-                                  block_group_cache_done(block_group));
+                       wait_block_group_cache_done(block_group);
 
                btrfs_remove_free_space_cache(block_group);
 
@@ -7251,7 +6942,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                spin_lock_init(&cache->lock);
                spin_lock_init(&cache->tree_lock);
                cache->fs_info = info;
-               init_waitqueue_head(&cache->caching_q);
                INIT_LIST_HEAD(&cache->list);
                INIT_LIST_HEAD(&cache->cluster_list);
 
@@ -7273,8 +6963,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                cache->flags = btrfs_block_group_flags(&cache->item);
                cache->sectorsize = root->sectorsize;
 
-               remove_sb_from_cache(root, cache);
-
                /*
                 * check for two cases, either we are full, and therefore
                 * don't need to bother with the caching work since we won't
@@ -7283,13 +6971,19 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                 * time, particularly in the full case.
                 */
                if (found_key.offset == btrfs_block_group_used(&cache->item)) {
+                       exclude_super_stripes(root, cache);
+                       cache->last_byte_to_unpin = (u64)-1;
                        cache->cached = BTRFS_CACHE_FINISHED;
+                       free_excluded_extents(root, cache);
                } else if (btrfs_block_group_used(&cache->item) == 0) {
+                       exclude_super_stripes(root, cache);
+                       cache->last_byte_to_unpin = (u64)-1;
                        cache->cached = BTRFS_CACHE_FINISHED;
                        add_new_free_space(cache, root->fs_info,
                                           found_key.objectid,
                                           found_key.objectid +
                                           found_key.offset);
+                       free_excluded_extents(root, cache);
                }
 
                ret = update_space_info(info, cache->flags, found_key.offset,
@@ -7297,6 +6991,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                                        &space_info);
                BUG_ON(ret);
                cache->space_info = space_info;
+               spin_lock(&cache->space_info->lock);
+               cache->space_info->bytes_super += cache->bytes_super;
+               spin_unlock(&cache->space_info->lock);
+
                down_write(&space_info->groups_sem);
                list_add_tail(&cache->list, &space_info->block_groups);
                up_write(&space_info->groups_sem);
@@ -7346,7 +7044,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
        atomic_set(&cache->count, 1);
        spin_lock_init(&cache->lock);
        spin_lock_init(&cache->tree_lock);
-       init_waitqueue_head(&cache->caching_q);
        INIT_LIST_HEAD(&cache->list);
        INIT_LIST_HEAD(&cache->cluster_list);
 
@@ -7355,15 +7052,23 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
        cache->flags = type;
        btrfs_set_block_group_flags(&cache->item, type);
 
+       cache->last_byte_to_unpin = (u64)-1;
        cache->cached = BTRFS_CACHE_FINISHED;
-       remove_sb_from_cache(root, cache);
+       exclude_super_stripes(root, cache);
 
        add_new_free_space(cache, root->fs_info, chunk_offset,
                           chunk_offset + size);
 
+       free_excluded_extents(root, cache);
+
        ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
                                &cache->space_info);
        BUG_ON(ret);
+
+       spin_lock(&cache->space_info->lock);
+       cache->space_info->bytes_super += cache->bytes_super;
+       spin_unlock(&cache->space_info->lock);
+
        down_write(&cache->space_info->groups_sem);
        list_add_tail(&cache->list, &cache->space_info->block_groups);
        up_write(&cache->space_info->groups_sem);
@@ -7429,8 +7134,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        up_write(&block_group->space_info->groups_sem);
 
        if (block_group->cached == BTRFS_CACHE_STARTED)
-               wait_event(block_group->caching_q,
-                          block_group_cache_done(block_group));
+               wait_block_group_cache_done(block_group);
 
        btrfs_remove_free_space_cache(block_group);
 
index 6826018..0cb88f8 100644 (file)
@@ -367,10 +367,10 @@ static int insert_state(struct extent_io_tree *tree,
        }
        if (bits & EXTENT_DIRTY)
                tree->dirty_bytes += end - start + 1;
-       set_state_cb(tree, state, bits);
-       state->state |= bits;
        state->start = start;
        state->end = end;
+       set_state_cb(tree, state, bits);
+       state->state |= bits;
        node = tree_insert(&tree->state, end, &state->rb_node);
        if (node) {
                struct extent_state *found;
@@ -471,10 +471,14 @@ static int clear_state_bit(struct extent_io_tree *tree,
  * bits were already set, or zero if none of the bits were already set.
  */
 int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-                    int bits, int wake, int delete, gfp_t mask)
+                    int bits, int wake, int delete,
+                    struct extent_state **cached_state,
+                    gfp_t mask)
 {
        struct extent_state *state;
+       struct extent_state *cached;
        struct extent_state *prealloc = NULL;
+       struct rb_node *next_node;
        struct rb_node *node;
        u64 last_end;
        int err;
@@ -488,6 +492,17 @@ again:
        }
 
        spin_lock(&tree->lock);
+       if (cached_state) {
+               cached = *cached_state;
+               *cached_state = NULL;
+               cached_state = NULL;
+               if (cached && cached->tree && cached->start == start) {
+                       atomic_dec(&cached->refs);
+                       state = cached;
+                       goto hit_next;
+               }
+               free_extent_state(cached);
+       }
        /*
         * this search will find the extents that end after
         * our range starts
@@ -496,6 +511,7 @@ again:
        if (!node)
                goto out;
        state = rb_entry(node, struct extent_state, rb_node);
+hit_next:
        if (state->start > end)
                goto out;
        WARN_ON(state->end < start);
@@ -531,8 +547,6 @@ again:
                        if (last_end == (u64)-1)
                                goto out;
                        start = last_end + 1;
-               } else {
-                       start = state->start;
                }
                goto search_again;
        }
@@ -550,16 +564,28 @@ again:
 
                if (wake)
                        wake_up(&state->wq);
+
                set |= clear_state_bit(tree, prealloc, bits,
                                       wake, delete);
                prealloc = NULL;
                goto out;
        }
 
+       if (state->end < end && prealloc && !need_resched())
+               next_node = rb_next(&state->rb_node);
+       else
+               next_node = NULL;
+
        set |= clear_state_bit(tree, state, bits, wake, delete);
        if (last_end == (u64)-1)
                goto out;
        start = last_end + 1;
+       if (start <= end && next_node) {
+               state = rb_entry(next_node, struct extent_state,
+                                rb_node);
+               if (state->start == start)
+                       goto hit_next;
+       }
        goto search_again;
 
 out:
@@ -653,28 +679,40 @@ static void set_state_bits(struct extent_io_tree *tree,
        state->state |= bits;
 }
 
+static void cache_state(struct extent_state *state,
+                       struct extent_state **cached_ptr)
+{
+       if (cached_ptr && !(*cached_ptr)) {
+               if (state->state & (EXTENT_IOBITS | EXTENT_BOUNDARY)) {
+                       *cached_ptr = state;
+                       atomic_inc(&state->refs);
+               }
+       }
+}
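
cache_state() is the producer half of the new fast path: it pins the extent_state a call just touched so the next operation on the same range can jump straight to it via the hit_next labels added above, skipping the rbtree search. The consumer pattern, as wired up for locking later in this diff (a minimal sketch):

	struct extent_state *cached = NULL;

	/* lock [start, end] and remember the state record we landed on */
	lock_extent_bits(tree, start, end, 0, &cached, GFP_NOFS);
	/* ... work on the range ... */
	/* unlock through the cached record: no tree search if it matches */
	unlock_extent_cached(tree, start, end, &cached, GFP_NOFS);
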
+
 /*
- * set some bits on a range in the tree.  This may require allocations
- * or sleeping, so the gfp mask is used to indicate what is allowed.
+ * set some bits on a range in the tree.  This may require allocations or
+ * sleeping, so the gfp mask is used to indicate what is allowed.
  *
- * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
- * range already has the desired bits set.  The start of the existing
- * range is returned in failed_start in this case.
+ * If any of the exclusive bits are set, this will fail with -EEXIST if some
+ * part of the range already has the desired bits set.  The start of the
+ * existing range is returned in failed_start in this case.
  *
- * [start, end] is inclusive
- * This takes the tree lock.
+ * [start, end] is inclusive.  This takes the tree lock.
  */
+
 static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-                         int bits, int exclusive, u64 *failed_start,
+                         int bits, int exclusive_bits, u64 *failed_start,
+                         struct extent_state **cached_state,
                          gfp_t mask)
 {
        struct extent_state *state;
        struct extent_state *prealloc = NULL;
        struct rb_node *node;
        int err = 0;
-       int set;
        u64 last_start;
        u64 last_end;
+
 again:
        if (!prealloc && (mask & __GFP_WAIT)) {
                prealloc = alloc_extent_state(mask);
@@ -683,6 +721,13 @@ again:
        }
 
        spin_lock(&tree->lock);
+       if (cached_state && *cached_state) {
+               state = *cached_state;
+               if (state->start == start && state->tree) {
+                       node = &state->rb_node;
+                       goto hit_next;
+               }
+       }
        /*
         * this search will find all the extents that end after
         * our range starts.
@@ -694,8 +739,8 @@ again:
                BUG_ON(err == -EEXIST);
                goto out;
        }
-
        state = rb_entry(node, struct extent_state, rb_node);
+hit_next:
        last_start = state->start;
        last_end = state->end;
 
@@ -706,17 +751,29 @@ again:
         * Just lock what we found and keep going
         */
        if (state->start == start && state->end <= end) {
-               set = state->state & bits;
-               if (set && exclusive) {
+               struct rb_node *next_node;
+               if (state->state & exclusive_bits) {
                        *failed_start = state->start;
                        err = -EEXIST;
                        goto out;
                }
+
                set_state_bits(tree, state, bits);
+               cache_state(state, cached_state);
                merge_state(tree, state);
                if (last_end == (u64)-1)
                        goto out;
+
                start = last_end + 1;
+               if (start < end && prealloc && !need_resched()) {
+                       next_node = rb_next(node);
+                       if (next_node) {
+                               state = rb_entry(next_node, struct extent_state,
+                                                rb_node);
+                               if (state->start == start)
+                                       goto hit_next;
+                       }
+               }
                goto search_again;
        }
 
@@ -737,8 +794,7 @@ again:
         * desired bit on it.
         */
        if (state->start < start) {
-               set = state->state & bits;
-               if (exclusive && set) {
+               if (state->state & exclusive_bits) {
                        *failed_start = start;
                        err = -EEXIST;
                        goto out;
@@ -750,12 +806,11 @@ again:
                        goto out;
                if (state->end <= end) {
                        set_state_bits(tree, state, bits);
+                       cache_state(state, cached_state);
                        merge_state(tree, state);
                        if (last_end == (u64)-1)
                                goto out;
                        start = last_end + 1;
-               } else {
-                       start = state->start;
                }
                goto search_again;
        }
@@ -774,6 +829,7 @@ again:
                        this_end = last_start - 1;
                err = insert_state(tree, prealloc, start, this_end,
                                   bits);
+               cache_state(prealloc, cached_state);
                prealloc = NULL;
                BUG_ON(err == -EEXIST);
                if (err)
@@ -788,8 +844,7 @@ again:
         * on the first half
         */
        if (state->start <= end && state->end > end) {
-               set = state->state & bits;
-               if (exclusive && set) {
+               if (state->state & exclusive_bits) {
                        *failed_start = start;
                        err = -EEXIST;
                        goto out;
@@ -798,6 +853,7 @@ again:
                BUG_ON(err == -EEXIST);
 
                set_state_bits(tree, prealloc, bits);
+               cache_state(prealloc, cached_state);
                merge_state(tree, prealloc);
                prealloc = NULL;
                goto out;
@@ -826,86 +882,64 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
                     gfp_t mask)
 {
        return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
-                             mask);
-}
-
-int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
-                      gfp_t mask)
-{
-       return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
+                             NULL, mask);
 }
 
 int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                    int bits, gfp_t mask)
 {
        return set_extent_bit(tree, start, end, bits, 0, NULL,
-                             mask);
+                             NULL, mask);
 }
 
 int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                      int bits, gfp_t mask)
 {
-       return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
+       return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
 }
 
 int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
                     gfp_t mask)
 {
        return set_extent_bit(tree, start, end,
-                             EXTENT_DELALLOC | EXTENT_DIRTY,
-                             0, NULL, mask);
+                             EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE,
+                             0, NULL, NULL, mask);
 }
 
 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
                       gfp_t mask)
 {
        return clear_extent_bit(tree, start, end,
-                               EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
-}
-
-int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
-                        gfp_t mask)
-{
-       return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
+                               EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
+                               NULL, mask);
 }
 
 int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
                     gfp_t mask)
 {
        return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
-                             mask);
+                             NULL, mask);
 }
 
 static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
                       gfp_t mask)
 {
-       return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
+       return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0,
+                               NULL, mask);
 }
 
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
                        gfp_t mask)
 {
        return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
-                             mask);
+                             NULL, mask);
 }
 
 static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
                                 u64 end, gfp_t mask)
 {
-       return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
-}
-
-static int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
-                        gfp_t mask)
-{
-       return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
-                             0, NULL, mask);
-}
-
-static int clear_extent_writeback(struct extent_io_tree *tree, u64 start,
-                                 u64 end, gfp_t mask)
-{
-       return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
+       return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
+                               NULL, mask);
 }
 
 int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
@@ -917,13 +951,15 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
  * either insert or lock state struct between start and end use mask to tell
  * us if waiting is desired.
  */
-int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
+int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+                    int bits, struct extent_state **cached_state, gfp_t mask)
 {
        int err;
        u64 failed_start;
        while (1) {
-               err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
-                                    &failed_start, mask);
+               err = set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
+                                    EXTENT_LOCKED, &failed_start,
+                                    cached_state, mask);
                if (err == -EEXIST && (mask & __GFP_WAIT)) {
                        wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
                        start = failed_start;
@@ -935,27 +971,40 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
        return err;
 }
 
+int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
+{
+       return lock_extent_bits(tree, start, end, 0, NULL, mask);
+}
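
Note the interface change visible here: set_extent_bit() no longer takes a boolean exclusive flag but a mask of bits that must not already be set, so lock_extent_bits() can set EXTENT_LOCKED plus caller-supplied bits in one pass while conflicting only on an existing lock. The two call styles, both taken from this hunk:

	/* non-exclusive: just OR the bits into the range */
	set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, NULL, mask);
	/* exclusive on EXTENT_LOCKED: -EEXIST if any part is already locked */
	set_extent_bit(tree, start, end, EXTENT_LOCKED | bits, EXTENT_LOCKED,
		       &failed_start, cached_state, mask);
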
+
 int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
                    gfp_t mask)
 {
        int err;
        u64 failed_start;
 
-       err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
-                            &failed_start, mask);
+       err = set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
+                            &failed_start, NULL, mask);
        if (err == -EEXIST) {
                if (failed_start > start)
                        clear_extent_bit(tree, start, failed_start - 1,
-                                        EXTENT_LOCKED, 1, 0, mask);
+                                        EXTENT_LOCKED, 1, 0, NULL, mask);
                return 0;
        }
        return 1;
 }
 
+int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
+                        struct extent_state **cached, gfp_t mask)
+{
+       return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
+                               mask);
+}
+
 int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
                  gfp_t mask)
 {
-       return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
+       return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
+                               mask);
 }
 
 /*
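
The two hunks above split the old lock_extent() into lock_extent_bits(), which can set extra bits and hand back a cached extent_state, and unlock_extent_cached(), which consumes that cache; find_lock_delalloc_range() below is the first in-tree user. The point of the design is that a locked range is usually re-tested or unlocked at exactly the same start offset, so one remembered node replaces a full rb-tree search. A minimal userspace analogue of that fast path, using illustrative names rather than btrfs API:

    #include <stdio.h>
    #include <stddef.h>

    struct state {
            unsigned long long start, end;
    };

    static struct state states[] = { { 0, 4095 }, { 4096, 8191 } };
    static int searches;

    /* stand-in for the rb-tree walk the cached pointer avoids */
    static struct state *search(unsigned long long start)
    {
            searches++;
            for (size_t i = 0; i < sizeof(states) / sizeof(states[0]); i++)
                    if (states[i].start <= start && start <= states[i].end)
                            return &states[i];
            return NULL;
    }

    static struct state *lookup(unsigned long long start, struct state **cached)
    {
            if (*cached && (*cached)->start == start)
                    return *cached;         /* cache hit: no tree walk */
            *cached = search(start);        /* remember for next time */
            return *cached;
    }

    int main(void)
    {
            struct state *cached = NULL;

            lookup(4096, &cached);
            lookup(4096, &cached);
            printf("2 lookups, %d search\n", searches);     /* prints 1 */
            return 0;
    }
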
@@ -974,7 +1023,6 @@ int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
                page_cache_release(page);
                index++;
        }
-       set_extent_dirty(tree, start, end, GFP_NOFS);
        return 0;
 }
 
@@ -994,7 +1042,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
                page_cache_release(page);
                index++;
        }
-       set_extent_writeback(tree, start, end, GFP_NOFS);
        return 0;
 }
 
@@ -1232,6 +1279,7 @@ static noinline u64 find_lock_delalloc_range(struct inode *inode,
        u64 delalloc_start;
        u64 delalloc_end;
        u64 found;
+       struct extent_state *cached_state = NULL;
        int ret;
        int loops = 0;
 
@@ -1269,6 +1317,7 @@ again:
                /* some of the pages are gone, let's avoid looping by
                 * shortening the size of the delalloc range we're searching
                 */
+               free_extent_state(cached_state);
                if (!loops) {
                        unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
                        max_bytes = PAGE_CACHE_SIZE - offset;
@@ -1282,18 +1331,21 @@ again:
        BUG_ON(ret);
 
        /* step three, lock the state bits for the whole range */
-       lock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
+       lock_extent_bits(tree, delalloc_start, delalloc_end,
+                        0, &cached_state, GFP_NOFS);
 
        /* then test to make sure it is all still delalloc */
        ret = test_range_bit(tree, delalloc_start, delalloc_end,
-                            EXTENT_DELALLOC, 1);
+                            EXTENT_DELALLOC, 1, cached_state);
        if (!ret) {
-               unlock_extent(tree, delalloc_start, delalloc_end, GFP_NOFS);
+               unlock_extent_cached(tree, delalloc_start, delalloc_end,
+                                    &cached_state, GFP_NOFS);
                __unlock_for_delalloc(inode, locked_page,
                              delalloc_start, delalloc_end);
                cond_resched();
                goto again;
        }
+       free_extent_state(cached_state);
        *start = delalloc_start;
        *end = delalloc_end;
 out_failed:
@@ -1307,7 +1359,8 @@ int extent_clear_unlock_delalloc(struct inode *inode,
                                int clear_unlock,
                                int clear_delalloc, int clear_dirty,
                                int set_writeback,
-                               int end_writeback)
+                               int end_writeback,
+                               int set_private2)
 {
        int ret;
        struct page *pages[16];
@@ -1325,8 +1378,9 @@ int extent_clear_unlock_delalloc(struct inode *inode,
        if (clear_delalloc)
                clear_bits |= EXTENT_DELALLOC;
 
-       clear_extent_bit(tree, start, end, clear_bits, 1, 0, GFP_NOFS);
-       if (!(unlock_pages || clear_dirty || set_writeback || end_writeback))
+       clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
+       if (!(unlock_pages || clear_dirty || set_writeback || end_writeback ||
+             set_private2))
                return 0;
 
        while (nr_pages > 0) {
@@ -1334,6 +1388,10 @@ int extent_clear_unlock_delalloc(struct inode *inode,
                                     min_t(unsigned long,
                                     nr_pages, ARRAY_SIZE(pages)), pages);
                for (i = 0; i < ret; i++) {
+
+                       if (set_private2)
+                               SetPagePrivate2(pages[i]);
+
                        if (pages[i] == locked_page) {
                                page_cache_release(pages[i]);
                                continue;
@@ -1476,14 +1534,17 @@ out:
  * range is found set.
  */
 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
-                  int bits, int filled)
+                  int bits, int filled, struct extent_state *cached)
 {
        struct extent_state *state = NULL;
        struct rb_node *node;
        int bitset = 0;
 
        spin_lock(&tree->lock);
-       node = tree_search(tree, start);
+       if (cached && cached->tree && cached->start == start)
+               node = &cached->rb_node;
+       else
+               node = tree_search(tree, start);
        while (node && start <= end) {
                state = rb_entry(node, struct extent_state, rb_node);
 
@@ -1503,6 +1564,10 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
                        bitset = 0;
                        break;
                }
+
+               if (state->end == (u64)-1)
+                       break;
+
                start = state->end + 1;
                if (start > end)
                        break;
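
The state->end == (u64)-1 check added above is an overflow guard: when a state stretches to the largest possible offset, the cursor advance start = state->end + 1 would wrap to zero and the walk would never terminate. A standalone demonstration of the wraparound:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t end = UINT64_MAX;      /* a state covering up to (u64)-1 */
            uint64_t next = end + 1;        /* unsigned arithmetic wraps */

            assert(next == 0);              /* the walk would restart at offset 0 */
            printf("end + 1 = %llu\n", (unsigned long long)next);
            return 0;
    }
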
@@ -1526,7 +1591,7 @@ static int check_page_uptodate(struct extent_io_tree *tree,
 {
        u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 end = start + PAGE_CACHE_SIZE - 1;
-       if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
+       if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
                SetPageUptodate(page);
        return 0;
 }
@@ -1540,7 +1605,7 @@ static int check_page_locked(struct extent_io_tree *tree,
 {
        u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
        u64 end = start + PAGE_CACHE_SIZE - 1;
-       if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
+       if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
                unlock_page(page);
        return 0;
 }
@@ -1552,10 +1617,7 @@ static int check_page_locked(struct extent_io_tree *tree,
 static int check_page_writeback(struct extent_io_tree *tree,
                             struct page *page)
 {
-       u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
-       u64 end = start + PAGE_CACHE_SIZE - 1;
-       if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
-               end_page_writeback(page);
+       end_page_writeback(page);
        return 0;
 }
 
@@ -1613,13 +1675,11 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
                }
 
                if (!uptodate) {
-                       clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
+                       clear_extent_uptodate(tree, start, end, GFP_NOFS);
                        ClearPageUptodate(page);
                        SetPageError(page);
                }
 
-               clear_extent_writeback(tree, start, end, GFP_ATOMIC);
-
                if (whole_page)
                        end_page_writeback(page);
                else
@@ -1983,7 +2043,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                        continue;
                }
                /* the get_extent function already copied into the page */
-               if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
+               if (test_range_bit(tree, cur, cur_end,
+                                  EXTENT_UPTODATE, 1, NULL)) {
                        check_page_uptodate(tree, page);
                        unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
                        cur = cur + iosize;
@@ -2078,6 +2139,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        u64 iosize;
        u64 unlock_start;
        sector_t sector;
+       struct extent_state *cached_state = NULL;
        struct extent_map *em;
        struct block_device *bdev;
        int ret;
@@ -2124,6 +2186,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        delalloc_end = 0;
        page_started = 0;
        if (!epd->extent_locked) {
+               u64 delalloc_to_write = 0;
                /*
                 * make sure the wbc mapping index is at least updated
                 * to this page.
@@ -2143,8 +2206,24 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                        tree->ops->fill_delalloc(inode, page, delalloc_start,
                                                 delalloc_end, &page_started,
                                                 &nr_written);
+                       /*
+                        * delalloc_end is already one less than the total
+                        * length, so we don't subtract one from
+                        * PAGE_CACHE_SIZE
+                        */
+                       delalloc_to_write += (delalloc_end - delalloc_start +
+                                             PAGE_CACHE_SIZE) >>
+                                             PAGE_CACHE_SHIFT;
                        delalloc_start = delalloc_end + 1;
                }
+               if (wbc->nr_to_write < delalloc_to_write) {
+                       int thresh = 8192;
+
+                       if (delalloc_to_write < thresh * 2)
+                               thresh = delalloc_to_write;
+                       wbc->nr_to_write = min_t(u64, delalloc_to_write,
+                                                thresh);
+               }
 
                /* did the fill delalloc function already unlock and start
                 * the IO?
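
The delalloc accounting added above turns an inclusive byte range into a page count; since delalloc_end already names the last byte, the usual "+ size - 1" round-up collapses to "+ PAGE_CACHE_SIZE", as the comment notes. A standalone check of the arithmetic, assuming 4 KiB pages:

    #include <stdio.h>

    #define PAGE_CACHE_SHIFT 12
    #define PAGE_CACHE_SIZE  (1UL << PAGE_CACHE_SHIFT)

    int main(void)
    {
            unsigned long start = 0, end = 8191;    /* inclusive: two pages */
            unsigned long pages = (end - start + PAGE_CACHE_SIZE)
                                  >> PAGE_CACHE_SHIFT;

            printf("%lu pages\n", pages);           /* prints 2 */
            return 0;
    }
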
@@ -2160,15 +2239,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                        goto done_unlocked;
                }
        }
-       lock_extent(tree, start, page_end, GFP_NOFS);
-
-       unlock_start = start;
-
        if (tree->ops && tree->ops->writepage_start_hook) {
                ret = tree->ops->writepage_start_hook(page, start,
                                                      page_end);
                if (ret == -EAGAIN) {
-                       unlock_extent(tree, start, page_end, GFP_NOFS);
                        redirty_page_for_writepage(wbc, page);
                        update_nr_written(page, wbc, nr_written);
                        unlock_page(page);
@@ -2184,12 +2258,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        update_nr_written(page, wbc, nr_written + 1);
 
        end = page_end;
-       if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0))
-               printk(KERN_ERR "btrfs delalloc bits after lock_extent\n");
-
        if (last_byte <= start) {
-               clear_extent_dirty(tree, start, page_end, GFP_NOFS);
-               unlock_extent(tree, start, page_end, GFP_NOFS);
                if (tree->ops && tree->ops->writepage_end_io_hook)
                        tree->ops->writepage_end_io_hook(page, start,
                                                         page_end, NULL, 1);
@@ -2197,13 +2266,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                goto done;
        }
 
-       set_extent_uptodate(tree, start, page_end, GFP_NOFS);
        blocksize = inode->i_sb->s_blocksize;
 
        while (cur <= end) {
                if (cur >= last_byte) {
-                       clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
-                       unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
                        if (tree->ops && tree->ops->writepage_end_io_hook)
                                tree->ops->writepage_end_io_hook(page, cur,
                                                         page_end, NULL, 1);
@@ -2235,12 +2301,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                 */
                if (compressed || block_start == EXTENT_MAP_HOLE ||
                    block_start == EXTENT_MAP_INLINE) {
-                       clear_extent_dirty(tree, cur,
-                                          cur + iosize - 1, GFP_NOFS);
-
-                       unlock_extent(tree, unlock_start, cur + iosize - 1,
-                                     GFP_NOFS);
-
                        /*
                         * end_io notification does not happen here for
                         * compressed extents
@@ -2265,13 +2325,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                }
                /* leave this out until we have a page_mkwrite call */
                if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
-                                  EXTENT_DIRTY, 0)) {
+                                  EXTENT_DIRTY, 0, NULL)) {
                        cur = cur + iosize;
                        pg_offset += iosize;
                        continue;
                }
 
-               clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
                if (tree->ops && tree->ops->writepage_io_hook) {
                        ret = tree->ops->writepage_io_hook(page, cur,
                                                cur + iosize - 1);
@@ -2309,12 +2368,12 @@ done:
                set_page_writeback(page);
                end_page_writeback(page);
        }
-       if (unlock_start <= page_end)
-               unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
        unlock_page(page);
 
 done_unlocked:
 
+       /* drop our reference on any cached states */
+       free_extent_state(cached_state);
        return 0;
 }
 
@@ -2339,9 +2398,9 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
                             writepage_t writepage, void *data,
                             void (*flush_fn)(void *))
 {
-       struct backing_dev_info *bdi = mapping->backing_dev_info;
        int ret = 0;
        int done = 0;
+       int nr_to_write_done = 0;
        struct pagevec pvec;
        int nr_pages;
        pgoff_t index;
@@ -2361,7 +2420,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
                scanned = 1;
        }
 retry:
-       while (!done && (index <= end) &&
+       while (!done && !nr_to_write_done && (index <= end) &&
               (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
                              PAGECACHE_TAG_DIRTY, min(end - index,
                                  (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
@@ -2412,12 +2471,15 @@ retry:
                                unlock_page(page);
                                ret = 0;
                        }
-                       if (ret || wbc->nr_to_write <= 0)
-                               done = 1;
-                       if (wbc->nonblocking && bdi_write_congested(bdi)) {
-                               wbc->encountered_congestion = 1;
+                       if (ret)
                                done = 1;
-                       }
+
+                       /*
+                        * the filesystem may choose to bump up nr_to_write.
+                        * We have to make sure to honor the new nr_to_write
+                        * on every pass through this loop.
+                        */
+                       nr_to_write_done = wbc->nr_to_write <= 0;
                }
                pagevec_release(&pvec);
                cond_resched();
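
The rewritten exit test above replaces the one-shot "nr_to_write <= 0 means done" (and the congestion check) with a flag recomputed on every pass, so a callee that raises wbc->nr_to_write, as __extent_writepage now does for large delalloc ranges, extends the walk instead of being ignored. An illustrative userspace loop showing the effect:

    #include <stdio.h>

    int main(void)
    {
            long nr_to_write = 2;           /* initial budget */
            int nr_to_write_done = 0, pages = 0;

            while (!nr_to_write_done && pages < 100) {
                    pages++;
                    nr_to_write--;
                    if (pages == 1)
                            nr_to_write = 5;        /* callee bumped the budget */
                    nr_to_write_done = nr_to_write <= 0;    /* re-checked each pass */
            }
            printf("wrote %d pages\n", pages);      /* 6, not the initial 2 */
            return 0;
    }
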
@@ -2604,10 +2666,10 @@ int extent_invalidatepage(struct extent_io_tree *tree,
                return 0;
 
        lock_extent(tree, start, end, GFP_NOFS);
-       wait_on_extent_writeback(tree, start, end);
+       wait_on_page_writeback(page);
        clear_extent_bit(tree, start, end,
                         EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
-                        1, 1, GFP_NOFS);
+                        1, 1, NULL, GFP_NOFS);
        return 0;
 }
 
@@ -2687,7 +2749,7 @@ int extent_prepare_write(struct extent_io_tree *tree,
                    !isnew && !PageUptodate(page) &&
                    (block_off_end > to || block_off_start < from) &&
                    !test_range_bit(tree, block_start, cur_end,
-                                   EXTENT_UPTODATE, 1)) {
+                                   EXTENT_UPTODATE, 1, NULL)) {
                        u64 sector;
                        u64 extent_offset = block_start - em->start;
                        size_t iosize;
@@ -2701,7 +2763,7 @@ int extent_prepare_write(struct extent_io_tree *tree,
                         */
                        set_extent_bit(tree, block_start,
                                       block_start + iosize - 1,
-                                      EXTENT_LOCKED, 0, NULL, GFP_NOFS);
+                                      EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS);
                        ret = submit_extent_page(READ, tree, page,
                                         sector, iosize, page_offset, em->bdev,
                                         NULL, 1,
@@ -2742,13 +2804,18 @@ int try_release_extent_state(struct extent_map_tree *map,
        int ret = 1;
 
        if (test_range_bit(tree, start, end,
-                          EXTENT_IOBITS | EXTENT_ORDERED, 0))
+                          EXTENT_IOBITS, 0, NULL))
                ret = 0;
        else {
                if ((mask & GFP_NOFS) == GFP_NOFS)
                        mask = GFP_NOFS;
-               clear_extent_bit(tree, start, end, EXTENT_UPTODATE,
-                                1, 1, mask);
+               /*
+                * at this point we can safely clear everything except the
+                * locked bit and the nodatasum bit
+                */
+               clear_extent_bit(tree, start, end,
+                                ~(EXTENT_LOCKED | EXTENT_NODATASUM),
+                                0, 0, NULL, mask);
        }
        return ret;
 }
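
try_release_extent_state() now clears with a complement mask: passing ~(EXTENT_LOCKED | EXTENT_NODATASUM) as the bits argument drops every state bit except those two. A standalone demo of the idiom, with stand-in bit values rather than the real extent_io.h definitions:

    #include <stdio.h>

    #define LOCKED    (1 << 3)      /* illustrative values, not the real bits */
    #define NODATASUM (1 << 10)

    int main(void)
    {
            int state = 0xffff;                     /* everything set */
            int clear = ~(LOCKED | NODATASUM);      /* bits to drop */

            state &= ~clear;                        /* what the clear leaves behind */
            printf("%#x\n", state);                 /* 0x408: only the kept bits */
            return 0;
    }
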
@@ -2771,29 +2838,28 @@ int try_release_extent_mapping(struct extent_map_tree *map,
                u64 len;
                while (start <= end) {
                        len = end - start + 1;
-                       spin_lock(&map->lock);
+                       write_lock(&map->lock);
                        em = lookup_extent_mapping(map, start, len);
                        if (!em || IS_ERR(em)) {
-                               spin_unlock(&map->lock);
+                               write_unlock(&map->lock);
                                break;
                        }
                        if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
                            em->start != start) {
-                               spin_unlock(&map->lock);
+                               write_unlock(&map->lock);
                                free_extent_map(em);
                                break;
                        }
                        if (!test_range_bit(tree, em->start,
                                            extent_map_end(em) - 1,
-                                           EXTENT_LOCKED | EXTENT_WRITEBACK |
-                                           EXTENT_ORDERED,
-                                           0)) {
+                                           EXTENT_LOCKED | EXTENT_WRITEBACK,
+                                           0, NULL)) {
                                remove_extent_mapping(map, em);
                                /* once for the rb tree */
                                free_extent_map(em);
                        }
                        start = extent_map_end(em);
-                       spin_unlock(&map->lock);
+                       write_unlock(&map->lock);
 
                        /* once for us */
                        free_extent_map(em);
@@ -3203,7 +3269,7 @@ int extent_range_uptodate(struct extent_io_tree *tree,
        int uptodate;
        unsigned long index;
 
-       ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1);
+       ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL);
        if (ret)
                return 1;
        while (start <= end) {
@@ -3233,7 +3299,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree,
                return 1;
 
        ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
-                          EXTENT_UPTODATE, 1);
+                          EXTENT_UPTODATE, 1, NULL);
        if (ret)
                return ret;
 
@@ -3269,7 +3335,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
                return 0;
 
        if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
-                          EXTENT_UPTODATE, 1)) {
+                          EXTENT_UPTODATE, 1, NULL)) {
                return 0;
        }
 
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5bc20ab..14ed16f 100644
 #define EXTENT_DEFRAG (1 << 6)
 #define EXTENT_DEFRAG_DONE (1 << 7)
 #define EXTENT_BUFFER_FILLED (1 << 8)
-#define EXTENT_ORDERED (1 << 9)
-#define EXTENT_ORDERED_METADATA (1 << 10)
-#define EXTENT_BOUNDARY (1 << 11)
-#define EXTENT_NODATASUM (1 << 12)
+#define EXTENT_BOUNDARY (1 << 9)
+#define EXTENT_NODATASUM (1 << 10)
 #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
 
 /* flags for bio submission */
@@ -142,6 +140,8 @@ int try_release_extent_state(struct extent_map_tree *map,
                             struct extent_io_tree *tree, struct page *page,
                             gfp_t mask);
 int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
+int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+                    int bits, struct extent_state **cached, gfp_t mask);
 int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
 int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
                    gfp_t mask);
@@ -155,11 +155,12 @@ u64 count_range_bits(struct extent_io_tree *tree,
                     u64 max_bytes, unsigned long bits);
 
 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
-                  int bits, int filled);
+                  int bits, int filled, struct extent_state *cached_state);
 int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                      int bits, gfp_t mask);
 int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-                    int bits, int wake, int delete, gfp_t mask);
+                    int bits, int wake, int delete, struct extent_state **cached,
+                    gfp_t mask);
 int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                    int bits, gfp_t mask);
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
@@ -282,5 +283,6 @@ int extent_clear_unlock_delalloc(struct inode *inode,
                                int clear_unlock,
                                int clear_delalloc, int clear_dirty,
                                int set_writeback,
-                               int end_writeback);
+                               int end_writeback,
+                               int set_private2);
 #endif
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 30c9365..2c726b7 100644
@@ -36,7 +36,7 @@ void extent_map_exit(void)
 void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
 {
        tree->map.rb_node = NULL;
-       spin_lock_init(&tree->lock);
+       rwlock_init(&tree->lock);
 }
 
 /**
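
From here on the extent map tree lock is an rwlock: in the hunks below, pure lookups (the IO-failure record path and the cow_file_range allocation-hint path) take read_lock() while insertion, removal and unpinning take write_lock(), letting concurrent readers proceed. A minimal userspace analogue using POSIX rwlocks (link with -lpthread):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_rwlock_t tree_lock = PTHREAD_RWLOCK_INITIALIZER;
    static int nr_extents;

    static int lookup(void)
    {
            int n;

            pthread_rwlock_rdlock(&tree_lock);      /* read_lock(&tree->lock) */
            n = nr_extents;                         /* concurrent readers allowed */
            pthread_rwlock_unlock(&tree_lock);
            return n;
    }

    static void insert(void)
    {
            pthread_rwlock_wrlock(&tree_lock);      /* write_lock(&tree->lock) */
            nr_extents++;                           /* exclusive for mutation */
            pthread_rwlock_unlock(&tree_lock);
    }

    int main(void)
    {
            insert();
            printf("%d extents\n", lookup());
            return 0;
    }
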
@@ -198,6 +198,56 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
        return 0;
 }
 
+int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len)
+{
+       int ret = 0;
+       struct extent_map *merge = NULL;
+       struct rb_node *rb;
+       struct extent_map *em;
+
+       write_lock(&tree->lock);
+       em = lookup_extent_mapping(tree, start, len);
+
+       WARN_ON(!em || em->start != start);
+
+       if (!em)
+               goto out;
+
+       clear_bit(EXTENT_FLAG_PINNED, &em->flags);
+
+       if (em->start != 0) {
+               rb = rb_prev(&em->rb_node);
+               if (rb)
+                       merge = rb_entry(rb, struct extent_map, rb_node);
+               if (rb && mergable_maps(merge, em)) {
+                       em->start = merge->start;
+                       em->len += merge->len;
+                       em->block_len += merge->block_len;
+                       em->block_start = merge->block_start;
+                       merge->in_tree = 0;
+                       rb_erase(&merge->rb_node, &tree->map);
+                       free_extent_map(merge);
+               }
+       }
+
+       rb = rb_next(&em->rb_node);
+       if (rb)
+               merge = rb_entry(rb, struct extent_map, rb_node);
+       if (rb && mergable_maps(em, merge)) {
+               em->len += merge->len;
+               em->block_len += merge->len;
+               rb_erase(&merge->rb_node, &tree->map);
+               merge->in_tree = 0;
+               free_extent_map(merge);
+       }
+
+       free_extent_map(em);
+out:
+       write_unlock(&tree->lock);
+       return ret;
+
+}
+
 /**
  * add_extent_mapping - add new extent map to the extent tree
  * @tree:      tree to insert new map in
@@ -222,7 +272,6 @@ int add_extent_mapping(struct extent_map_tree *tree,
                ret = -EEXIST;
                goto out;
        }
-       assert_spin_locked(&tree->lock);
        rb = tree_insert(&tree->map, em->start, &em->rb_node);
        if (rb) {
                ret = -EEXIST;
@@ -285,7 +334,6 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
        struct rb_node *next = NULL;
        u64 end = range_end(start, len);
 
-       assert_spin_locked(&tree->lock);
        rb_node = __tree_search(&tree->map, start, &prev, &next);
        if (!rb_node && prev) {
                em = rb_entry(prev, struct extent_map, rb_node);
@@ -318,6 +366,54 @@ out:
        return em;
 }
 
+/**
+ * search_extent_mapping - find a nearby extent map
+ * @tree:      tree to lookup in
+ * @start:     byte offset to start the search
+ * @len:       length of the lookup range
+ *
+ * Find and return the first extent_map struct in @tree that intersects the
+ * [start, start + len) range.
+ *
+ * If one can't be found, any nearby extent may be returned instead.
+ */
+struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
+                                        u64 start, u64 len)
+{
+       struct extent_map *em;
+       struct rb_node *rb_node;
+       struct rb_node *prev = NULL;
+       struct rb_node *next = NULL;
+
+       rb_node = __tree_search(&tree->map, start, &prev, &next);
+       if (!rb_node && prev) {
+               em = rb_entry(prev, struct extent_map, rb_node);
+               goto found;
+       }
+       if (!rb_node && next) {
+               em = rb_entry(next, struct extent_map, rb_node);
+               goto found;
+       }
+       if (!rb_node) {
+               em = NULL;
+               goto out;
+       }
+       if (IS_ERR(rb_node)) {
+               em = ERR_PTR(PTR_ERR(rb_node));
+               goto out;
+       }
+       em = rb_entry(rb_node, struct extent_map, rb_node);
+       goto found;
+
+       em = NULL;
+       goto out;
+
+found:
+       atomic_inc(&em->refs);
+out:
+       return em;
+}
+
 /**
  * remove_extent_mapping - removes an extent_map from the extent tree
  * @tree:      extent tree to remove from
@@ -331,7 +427,6 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
        int ret = 0;
 
        WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
-       assert_spin_locked(&tree->lock);
        rb_erase(&em->rb_node, &tree->map);
        em->in_tree = 0;
        return ret;
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index fb6eeef..ab6d74b 100644
@@ -31,7 +31,7 @@ struct extent_map {
 
 struct extent_map_tree {
        struct rb_root map;
-       spinlock_t lock;
+       rwlock_t lock;
 };
 
 static inline u64 extent_map_end(struct extent_map *em)
@@ -59,4 +59,7 @@ struct extent_map *alloc_extent_map(gfp_t mask);
 void free_extent_map(struct extent_map *em);
 int __init extent_map_init(void);
 void extent_map_exit(void);
+int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len);
+struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
+                                        u64 start, u64 len);
 #endif
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 4b83397..571ad3c 100644
@@ -112,8 +112,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
        int err = 0;
        int i;
        struct inode *inode = fdentry(file)->d_inode;
-       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
-       u64 hint_byte;
        u64 num_bytes;
        u64 start_pos;
        u64 end_of_last_block;
@@ -125,22 +123,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
                    root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
 
        end_of_last_block = start_pos + num_bytes - 1;
-
-       lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
-       trans = btrfs_join_transaction(root, 1);
-       if (!trans) {
-               err = -ENOMEM;
-               goto out_unlock;
-       }
-       btrfs_set_trans_block_group(trans, inode);
-       hint_byte = 0;
-
-       set_extent_uptodate(io_tree, start_pos, end_of_last_block, GFP_NOFS);
-
-       /* check for reserved extents on each page, we don't want
-        * to reset the delalloc bit on things that already have
-        * extents reserved.
-        */
        btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block);
        for (i = 0; i < num_pages; i++) {
                struct page *p = pages[i];
@@ -155,9 +137,6 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
                 * at this time.
                 */
        }
-       err = btrfs_end_transaction(trans, root);
-out_unlock:
-       unlock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
        return err;
 }
 
@@ -189,18 +168,18 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                if (!split2)
                        split2 = alloc_extent_map(GFP_NOFS);
 
-               spin_lock(&em_tree->lock);
+               write_lock(&em_tree->lock);
                em = lookup_extent_mapping(em_tree, start, len);
                if (!em) {
-                       spin_unlock(&em_tree->lock);
+                       write_unlock(&em_tree->lock);
                        break;
                }
                flags = em->flags;
                if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
-                       spin_unlock(&em_tree->lock);
                        if (em->start <= start &&
                            (!testend || em->start + em->len >= start + len)) {
                                free_extent_map(em);
+                               write_unlock(&em_tree->lock);
                                break;
                        }
                        if (start < em->start) {
@@ -210,6 +189,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                                start = em->start + em->len;
                        }
                        free_extent_map(em);
+                       write_unlock(&em_tree->lock);
                        continue;
                }
                compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
@@ -260,7 +240,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                        free_extent_map(split);
                        split = NULL;
                }
-               spin_unlock(&em_tree->lock);
+               write_unlock(&em_tree->lock);
 
                /* once for us */
                free_extent_map(em);
@@ -289,7 +269,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, struct inode *inode,
                       u64 start, u64 end, u64 locked_end,
-                      u64 inline_limit, u64 *hint_byte)
+                      u64 inline_limit, u64 *hint_byte, int drop_cache)
 {
        u64 extent_end = 0;
        u64 search_start = start;
@@ -314,7 +294,8 @@ noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
        int ret;
 
        inline_limit = 0;
-       btrfs_drop_extent_cache(inode, start, end - 1, 0);
+       if (drop_cache)
+               btrfs_drop_extent_cache(inode, start, end - 1, 0);
 
        path = btrfs_alloc_path();
        if (!path)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 5edcee3..5c2caad 100644
@@ -259,7 +259,9 @@ static int link_free_space(struct btrfs_block_group_cache *block_group,
 
 static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
 {
-       u64 max_bytes, possible_bytes;
+       u64 max_bytes;
+       u64 bitmap_bytes;
+       u64 extent_bytes;
 
        /*
         * The goal is to keep the total amount of memory used per 1gb of space
@@ -269,22 +271,27 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
        max_bytes = MAX_CACHE_BYTES_PER_GIG *
                (div64_u64(block_group->key.offset, 1024 * 1024 * 1024));
 
-       possible_bytes = (block_group->total_bitmaps * PAGE_CACHE_SIZE) +
-               (sizeof(struct btrfs_free_space) *
-                block_group->extents_thresh);
+       /*
+        * we want to account for 1 more bitmap than what we have so we can make
+        * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
+        * we add more bitmaps.
+        */
+       bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE;
 
-       if (possible_bytes > max_bytes) {
-               int extent_bytes = max_bytes -
-                       (block_group->total_bitmaps * PAGE_CACHE_SIZE);
+       if (bitmap_bytes >= max_bytes) {
+               block_group->extents_thresh = 0;
+               return;
+       }
 
-               if (extent_bytes <= 0) {
-                       block_group->extents_thresh = 0;
-                       return;
-               }
+       /*
+        * we want the extent entry threshold to always be at most 1/2 the max
+        * bytes we can have, or whatever is less than that.
+        */
+       extent_bytes = max_bytes - bitmap_bytes;
+       extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
 
-               block_group->extents_thresh = extent_bytes /
-                       (sizeof(struct btrfs_free_space));
-       }
+       block_group->extents_thresh =
+               div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
 }
 
 static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
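
The rewritten recalculate_thresholds() above budgets bitmap bytes first, counting one bitmap beyond what is currently allocated, and caps the extent-entry share at half the per-gigabyte budget. A worked example under stated assumptions: MAX_CACHE_BYTES_PER_GIG of 32 KiB, 4 KiB pages, an illustrative sizeof(struct btrfs_free_space) of 48 bytes, and a 1 GiB block group already holding one bitmap:

    #include <stdio.h>

    int main(void)
    {
            unsigned long long max_bytes = 32 * 1024;         /* budget for 1 GiB */
            unsigned long long bitmap_bytes = (1 + 1) * 4096; /* bitmaps + 1 */
            unsigned long long extent_bytes = max_bytes - bitmap_bytes;

            if (extent_bytes > max_bytes / 2)
                    extent_bytes = max_bytes / 2;   /* the min_t(u64, ...) cap */

            printf("extents_thresh = %llu\n", extent_bytes / 48);   /* 341 */
            return 0;
    }
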
@@ -403,6 +410,7 @@ static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
        BUG_ON(block_group->total_bitmaps >= max_bitmaps);
 
        info->offset = offset_to_bitmap(block_group, offset);
+       info->bytes = 0;
        link_free_space(block_group, info);
        block_group->total_bitmaps++;
 
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index 6b627c6..72ce3c1 100644
@@ -149,6 +149,8 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
                ptr = (unsigned long)(ref + 1);
                ret = 0;
        } else if (ret < 0) {
+               if (ret == -EOVERFLOW)
+                       ret = -EMLINK;
                goto out;
        } else {
                ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
@@ -177,8 +179,6 @@ int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
 
        ret = btrfs_insert_empty_item(trans, root, path, &key,
                                      sizeof(struct btrfs_inode_item));
-       if (ret == 0 && objectid > root->highest_inode)
-               root->highest_inode = objectid;
        return ret;
 }
 
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 9abbced..c56eb59 100644
@@ -43,9 +43,10 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid)
                slot = path->slots[0] - 1;
                l = path->nodes[0];
                btrfs_item_key_to_cpu(l, &found_key, slot);
-               *objectid = found_key.objectid;
+               *objectid = max_t(u64, found_key.objectid,
+                                 BTRFS_FIRST_FREE_OBJECTID - 1);
        } else {
-               *objectid = BTRFS_FIRST_FREE_OBJECTID;
+               *objectid = BTRFS_FIRST_FREE_OBJECTID - 1;
        }
        ret = 0;
 error:
@@ -53,91 +54,27 @@ error:
        return ret;
 }
 
-/*
- * walks the btree of allocated inodes and find a hole.
- */
 int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
                             u64 dirid, u64 *objectid)
 {
-       struct btrfs_path *path;
-       struct btrfs_key key;
        int ret;
-       int slot = 0;
-       u64 last_ino = 0;
-       int start_found;
-       struct extent_buffer *l;
-       struct btrfs_key search_key;
-       u64 search_start = dirid;
-
        mutex_lock(&root->objectid_mutex);
-       if (root->last_inode_alloc >= BTRFS_FIRST_FREE_OBJECTID &&
-           root->last_inode_alloc < BTRFS_LAST_FREE_OBJECTID) {
-               *objectid = ++root->last_inode_alloc;
-               mutex_unlock(&root->objectid_mutex);
-               return 0;
-       }
-       path = btrfs_alloc_path();
-       BUG_ON(!path);
-       search_start = max(search_start, (u64)BTRFS_FIRST_FREE_OBJECTID);
-       search_key.objectid = search_start;
-       search_key.type = 0;
-       search_key.offset = 0;
-
-       start_found = 0;
-       ret = btrfs_search_slot(trans, root, &search_key, path, 0, 0);
-       if (ret < 0)
-               goto error;
 
-       while (1) {
-               l = path->nodes[0];
-               slot = path->slots[0];
-               if (slot >= btrfs_header_nritems(l)) {
-                       ret = btrfs_next_leaf(root, path);
-                       if (ret == 0)
-                               continue;
-                       if (ret < 0)
-                               goto error;
-                       if (!start_found) {
-                               *objectid = search_start;
-                               start_found = 1;
-                               goto found;
-                       }
-                       *objectid = last_ino > search_start ?
-                               last_ino : search_start;
-                       goto found;
-               }
-               btrfs_item_key_to_cpu(l, &key, slot);
-               if (key.objectid >= search_start) {
-                       if (start_found) {
-                               if (last_ino < search_start)
-                                       last_ino = search_start;
-                               if (key.objectid > last_ino) {
-                                       *objectid = last_ino;
-                                       goto found;
-                               }
-                       } else if (key.objectid > search_start) {
-                               *objectid = search_start;
-                               goto found;
-                       }
-               }
-               if (key.objectid >= BTRFS_LAST_FREE_OBJECTID)
-                       break;
+       if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) {
+               ret = btrfs_find_highest_inode(root, &root->highest_objectid);
+               if (ret)
+                       goto out;
+       }
 
-               start_found = 1;
-               last_ino = key.objectid + 1;
-               path->slots[0]++;
+       if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
+               ret = -ENOSPC;
+               goto out;
        }
-       BUG_ON(1);
-found:
-       btrfs_release_path(root, path);
-       btrfs_free_path(path);
-       BUG_ON(*objectid < search_start);
-       mutex_unlock(&root->objectid_mutex);
-       return 0;
-error:
-       btrfs_release_path(root, path);
-       btrfs_free_path(path);
+
+       *objectid = ++root->highest_objectid;
+       ret = 0;
+out:
        mutex_unlock(&root->objectid_mutex);
        return ret;
 }
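
The deleted tree walk above is replaced by a lazily seeded counter: the first allocation reads the highest existing objectid out of the tree, every later one is a locked increment, and -ENOSPC is returned once BTRFS_LAST_FREE_OBJECTID would be crossed. A userspace sketch of that contract, with illustrative stand-in limits:

    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>

    #define FIRST_FREE 256ULL       /* stand-ins for the BTRFS_*_OBJECTID limits */
    #define LAST_FREE  1000ULL

    static pthread_mutex_t objectid_mutex = PTHREAD_MUTEX_INITIALIZER;
    static unsigned long long highest_objectid;     /* 0 means "not seeded yet" */

    static unsigned long long find_highest_in_tree(void)
    {
            return FIRST_FREE - 1;  /* pretend the tree holds no inodes yet */
    }

    static int find_free_objectid(unsigned long long *objectid)
    {
            int ret = 0;

            pthread_mutex_lock(&objectid_mutex);
            if (highest_objectid < FIRST_FREE)
                    highest_objectid = find_highest_in_tree();  /* seed once */
            if (highest_objectid >= LAST_FREE)
                    ret = -ENOSPC;
            else
                    *objectid = ++highest_objectid;
            pthread_mutex_unlock(&objectid_mutex);
            return ret;
    }

    int main(void)
    {
            unsigned long long id;

            if (!find_free_objectid(&id))
                    printf("first id = %llu\n", id);        /* 256 */
            return 0;
    }
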
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9096fd0..e9b76bc 100644
@@ -231,7 +231,8 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
        }
 
        ret = btrfs_drop_extents(trans, root, inode, start,
-                                aligned_end, aligned_end, start, &hint_byte);
+                                aligned_end, aligned_end, start,
+                                &hint_byte, 1);
        BUG_ON(ret);
 
        if (isize > actual_end)
@@ -240,7 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
                                   inline_len, compressed_size,
                                   compressed_pages);
        BUG_ON(ret);
-       btrfs_drop_extent_cache(inode, start, aligned_end, 0);
+       btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
        return 0;
 }
 
@@ -425,7 +426,7 @@ again:
                        extent_clear_unlock_delalloc(inode,
                                                     &BTRFS_I(inode)->io_tree,
                                                     start, end, NULL, 1, 0,
-                                                    0, 1, 1, 1);
+                                                    0, 1, 1, 1, 0);
                        ret = 0;
                        goto free_pages_out;
                }
@@ -611,9 +612,9 @@ static noinline int submit_compressed_extents(struct inode *inode,
                set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
 
                while (1) {
-                       spin_lock(&em_tree->lock);
+                       write_lock(&em_tree->lock);
                        ret = add_extent_mapping(em_tree, em);
-                       spin_unlock(&em_tree->lock);
+                       write_unlock(&em_tree->lock);
                        if (ret != -EEXIST) {
                                free_extent_map(em);
                                break;
@@ -640,7 +641,7 @@ static noinline int submit_compressed_extents(struct inode *inode,
                                             async_extent->start,
                                             async_extent->start +
                                             async_extent->ram_size - 1,
-                                            NULL, 1, 1, 0, 1, 1, 0);
+                                            NULL, 1, 1, 0, 1, 1, 0, 0);
 
                ret = btrfs_submit_compressed_write(inode,
                                    async_extent->start,
@@ -713,7 +714,7 @@ static noinline int cow_file_range(struct inode *inode,
                        extent_clear_unlock_delalloc(inode,
                                                     &BTRFS_I(inode)->io_tree,
                                                     start, end, NULL, 1, 1,
-                                                    1, 1, 1, 1);
+                                                    1, 1, 1, 1, 0);
                        *nr_written = *nr_written +
                             (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
                        *page_started = 1;
@@ -725,6 +726,15 @@ static noinline int cow_file_range(struct inode *inode,
        BUG_ON(disk_num_bytes >
               btrfs_super_total_bytes(&root->fs_info->super_copy));
 
+
+       read_lock(&BTRFS_I(inode)->extent_tree.lock);
+       em = search_extent_mapping(&BTRFS_I(inode)->extent_tree,
+                                  start, num_bytes);
+       if (em) {
+               alloc_hint = em->block_start;
+               free_extent_map(em);
+       }
+       read_unlock(&BTRFS_I(inode)->extent_tree.lock);
        btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
 
        while (disk_num_bytes > 0) {
@@ -737,7 +747,6 @@ static noinline int cow_file_range(struct inode *inode,
                em = alloc_extent_map(GFP_NOFS);
                em->start = start;
                em->orig_start = em->start;
-
                ram_size = ins.offset;
                em->len = ins.offset;
 
@@ -747,9 +756,9 @@ static noinline int cow_file_range(struct inode *inode,
                set_bit(EXTENT_FLAG_PINNED, &em->flags);
 
                while (1) {
-                       spin_lock(&em_tree->lock);
+                       write_lock(&em_tree->lock);
                        ret = add_extent_mapping(em_tree, em);
-                       spin_unlock(&em_tree->lock);
+                       write_unlock(&em_tree->lock);
                        if (ret != -EEXIST) {
                                free_extent_map(em);
                                break;
@@ -776,11 +785,14 @@ static noinline int cow_file_range(struct inode *inode,
                /* we're not doing compressed IO, don't unlock the first
                 * page (which the caller expects to stay locked), don't
                 * clear any dirty bits and don't set any writeback bits
+                *
+                * Do set the Private2 bit so we know this page was properly
+                * set up for writepage.
                 */
                extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
                                             start, start + ram_size - 1,
                                             locked_page, unlock, 1,
-                                            1, 0, 0, 0);
+                                            1, 0, 0, 0, 1);
                disk_num_bytes -= cur_alloc_size;
                num_bytes -= cur_alloc_size;
                alloc_hint = ins.objectid + ins.offset;
@@ -853,7 +865,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
        int limit = 10 * 1024 * 1042;
 
        clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED |
-                        EXTENT_DELALLOC, 1, 0, GFP_NOFS);
+                        EXTENT_DELALLOC, 1, 0, NULL, GFP_NOFS);
        while (start < end) {
                async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
                async_cow->inode = inode;
@@ -1080,9 +1092,9 @@ out_check:
                        em->bdev = root->fs_info->fs_devices->latest_bdev;
                        set_bit(EXTENT_FLAG_PINNED, &em->flags);
                        while (1) {
-                               spin_lock(&em_tree->lock);
+                               write_lock(&em_tree->lock);
                                ret = add_extent_mapping(em_tree, em);
-                               spin_unlock(&em_tree->lock);
+                               write_unlock(&em_tree->lock);
                                if (ret != -EEXIST) {
                                        free_extent_map(em);
                                        break;
@@ -1101,7 +1113,7 @@ out_check:
 
                extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
                                        cur_offset, cur_offset + num_bytes - 1,
-                                       locked_page, 1, 1, 1, 0, 0, 0);
+                                       locked_page, 1, 1, 1, 0, 0, 0, 1);
                cur_offset = extent_end;
                if (cur_offset > end)
                        break;
@@ -1374,10 +1386,8 @@ again:
        lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
 
        /* already ordered? We're done */
-       if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
-                            EXTENT_ORDERED, 0)) {
+       if (PagePrivate2(page))
                goto out;
-       }
 
        ordered = btrfs_lookup_ordered_extent(inode, page_start);
        if (ordered) {
@@ -1413,11 +1423,9 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
        struct inode *inode = page->mapping->host;
        struct btrfs_writepage_fixup *fixup;
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       int ret;
 
-       ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
-                            EXTENT_ORDERED, 0);
-       if (ret)
+       /* this page is properly in the ordered list */
+       if (TestClearPagePrivate2(page))
                return 0;
 
        if (PageChecked(page))
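
With EXTENT_ORDERED gone, "this range has a pending ordered extent" now rides on the page's Private2 flag: cow_file_range() and run_delalloc_nocow() set it when a page is set up for writepage, and the hook above consumes it with TestClearPagePrivate2(), whose atomic read-and-clear guarantees exactly one observer. A standalone C11 analogue of the test-and-clear, with purely illustrative names:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    int main(void)
    {
            atomic_bool private2 = false;

            atomic_store(&private2, true);          /* SetPagePrivate2() */

            /* TestClearPagePrivate2(): old value returned, flag left clear */
            bool first = atomic_exchange(&private2, false);
            bool second = atomic_exchange(&private2, false);

            printf("first=%d second=%d\n", first, second);  /* 1 0 */
            return 0;
    }
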
@@ -1455,9 +1463,19 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
        BUG_ON(!path);
 
        path->leave_spinning = 1;
+
+       /*
+        * we may be replacing one extent in the tree with another.
+        * The new extent is pinned in the extent map, and we don't want
+        * to drop it from the cache until it is completely in the btree.
+        *
+        * So, tell btrfs_drop_extents to leave this extent in the cache.
+        * the caller is expected to unpin it and allow it to be merged
+        * with the others.
+        */
        ret = btrfs_drop_extents(trans, root, inode, file_pos,
                                 file_pos + num_bytes, locked_end,
-                                file_pos, &hint);
+                                file_pos, &hint, 0);
        BUG_ON(ret);
 
        ins.objectid = inode->i_ino;
@@ -1485,7 +1503,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(leaf);
 
        inode_add_bytes(inode, num_bytes);
-       btrfs_drop_extent_cache(inode, file_pos, file_pos + num_bytes - 1, 0);
 
        ins.objectid = disk_bytenr;
        ins.offset = disk_num_bytes;
@@ -1596,6 +1613,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                                                ordered_extent->len,
                                                compressed, 0, 0,
                                                BTRFS_FILE_EXTENT_REG);
+               unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
+                                  ordered_extent->file_offset,
+                                  ordered_extent->len);
                BUG_ON(ret);
        }
        unlock_extent(io_tree, ordered_extent->file_offset,
@@ -1623,6 +1643,7 @@ nocow:
 static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
                                struct extent_state *state, int uptodate)
 {
+       ClearPagePrivate2(page);
        return btrfs_finish_ordered_io(page->mapping->host, start, end);
 }
 
@@ -1669,13 +1690,13 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
                failrec->last_mirror = 0;
                failrec->bio_flags = 0;
 
-               spin_lock(&em_tree->lock);
+               read_lock(&em_tree->lock);
                em = lookup_extent_mapping(em_tree, start, failrec->len);
                if (em->start > start || em->start + em->len < start) {
                        free_extent_map(em);
                        em = NULL;
                }
-               spin_unlock(&em_tree->lock);
+               read_unlock(&em_tree->lock);
 
                if (!em || IS_ERR(em)) {
                        kfree(failrec);
@@ -1794,7 +1815,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
                return 0;
 
        if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
-           test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1)) {
+           test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
                clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM,
                                  GFP_NOFS);
                return 0;
@@ -2352,6 +2373,69 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
        return ret;
 }
 
+int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
+                       struct btrfs_root *root,
+                       struct inode *dir, u64 objectid,
+                       const char *name, int name_len)
+{
+       struct btrfs_path *path;
+       struct extent_buffer *leaf;
+       struct btrfs_dir_item *di;
+       struct btrfs_key key;
+       u64 index;
+       int ret;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
+                                  name, name_len, -1);
+       BUG_ON(!di || IS_ERR(di));
+
+       leaf = path->nodes[0];
+       btrfs_dir_item_key_to_cpu(leaf, di, &key);
+       WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
+       ret = btrfs_delete_one_dir_name(trans, root, path, di);
+       BUG_ON(ret);
+       btrfs_release_path(root, path);
+
+       ret = btrfs_del_root_ref(trans, root->fs_info->tree_root,
+                                objectid, root->root_key.objectid,
+                                dir->i_ino, &index, name, name_len);
+       if (ret < 0) {
+               BUG_ON(ret != -ENOENT);
+               di = btrfs_search_dir_index_item(root, path, dir->i_ino,
+                                                name, name_len);
+               BUG_ON(!di || IS_ERR(di));
+
+               leaf = path->nodes[0];
+               btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+               btrfs_release_path(root, path);
+               index = key.offset;
+       }
+
+       di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
+                                        index, name, name_len, -1);
+       BUG_ON(!di || IS_ERR(di));
+
+       leaf = path->nodes[0];
+       btrfs_dir_item_key_to_cpu(leaf, di, &key);
+       WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
+       ret = btrfs_delete_one_dir_name(trans, root, path, di);
+       BUG_ON(ret);
+       btrfs_release_path(root, path);
+
+       btrfs_i_size_write(dir, dir->i_size - name_len * 2);
+       dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+       ret = btrfs_update_inode(trans, root, dir);
+       BUG_ON(ret);
+       dir->i_sb->s_dirt = 1;
+
+       btrfs_free_path(path);
+       return 0;
+}
+
 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
        struct inode *inode = dentry->d_inode;
@@ -2361,29 +2445,31 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
        struct btrfs_trans_handle *trans;
        unsigned long nr = 0;
 
-       /*
-        * the FIRST_FREE_OBJECTID check makes sure we don't try to rmdir
-        * the root of a subvolume or snapshot
-        */
        if (inode->i_size > BTRFS_EMPTY_DIR_SIZE ||
-           inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
+           inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
                return -ENOTEMPTY;
-       }
 
        trans = btrfs_start_transaction(root, 1);
        btrfs_set_trans_block_group(trans, dir);
 
+       if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
+               err = btrfs_unlink_subvol(trans, root, dir,
+                                         BTRFS_I(inode)->location.objectid,
+                                         dentry->d_name.name,
+                                         dentry->d_name.len);
+               goto out;
+       }
+
        err = btrfs_orphan_add(trans, inode);
        if (err)
-               goto fail_trans;
+               goto out;
 
        /* now the directory is empty */
        err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
                                 dentry->d_name.name, dentry->d_name.len);
        if (!err)
                btrfs_i_size_write(inode, 0);
-
-fail_trans:
+out:
        nr = trans->blocks_used;
        ret = btrfs_end_transaction_throttle(trans, root);
        btrfs_btree_balance_dirty(root, nr);
@@ -2935,7 +3021,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
                                                 cur_offset,
                                                 cur_offset + hole_size,
                                                 block_end,
-                                                cur_offset, &hint_byte);
+                                                cur_offset, &hint_byte, 1);
                        if (err)
                                break;
                        err = btrfs_insert_file_extent(trans, root,
@@ -3003,6 +3089,11 @@ void btrfs_delete_inode(struct inode *inode)
        }
        btrfs_wait_ordered_range(inode, 0, (u64)-1);
 
+       if (inode->i_nlink > 0) {
+               BUG_ON(btrfs_root_refs(&root->root_item) != 0);
+               goto no_delete;
+       }
+
        btrfs_i_size_write(inode, 0);
        trans = btrfs_join_transaction(root, 1);
 
@@ -3070,29 +3161,67 @@ out_err:
  * is kind of like crossing a mount point.
  */
 static int fixup_tree_root_location(struct btrfs_root *root,
-                            struct btrfs_key *location,
-                            struct btrfs_root **sub_root,
-                            struct dentry *dentry)
+                                   struct inode *dir,
+                                   struct dentry *dentry,
+                                   struct btrfs_key *location,
+                                   struct btrfs_root **sub_root)
 {
-       struct btrfs_root_item *ri;
+       struct btrfs_path *path;
+       struct btrfs_root *new_root;
+       struct btrfs_root_ref *ref;
+       struct extent_buffer *leaf;
+       int ret;
+       int err = 0;
 
-       if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
-               return 0;
-       if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
-               return 0;
+       path = btrfs_alloc_path();
+       if (!path) {
+               err = -ENOMEM;
+               goto out;
+       }
 
-       *sub_root = btrfs_read_fs_root(root->fs_info, location,
-                                       dentry->d_name.name,
-                                       dentry->d_name.len);
-       if (IS_ERR(*sub_root))
-               return PTR_ERR(*sub_root);
+       err = -ENOENT;
+       ret = btrfs_find_root_ref(root->fs_info->tree_root, path,
+                                 BTRFS_I(dir)->root->root_key.objectid,
+                                 location->objectid);
+       if (ret) {
+               if (ret < 0)
+                       err = ret;
+               goto out;
+       }
 
-       ri = &(*sub_root)->root_item;
-       location->objectid = btrfs_root_dirid(ri);
-       btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
-       location->offset = 0;
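+       /* verify the root ref really points back at this directory entry */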
+       leaf = path->nodes[0];
+       ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
+       if (btrfs_root_ref_dirid(leaf, ref) != dir->i_ino ||
+           btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
+               goto out;
 
-       return 0;
+       ret = memcmp_extent_buffer(leaf, dentry->d_name.name,
+                                  (unsigned long)(ref + 1),
+                                  dentry->d_name.len);
+       if (ret)
+               goto out;
+
+       btrfs_release_path(root->fs_info->tree_root, path);
+
+       new_root = btrfs_read_fs_root_no_name(root->fs_info, location);
+       if (IS_ERR(new_root)) {
+               err = PTR_ERR(new_root);
+               goto out;
+       }
+
+       if (btrfs_root_refs(&new_root->root_item) == 0) {
+               err = -ENOENT;
+               goto out;
+       }
+
+       *sub_root = new_root;
+       location->objectid = btrfs_root_dirid(&new_root->root_item);
+       location->type = BTRFS_INODE_ITEM_KEY;
+       location->offset = 0;
+       err = 0;
+out:
+       btrfs_free_path(path);
+       return err;
 }
 
 static void inode_tree_add(struct inode *inode)
@@ -3101,11 +3230,13 @@ static void inode_tree_add(struct inode *inode)
        struct btrfs_inode *entry;
        struct rb_node **p;
        struct rb_node *parent;
-
 again:
        p = &root->inode_tree.rb_node;
        parent = NULL;
 
+       if (hlist_unhashed(&inode->i_hash))
+               return;
+
        spin_lock(&root->inode_lock);
        while (*p) {
                parent = *p;
@@ -3132,13 +3263,87 @@ again:
 static void inode_tree_del(struct inode *inode)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
+       int empty = 0;
 
        spin_lock(&root->inode_lock);
        if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
                rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
                RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
+               empty = RB_EMPTY_ROOT(&root->inode_tree);
        }
        spin_unlock(&root->inode_lock);
+
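+       /*
+        * if the last inode of an unreferenced root just left the cache,
+        * wait out in-flight SRCU readers and queue the root for deletion
+        */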
+       if (empty && btrfs_root_refs(&root->root_item) == 0) {
+               synchronize_srcu(&root->fs_info->subvol_srcu);
+               spin_lock(&root->inode_lock);
+               empty = RB_EMPTY_ROOT(&root->inode_tree);
+               spin_unlock(&root->inode_lock);
+               if (empty)
+                       btrfs_add_dead_root(root);
+       }
+}
+
+int btrfs_invalidate_inodes(struct btrfs_root *root)
+{
+       struct rb_node *node;
+       struct rb_node *prev;
+       struct btrfs_inode *entry;
+       struct inode *inode;
+       u64 objectid = 0;
+
+       WARN_ON(btrfs_root_refs(&root->root_item) != 0);
+
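+       /*
+        * walk the inode tree in objectid order, pinning each inode with
+        * igrab() and releasing it so btrfs_drop_inode() can evict it
+        */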
+       spin_lock(&root->inode_lock);
+again:
+       node = root->inode_tree.rb_node;
+       prev = NULL;
+       while (node) {
+               prev = node;
+               entry = rb_entry(node, struct btrfs_inode, rb_node);
+
+               if (objectid < entry->vfs_inode.i_ino)
+                       node = node->rb_left;
+               else if (objectid > entry->vfs_inode.i_ino)
+                       node = node->rb_right;
+               else
+                       break;
+       }
+       if (!node) {
+               while (prev) {
+                       entry = rb_entry(prev, struct btrfs_inode, rb_node);
+                       if (objectid <= entry->vfs_inode.i_ino) {
+                               node = prev;
+                               break;
+                       }
+                       prev = rb_next(prev);
+               }
+       }
+       while (node) {
+               entry = rb_entry(node, struct btrfs_inode, rb_node);
+               objectid = entry->vfs_inode.i_ino + 1;
+               inode = igrab(&entry->vfs_inode);
+               if (inode) {
+                       spin_unlock(&root->inode_lock);
+                       if (atomic_read(&inode->i_count) > 1)
+                               d_prune_aliases(inode);
+                       /*
+                        * btrfs_drop_inode will remove it from
+                        * the inode cache when its usage count
+                        * hits zero.
+                        */
+                       iput(inode);
+                       cond_resched();
+                       spin_lock(&root->inode_lock);
+                       goto again;
+               }
+
+               if (cond_resched_lock(&root->inode_lock))
+                       goto again;
+
+               node = rb_next(node);
+       }
+       spin_unlock(&root->inode_lock);
+       return 0;
 }
 
 static noinline void init_btrfs_i(struct inode *inode)
@@ -3225,15 +3430,41 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
        return inode;
 }
 
+static struct inode *new_simple_dir(struct super_block *s,
+                                   struct btrfs_key *key,
+                                   struct btrfs_root *root)
+{
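+       /* a dummy in-memory directory for a dangling subvolume link */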
+       struct inode *inode = new_inode(s);
+
+       if (!inode)
+               return ERR_PTR(-ENOMEM);
+
+       init_btrfs_i(inode);
+
+       BTRFS_I(inode)->root = root;
+       memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
+       BTRFS_I(inode)->dummy_inode = 1;
+
+       inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
+       inode->i_op = &simple_dir_inode_operations;
+       inode->i_fop = &simple_dir_operations;
+       inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
+       inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+
+       return inode;
+}
+
 struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
 {
        struct inode *inode;
-       struct btrfs_inode *bi = BTRFS_I(dir);
-       struct btrfs_root *root = bi->root;
+       struct btrfs_root *root = BTRFS_I(dir)->root;
        struct btrfs_root *sub_root = root;
        struct btrfs_key location;
+       int index;
        int ret;
 
+       dentry->d_op = &btrfs_dentry_operations;
+
        if (dentry->d_name.len > BTRFS_NAME_LEN)
                return ERR_PTR(-ENAMETOOLONG);
 
@@ -3242,29 +3473,50 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
        if (ret < 0)
                return ERR_PTR(ret);
 
-       inode = NULL;
-       if (location.objectid) {
-               ret = fixup_tree_root_location(root, &location, &sub_root,
-                                               dentry);
-               if (ret < 0)
-                       return ERR_PTR(ret);
-               if (ret > 0)
-                       return ERR_PTR(-ENOENT);
+       if (location.objectid == 0)
+               return NULL;
+
+       if (location.type == BTRFS_INODE_ITEM_KEY) {
+               inode = btrfs_iget(dir->i_sb, &location, root);
+               return inode;
+       }
+
+       BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY);
+
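+       /* the entry is a subvolume link; resolve its root under subvol_srcu */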
+       index = srcu_read_lock(&root->fs_info->subvol_srcu);
+       ret = fixup_tree_root_location(root, dir, dentry,
+                                      &location, &sub_root);
+       if (ret < 0) {
+               if (ret != -ENOENT)
+                       inode = ERR_PTR(ret);
+               else
+                       inode = new_simple_dir(dir->i_sb, &location, sub_root);
+       } else {
                inode = btrfs_iget(dir->i_sb, &location, sub_root);
-               if (IS_ERR(inode))
-                       return ERR_CAST(inode);
        }
+       srcu_read_unlock(&root->fs_info->subvol_srcu, index);
+
        return inode;
 }
 
+static int btrfs_dentry_delete(struct dentry *dentry)
+{
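+       /* tell the VFS to drop dentries that point into deleted subvolumes */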
+       struct btrfs_root *root;
+
+       if (!dentry->d_inode)
+               return 0;
+
+       root = BTRFS_I(dentry->d_inode)->root;
+       if (btrfs_root_refs(&root->root_item) == 0)
+               return 1;
+       return 0;
+}
+
 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
                                   struct nameidata *nd)
 {
        struct inode *inode;
 
-       if (dentry->d_name.len > BTRFS_NAME_LEN)
-               return ERR_PTR(-ENAMETOOLONG);
-
        inode = btrfs_lookup_dentry(dir, dentry);
        if (IS_ERR(inode))
                return ERR_CAST(inode);
@@ -3603,9 +3855,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        if (ret != 0)
                goto fail;
 
-       if (objectid > root->highest_inode)
-               root->highest_inode = objectid;
-
        inode->i_uid = current_fsuid();
 
        if (dir && (dir->i_mode & S_ISGID)) {
@@ -3673,26 +3922,35 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
                   struct inode *parent_inode, struct inode *inode,
                   const char *name, int name_len, int add_backref, u64 index)
 {
-       int ret;
+       int ret = 0;
        struct btrfs_key key;
        struct btrfs_root *root = BTRFS_I(parent_inode)->root;
 
-       key.objectid = inode->i_ino;
-       btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
-       key.offset = 0;
+       if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+               memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key));
+       } else {
+               key.objectid = inode->i_ino;
+               btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
+               key.offset = 0;
+       }
+
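+       /*
+        * a subvolume link is recorded as a root ref in the tree root;
+        * everything else gets a normal inode backref
+        */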
+       if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+               ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
+                                        key.objectid, root->root_key.objectid,
+                                        parent_inode->i_ino,
+                                        index, name, name_len);
+       } else if (add_backref) {
+               ret = btrfs_insert_inode_ref(trans, root,
+                                            name, name_len, inode->i_ino,
+                                            parent_inode->i_ino, index);
+       }
 
-       ret = btrfs_insert_dir_item(trans, root, name, name_len,
-                                   parent_inode->i_ino,
-                                   &key, btrfs_inode_type(inode),
-                                   index);
        if (ret == 0) {
-               if (add_backref) {
-                       ret = btrfs_insert_inode_ref(trans, root,
-                                                    name, name_len,
-                                                    inode->i_ino,
-                                                    parent_inode->i_ino,
-                                                    index);
-               }
+               ret = btrfs_insert_dir_item(trans, root, name, name_len,
+                                           parent_inode->i_ino, &key,
+                                           btrfs_inode_type(inode), index);
+               BUG_ON(ret);
+
                btrfs_i_size_write(parent_inode, parent_inode->i_size +
                                   name_len * 2);
                parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
@@ -3875,18 +4133,16 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 
        err = btrfs_add_nondir(trans, dentry, inode, 1, index);
 
-       if (err)
-               drop_inode = 1;
-
-       btrfs_update_inode_block_group(trans, dir);
-       err = btrfs_update_inode(trans, root, inode);
-
-       if (err)
+       if (err) {
                drop_inode = 1;
+       } else {
+               btrfs_update_inode_block_group(trans, dir);
+               err = btrfs_update_inode(trans, root, inode);
+               BUG_ON(err);
+               btrfs_log_new_name(trans, inode, NULL, dentry->d_parent);
+       }
 
        nr = trans->blocks_used;
-
-       btrfs_log_new_name(trans, inode, NULL, dentry->d_parent);
        btrfs_end_transaction_throttle(trans, root);
 fail:
        if (drop_inode) {
@@ -4064,11 +4320,11 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
        int compressed;
 
 again:
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, start, len);
        if (em)
                em->bdev = root->fs_info->fs_devices->latest_bdev;
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
 
        if (em) {
                if (em->start > start || em->start + em->len <= start)
@@ -4215,6 +4471,11 @@ again:
                                map = kmap(page);
                                read_extent_buffer(leaf, map + pg_offset, ptr,
                                                   copy_size);
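+                               /* zero the rest of the page past the inline data */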
+                               if (pg_offset + copy_size < PAGE_CACHE_SIZE) {
+                                       memset(map + pg_offset + copy_size, 0,
+                                              PAGE_CACHE_SIZE - pg_offset -
+                                              copy_size);
+                               }
                                kunmap(page);
                        }
                        flush_dcache_page(page);
@@ -4259,7 +4520,7 @@ insert:
        }
 
        err = 0;
-       spin_lock(&em_tree->lock);
+       write_lock(&em_tree->lock);
        ret = add_extent_mapping(em_tree, em);
        /* it is possible that someone inserted the extent into the tree
         * while we had the lock dropped.  It is also possible that
@@ -4299,7 +4560,7 @@ insert:
                        err = 0;
                }
        }
-       spin_unlock(&em_tree->lock);
+       write_unlock(&em_tree->lock);
 out:
        if (path)
                btrfs_free_path(path);
@@ -4398,13 +4659,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
        u64 page_start = page_offset(page);
        u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
 
+
+       /*
+        * we have the page locked, so new writeback can't start,
+        * and the dirty bit won't be cleared while we are here.
+        *
+        * Wait for IO on this page so that we can safely clear
+        * the PagePrivate2 bit and do ordered accounting
+        */
        wait_on_page_writeback(page);
+
        tree = &BTRFS_I(page->mapping->host)->io_tree;
        if (offset) {
                btrfs_releasepage(page, GFP_NOFS);
                return;
        }
-
        lock_extent(tree, page_start, page_end, GFP_NOFS);
        ordered = btrfs_lookup_ordered_extent(page->mapping->host,
                                           page_offset(page));
@@ -4415,16 +4684,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
                 */
                clear_extent_bit(tree, page_start, page_end,
                                 EXTENT_DIRTY | EXTENT_DELALLOC |
-                                EXTENT_LOCKED, 1, 0, GFP_NOFS);
-               btrfs_finish_ordered_io(page->mapping->host,
-                                       page_start, page_end);
+                                EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
+               /*
+                * whoever cleared the private bit is responsible
+                * for the finish_ordered_io
+                */
+               if (TestClearPagePrivate2(page)) {
+                       btrfs_finish_ordered_io(page->mapping->host,
+                                               page_start, page_end);
+               }
                btrfs_put_ordered_extent(ordered);
                lock_extent(tree, page_start, page_end, GFP_NOFS);
        }
        clear_extent_bit(tree, page_start, page_end,
-                EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
-                EXTENT_ORDERED,
-                1, 1, GFP_NOFS);
+                EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
+                1, 1, NULL, GFP_NOFS);
        __btrfs_releasepage(page, GFP_NOFS);
 
        ClearPageChecked(page);
@@ -4521,11 +4795,14 @@ again:
        }
        ClearPageChecked(page);
        set_page_dirty(page);
+       SetPageUptodate(page);
 
        BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
        unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
 
 out_unlock:
+       if (!ret)
+               return VM_FAULT_LOCKED;
        unlock_page(page);
 out:
        return ret;
@@ -4594,11 +4871,11 @@ out:
  * create a new subvolume directory/inode (helper for the ioctl).
  */
 int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
-                            struct btrfs_root *new_root, struct dentry *dentry,
+                            struct btrfs_root *new_root,
                             u64 new_dirid, u64 alloc_hint)
 {
        struct inode *inode;
-       int error;
+       int err;
        u64 index = 0;
 
        inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid,
@@ -4611,11 +4888,10 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
        inode->i_nlink = 1;
        btrfs_i_size_write(inode, 0);
 
-       error = btrfs_update_inode(trans, new_root, inode);
-       if (error)
-               return error;
+       err = btrfs_update_inode(trans, new_root, inode);
+       BUG_ON(err);
 
-       d_instantiate(dentry, inode);
+       iput(inode);
        return 0;
 }
 
@@ -4693,6 +4969,16 @@ void btrfs_destroy_inode(struct inode *inode)
        kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
 }
 
+void btrfs_drop_inode(struct inode *inode)
+{
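+       /*
+        * inodes in a deleted subvolume keep i_nlink > 0; force them out
+        * of the cache anyway so the dead root can be cleaned up
+        */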
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+
+       if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0)
+               generic_delete_inode(inode);
+       else
+               generic_drop_inode(inode);
+}
+
 static void init_once(void *foo)
 {
        struct btrfs_inode *ei = (struct btrfs_inode *) foo;
@@ -4761,31 +5047,32 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 {
        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(old_dir)->root;
+       struct btrfs_root *dest = BTRFS_I(new_dir)->root;
        struct inode *new_inode = new_dentry->d_inode;
        struct inode *old_inode = old_dentry->d_inode;
        struct timespec ctime = CURRENT_TIME;
        u64 index = 0;
+       u64 root_objectid;
        int ret;
 
-       /* we're not allowed to rename between subvolumes */
-       if (BTRFS_I(old_inode)->root->root_key.objectid !=
-           BTRFS_I(new_dir)->root->root_key.objectid)
+       if (new_dir->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
+               return -EPERM;
+
+       /* only a subvolume link may be renamed across subvolume boundaries */
+       if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
                return -EXDEV;
 
-       if (S_ISDIR(old_inode->i_mode) && new_inode &&
-           new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
+       if (old_inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
+           (new_inode && new_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID))
                return -ENOTEMPTY;
-       }
 
-       /* to rename a snapshot or subvolume, we need to juggle the
-        * backrefs.  This isn't coded yet
-        */
-       if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
-               return -EXDEV;
+       if (S_ISDIR(old_inode->i_mode) && new_inode &&
+           new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
+               return -ENOTEMPTY;
 
        ret = btrfs_check_metadata_free_space(root);
        if (ret)
-               goto out_unlock;
+               return ret;
 
        /*
         * we're using rename to replace one file with another.
@@ -4796,8 +5083,40 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
            old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
                filemap_flush(old_inode->i_mapping);
 
+       /* close the racy window with snapshot create/destroy ioctl */
+       if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
+               down_read(&root->fs_info->subvol_sem);
+
        trans = btrfs_start_transaction(root, 1);
+       btrfs_set_trans_block_group(trans, new_dir);
+
+       if (dest != root)
+               btrfs_record_root_in_trans(trans, dest);
 
+       ret = btrfs_set_inode_index(new_dir, &index);
+       if (ret)
+               goto out_fail;
+
+       if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+               /* force full log commit if subvolume involved. */
+               root->fs_info->last_trans_log_full_commit = trans->transid;
+       } else {
+               ret = btrfs_insert_inode_ref(trans, dest,
+                                            new_dentry->d_name.name,
+                                            new_dentry->d_name.len,
+                                            old_inode->i_ino,
+                                            new_dir->i_ino, index);
+               if (ret)
+                       goto out_fail;
+               /*
+                * this is an ugly little race, but the rename is required
+                * to make sure that if we crash, the inode is either at the
+                * old name or the new one.  pinning the log transaction lets
+                * us make sure we don't allow a log commit to come in after
+                * we unlink the name but before we add the new name back in.
+                */
+               btrfs_pin_log_trans(root);
+       }
        /*
         * make sure the inode gets flushed if it is replacing
         * something.
@@ -4807,18 +5126,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                btrfs_add_ordered_operation(trans, root, old_inode);
        }
 
-       /*
-        * this is an ugly little race, but the rename is required to make
-        * sure that if we crash, the inode is either at the old name
-        * or the new one.  pinning the log transaction lets us make sure
-        * we don't allow a log commit to come in after we unlink the
-        * name but before we add the new name back in.
-        */
-       btrfs_pin_log_trans(root);
-
-       btrfs_set_trans_block_group(trans, new_dir);
-
-       btrfs_inc_nlink(old_dentry->d_inode);
        old_dir->i_ctime = old_dir->i_mtime = ctime;
        new_dir->i_ctime = new_dir->i_mtime = ctime;
        old_inode->i_ctime = ctime;
@@ -4826,47 +5133,58 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (old_dentry->d_parent != new_dentry->d_parent)
                btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
 
-       ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode,
-                                old_dentry->d_name.name,
-                                old_dentry->d_name.len);
-       if (ret)
-               goto out_fail;
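+       /* a subvolume link is unlinked via its root backref, not an inode ref */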
+       if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+               root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
+               ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid,
+                                       old_dentry->d_name.name,
+                                       old_dentry->d_name.len);
+       } else {
+               btrfs_inc_nlink(old_dentry->d_inode);
+               ret = btrfs_unlink_inode(trans, root, old_dir,
+                                        old_dentry->d_inode,
+                                        old_dentry->d_name.name,
+                                        old_dentry->d_name.len);
+       }
+       BUG_ON(ret);
 
        if (new_inode) {
                new_inode->i_ctime = CURRENT_TIME;
-               ret = btrfs_unlink_inode(trans, root, new_dir,
-                                        new_dentry->d_inode,
-                                        new_dentry->d_name.name,
-                                        new_dentry->d_name.len);
-               if (ret)
-                       goto out_fail;
+               if (unlikely(new_inode->i_ino ==
+                            BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
+                       root_objectid = BTRFS_I(new_inode)->location.objectid;
+                       ret = btrfs_unlink_subvol(trans, dest, new_dir,
+                                               root_objectid,
+                                               new_dentry->d_name.name,
+                                               new_dentry->d_name.len);
+                       BUG_ON(new_inode->i_nlink == 0);
+               } else {
+                       ret = btrfs_unlink_inode(trans, dest, new_dir,
+                                                new_dentry->d_inode,
+                                                new_dentry->d_name.name,
+                                                new_dentry->d_name.len);
+               }
+               BUG_ON(ret);
                if (new_inode->i_nlink == 0) {
                        ret = btrfs_orphan_add(trans, new_dentry->d_inode);
-                       if (ret)
-                               goto out_fail;
+                       BUG_ON(ret);
                }
-
        }
-       ret = btrfs_set_inode_index(new_dir, &index);
-       if (ret)
-               goto out_fail;
 
-       ret = btrfs_add_link(trans, new_dentry->d_parent->d_inode,
-                            old_inode, new_dentry->d_name.name,
-                            new_dentry->d_name.len, 1, index);
-       if (ret)
-               goto out_fail;
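+       /* the backref for the new name was inserted earlier, so add_backref is 0 */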
+       ret = btrfs_add_link(trans, new_dir, old_inode,
+                            new_dentry->d_name.name,
+                            new_dentry->d_name.len, 0, index);
+       BUG_ON(ret);
 
-       btrfs_log_new_name(trans, old_inode, old_dir,
-                                      new_dentry->d_parent);
+       if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
+               btrfs_log_new_name(trans, old_inode, old_dir,
+                                  new_dentry->d_parent);
+               btrfs_end_log_trans(root);
+       }
 out_fail:
-
-       /* this btrfs_end_log_trans just allows the current
-        * log-sub transaction to complete
-        */
-       btrfs_end_log_trans(root);
        btrfs_end_transaction_throttle(trans, root);
-out_unlock:
+
+       if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
+               up_read(&root->fs_info->subvol_sem);
        return ret;
 }
 
@@ -5058,6 +5376,8 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans,
                                                  0, 0, 0,
                                                  BTRFS_FILE_EXTENT_PREALLOC);
                BUG_ON(ret);
+               btrfs_drop_extent_cache(inode, cur_offset,
+                                       cur_offset + ins.offset - 1, 0);
                num_bytes -= ins.offset;
                cur_offset += ins.offset;
                alloc_hint = ins.objectid + ins.offset;
@@ -5223,6 +5543,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = {
        .lookup         = btrfs_lookup,
        .permission     = btrfs_permission,
 };
+
 static struct file_operations btrfs_dir_file_operations = {
        .llseek         = generic_file_llseek,
        .read           = generic_read_dir,
@@ -5269,6 +5590,7 @@ static const struct address_space_operations btrfs_aops = {
        .invalidatepage = btrfs_invalidatepage,
        .releasepage    = btrfs_releasepage,
        .set_page_dirty = btrfs_set_page_dirty,
+       .error_remove_page = generic_error_remove_page,
 };
 
 static const struct address_space_operations btrfs_symlink_aops = {
@@ -5309,3 +5631,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
        .listxattr      = btrfs_listxattr,
        .removexattr    = btrfs_removexattr,
 };
+
+struct dentry_operations btrfs_dentry_operations = {
+       .d_delete       = btrfs_dentry_delete,
+};
index bd88f25..a8577a7 100644
@@ -230,8 +230,8 @@ static noinline int create_subvol(struct btrfs_root *root,
        struct btrfs_root_item root_item;
        struct btrfs_inode_item *inode_item;
        struct extent_buffer *leaf;
-       struct btrfs_root *new_root = root;
-       struct inode *dir;
+       struct btrfs_root *new_root;
+       struct inode *dir = dentry->d_parent->d_inode;
        int ret;
        int err;
        u64 objectid;
@@ -241,7 +241,7 @@ static noinline int create_subvol(struct btrfs_root *root,
 
        ret = btrfs_check_metadata_free_space(root);
        if (ret)
-               goto fail_commit;
+               return ret;
 
        trans = btrfs_start_transaction(root, 1);
        BUG_ON(!trans);
@@ -304,11 +304,17 @@ static noinline int create_subvol(struct btrfs_root *root,
        if (ret)
                goto fail;
 
+       key.offset = (u64)-1;
+       new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
+       BUG_ON(IS_ERR(new_root));
+
+       btrfs_record_root_in_trans(trans, new_root);
+
+       ret = btrfs_create_subvol_root(trans, new_root, new_dirid,
+                                      BTRFS_I(dir)->block_group);
        /*
         * insert the directory item
         */
-       key.offset = (u64)-1;
-       dir = dentry->d_parent->d_inode;
        ret = btrfs_set_inode_index(dir, &index);
        BUG_ON(ret);
 
@@ -322,44 +328,18 @@ static noinline int create_subvol(struct btrfs_root *root,
        ret = btrfs_update_inode(trans, root, dir);
        BUG_ON(ret);
 
-       /* add the backref first */
        ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
-                                objectid, BTRFS_ROOT_BACKREF_KEY,
-                                root->root_key.objectid,
+                                objectid, root->root_key.objectid,
                                 dir->i_ino, index, name, namelen);
 
        BUG_ON(ret);
 
-       /* now add the forward ref */
-       ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
-                                root->root_key.objectid, BTRFS_ROOT_REF_KEY,
-                                objectid,
-                                dir->i_ino, index, name, namelen);
-
-       BUG_ON(ret);
-
-       ret = btrfs_commit_transaction(trans, root);
-       if (ret)
-               goto fail_commit;
-
-       new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
-       BUG_ON(!new_root);
-
-       trans = btrfs_start_transaction(new_root, 1);
-       BUG_ON(!trans);
-
-       ret = btrfs_create_subvol_root(trans, new_root, dentry, new_dirid,
-                                      BTRFS_I(dir)->block_group);
-       if (ret)
-               goto fail;
-
+       d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
 fail:
        nr = trans->blocks_used;
-       err = btrfs_commit_transaction(trans, new_root);
+       err = btrfs_commit_transaction(trans, root);
        if (err && !ret)
                ret = err;
-fail_commit:
-       btrfs_btree_balance_dirty(root, nr);
        return ret;
 }
 
@@ -420,14 +400,15 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
  * sys_mkdirat and vfs_mkdir, but we only do a single component lookup
  * inside this filesystem so it's quite a bit simpler.
  */
-static noinline int btrfs_mksubvol(struct path *parent, char *name,
-                                  int mode, int namelen,
+static noinline int btrfs_mksubvol(struct path *parent,
+                                  char *name, int namelen,
                                   struct btrfs_root *snap_src)
 {
+       struct inode *dir  = parent->dentry->d_inode;
        struct dentry *dentry;
        int error;
 
-       mutex_lock_nested(&parent->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
 
        dentry = lookup_one_len(name, parent->dentry, namelen);
        error = PTR_ERR(dentry);
@@ -438,99 +419,39 @@ static noinline int btrfs_mksubvol(struct path *parent, char *name,
        if (dentry->d_inode)
                goto out_dput;
 
-       if (!IS_POSIXACL(parent->dentry->d_inode))
-               mode &= ~current_umask();
-
        error = mnt_want_write(parent->mnt);
        if (error)
                goto out_dput;
 
-       error = btrfs_may_create(parent->dentry->d_inode, dentry);
+       error = btrfs_may_create(dir, dentry);
        if (error)
                goto out_drop_write;
 
-       /*
-        * Actually perform the low-level subvolume creation after all
-        * this VFS fuzz.
-        *
-        * Eventually we want to pass in an inode under which we create this
-        * subvolume, but for now all are under the filesystem root.
-        *
-        * Also we should pass on the mode eventually to allow creating new
-        * subvolume with specific mode bits.
-        */
+       down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
+
+       if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0)
+               goto out_up_read;
+
        if (snap_src) {
-               struct dentry *dir = dentry->d_parent;
-               struct dentry *test = dir->d_parent;
-               struct btrfs_path *path = btrfs_alloc_path();
-               int ret;
-               u64 test_oid;
-               u64 parent_oid = BTRFS_I(dir->d_inode)->root->root_key.objectid;
-
-               test_oid = snap_src->root_key.objectid;
-
-               ret = btrfs_find_root_ref(snap_src->fs_info->tree_root,
-                                         path, parent_oid, test_oid);
-               if (ret == 0)
-                       goto create;
-               btrfs_release_path(snap_src->fs_info->tree_root, path);
-
-               /* we need to make sure we aren't creating a directory loop
-                * by taking a snapshot of something that has our current
-                * subvol in its directory tree.  So, this loops through
-                * the dentries and checks the forward refs for each subvolume
-                * to see if is references the subvolume where we are
-                * placing this new snapshot.
-                */
-               while (1) {
-                       if (!test ||
-                           dir == snap_src->fs_info->sb->s_root ||
-                           test == snap_src->fs_info->sb->s_root ||
-                           test->d_inode->i_sb != snap_src->fs_info->sb) {
-                               break;
-                       }
-                       if (S_ISLNK(test->d_inode->i_mode)) {
-                               printk(KERN_INFO "Btrfs symlink in snapshot "
-                                      "path, failed\n");
-                               error = -EMLINK;
-                               btrfs_free_path(path);
-                               goto out_drop_write;
-                       }
-                       test_oid =
-                               BTRFS_I(test->d_inode)->root->root_key.objectid;
-                       ret = btrfs_find_root_ref(snap_src->fs_info->tree_root,
-                                 path, test_oid, parent_oid);
-                       if (ret == 0) {
-                               printk(KERN_INFO "Btrfs snapshot creation "
-                                      "failed, looping\n");
-                               error = -EMLINK;
-                               btrfs_free_path(path);
-                               goto out_drop_write;
-                       }
-                       btrfs_release_path(snap_src->fs_info->tree_root, path);
-                       test = test->d_parent;
-               }
-create:
-               btrfs_free_path(path);
-               error = create_snapshot(snap_src, dentry, name, namelen);
+               error = create_snapshot(snap_src, dentry,
+                                       name, namelen);
        } else {
-               error = create_subvol(BTRFS_I(parent->dentry->d_inode)->root,
-                                     dentry, name, namelen);
+               error = create_subvol(BTRFS_I(dir)->root, dentry,
+                                     name, namelen);
        }
-       if (error)
-               goto out_drop_write;
-
-       fsnotify_mkdir(parent->dentry->d_inode, dentry);
+       if (!error)
+               fsnotify_mkdir(dir, dentry);
+out_up_read:
+       up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
 out_drop_write:
        mnt_drop_write(parent->mnt);
 out_dput:
        dput(dentry);
 out_unlock:
-       mutex_unlock(&parent->dentry->d_inode->i_mutex);
+       mutex_unlock(&dir->i_mutex);
        return error;
 }
 
-
 static int btrfs_defrag_file(struct file *file)
 {
        struct inode *inode = fdentry(file)->d_inode;
@@ -596,9 +517,8 @@ again:
                clear_page_dirty_for_io(page);
 
                btrfs_set_extent_delalloc(inode, page_start, page_end);
-
-               unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
                set_page_dirty(page);
+               unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
                unlock_page(page);
                page_cache_release(page);
                balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
@@ -609,7 +529,8 @@ out_unlock:
        return 0;
 }
 
-static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
+static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
+                                       void __user *arg)
 {
        u64 new_size;
        u64 old_size;
@@ -718,10 +639,7 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
 {
        struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
        struct btrfs_ioctl_vol_args *vol_args;
-       struct btrfs_dir_item *di;
-       struct btrfs_path *path;
        struct file *src_file;
-       u64 root_dirid;
        int namelen;
        int ret = 0;
 
@@ -739,32 +657,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
                goto out;
        }
 
-       path = btrfs_alloc_path();
-       if (!path) {
-               ret = -ENOMEM;
-               goto out;
-       }
-
-       root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
-       di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
-                           path, root_dirid,
-                           vol_args->name, namelen, 0);
-       btrfs_free_path(path);
-
-       if (di && !IS_ERR(di)) {
-               ret = -EEXIST;
-               goto out;
-       }
-
-       if (IS_ERR(di)) {
-               ret = PTR_ERR(di);
-               goto out;
-       }
-
        if (subvol) {
-               ret = btrfs_mksubvol(&file->f_path, vol_args->name,
-                                    file->f_path.dentry->d_inode->i_mode,
-                                    namelen, NULL);
+               ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
+                                    NULL);
        } else {
                struct inode *src_inode;
                src_file = fget(vol_args->fd);
@@ -781,17 +676,156 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
                        fput(src_file);
                        goto out;
                }
-               ret = btrfs_mksubvol(&file->f_path, vol_args->name,
-                            file->f_path.dentry->d_inode->i_mode,
-                            namelen, BTRFS_I(src_inode)->root);
+               ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
+                                    BTRFS_I(src_inode)->root);
                fput(src_file);
        }
-
 out:
        kfree(vol_args);
        return ret;
 }
 
+/*
+ * helper to check if the subvolume references other subvolumes
+ */
+static noinline int may_destroy_subvol(struct btrfs_root *root)
+{
+       struct btrfs_path *path;
+       struct btrfs_key key;
+       int ret;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       key.objectid = root->root_key.objectid;
+       key.type = BTRFS_ROOT_REF_KEY;
+       key.offset = (u64)-1;
+
+       ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
+                               &key, path, 0, 0);
+       if (ret < 0)
+               goto out;
+       BUG_ON(ret == 0);
+
+       ret = 0;
+       if (path->slots[0] > 0) {
+               path->slots[0]--;
+               btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+               if (key.objectid == root->root_key.objectid &&
+                   key.type == BTRFS_ROOT_REF_KEY)
+                       ret = -ENOTEMPTY;
+       }
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
+static noinline int btrfs_ioctl_snap_destroy(struct file *file,
+                                            void __user *arg)
+{
+       struct dentry *parent = fdentry(file);
+       struct dentry *dentry;
+       struct inode *dir = parent->d_inode;
+       struct inode *inode;
+       struct btrfs_root *root = BTRFS_I(dir)->root;
+       struct btrfs_root *dest = NULL;
+       struct btrfs_ioctl_vol_args *vol_args;
+       struct btrfs_trans_handle *trans;
+       int namelen;
+       int ret;
+       int err = 0;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       vol_args = memdup_user(arg, sizeof(*vol_args));
+       if (IS_ERR(vol_args))
+               return PTR_ERR(vol_args);
+
+       vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+       namelen = strlen(vol_args->name);
+       if (strchr(vol_args->name, '/') ||
+           strncmp(vol_args->name, "..", namelen) == 0) {
+               err = -EINVAL;
+               goto out;
+       }
+
+       err = mnt_want_write(file->f_path.mnt);
+       if (err)
+               goto out;
+
+       mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+       dentry = lookup_one_len(vol_args->name, parent, namelen);
+       if (IS_ERR(dentry)) {
+               err = PTR_ERR(dentry);
+               goto out_unlock_dir;
+       }
+
+       if (!dentry->d_inode) {
+               err = -ENOENT;
+               goto out_dput;
+       }
+
+       inode = dentry->d_inode;
+       if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
+               err = -EINVAL;
+               goto out_dput;
+       }
+
+       dest = BTRFS_I(inode)->root;
+
+       mutex_lock(&inode->i_mutex);
+       err = d_invalidate(dentry);
+       if (err)
+               goto out_unlock;
+
+       down_write(&root->fs_info->subvol_sem);
+
+       err = may_destroy_subvol(dest);
+       if (err)
+               goto out_up_write;
+
+       trans = btrfs_start_transaction(root, 1);
+       ret = btrfs_unlink_subvol(trans, root, dir,
+                               dest->root_key.objectid,
+                               dentry->d_name.name,
+                               dentry->d_name.len);
+       BUG_ON(ret);
+
+       btrfs_record_root_in_trans(trans, dest);
+
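+       /*
+        * mark the root dead: clear the drop progress, zero its refs and
+        * record an orphan item so the deletion isn't lost if we crash
+        */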
+       memset(&dest->root_item.drop_progress, 0,
+               sizeof(dest->root_item.drop_progress));
+       dest->root_item.drop_level = 0;
+       btrfs_set_root_refs(&dest->root_item, 0);
+
+       ret = btrfs_insert_orphan_item(trans,
+                               root->fs_info->tree_root,
+                               dest->root_key.objectid);
+       BUG_ON(ret);
+
+       ret = btrfs_commit_transaction(trans, root);
+       BUG_ON(ret);
+       inode->i_flags |= S_DEAD;
+out_up_write:
+       up_write(&root->fs_info->subvol_sem);
+out_unlock:
+       mutex_unlock(&inode->i_mutex);
+       if (!err) {
+               btrfs_invalidate_inodes(dest);
+               d_delete(dentry);
+       }
+out_dput:
+       dput(dentry);
+out_unlock_dir:
+       mutex_unlock(&dir->i_mutex);
+       mnt_drop_write(file->f_path.mnt);
+out:
+       kfree(vol_args);
+       return err;
+}
+
 static int btrfs_ioctl_defrag(struct file *file)
 {
        struct inode *inode = fdentry(file)->d_inode;
@@ -865,8 +899,8 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
        return ret;
 }
 
-static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
-               u64 off, u64 olen, u64 destoff)
+static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
+                                      u64 off, u64 olen, u64 destoff)
 {
        struct inode *inode = fdentry(file)->d_inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -976,7 +1010,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 
        /* punch hole in destination first */
        btrfs_drop_extents(trans, root, inode, off, off + len,
-                          off + len, 0, &hint_byte);
+                          off + len, 0, &hint_byte, 1);
 
        /* clone data */
        key.objectid = src->i_ino;
@@ -1071,8 +1105,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
                                        datao += off - key.offset;
                                        datal -= off - key.offset;
                                }
-                               if (key.offset + datao + datal + key.offset >
-                                   off + len)
+                               if (key.offset + datao + datal > off + len)
                                        datal = off + len - key.offset - datao;
                                /* disko == 0 means it's a hole */
                                if (!disko)
@@ -1258,6 +1291,8 @@ long btrfs_ioctl(struct file *file, unsigned int
                return btrfs_ioctl_snap_create(file, argp, 0);
        case BTRFS_IOC_SUBVOL_CREATE:
                return btrfs_ioctl_snap_create(file, argp, 1);
+       case BTRFS_IOC_SNAP_DESTROY:
+               return btrfs_ioctl_snap_destroy(file, argp);
        case BTRFS_IOC_DEFRAG:
                return btrfs_ioctl_defrag(file);
        case BTRFS_IOC_RESIZE:
index b320b10..bc49914 100644
@@ -65,5 +65,6 @@ struct btrfs_ioctl_clone_range_args {
 
 #define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \
                                   struct btrfs_ioctl_vol_args)
-
+#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \
+                               struct btrfs_ioctl_vol_args)
 #endif
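
For reference, userspace might drive the new snapshot-destroy ioctl roughly as
sketched below. This is an illustrative sketch, not part of the patch: the
helper name, includes and error handling are assumptions; only the ioctl
number and struct btrfs_ioctl_vol_args come from the header above. The fd
must refer to the snapshot's parent directory, matching fdentry(file) in
btrfs_ioctl_snap_destroy().

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include "ioctl.h"	/* the btrfs ioctl header changed above */

/* hypothetical helper: destroy the snapshot "name" under parent_fd */
static int snap_destroy(int parent_fd, const char *name)
{
	struct btrfs_ioctl_vol_args args;

	memset(&args, 0, sizeof(args));
	strncpy(args.name, name, BTRFS_PATH_NAME_MAX);
	if (ioctl(parent_fd, BTRFS_IOC_SNAP_DESTROY, &args) < 0) {
		perror("BTRFS_IOC_SNAP_DESTROY");
		return -1;
	}
	return 0;
}

Note that the kernel side requires CAP_SYS_ADMIN and rejects names that
contain '/' or equal "..".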
index 7b2f401..b5d6d24 100644
@@ -159,8 +159,6 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
  *
  * len is the length of the extent
  *
- * This also sets the EXTENT_ORDERED bit on the range in the inode.
- *
  * The tree is given a single reference on the ordered extent that was
  * inserted.
  */
@@ -181,6 +179,7 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
        entry->start = start;
        entry->len = len;
        entry->disk_len = disk_len;
+       entry->bytes_left = len;
        entry->inode = inode;
        if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
                set_bit(type, &entry->flags);
@@ -195,9 +194,6 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
                           &entry->rb_node);
        BUG_ON(node);
 
-       set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset,
-                          entry_end(entry) - 1, GFP_NOFS);
-
        spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
        list_add_tail(&entry->root_extent_list,
                      &BTRFS_I(inode)->root->fs_info->ordered_extents);
@@ -241,13 +237,10 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
        struct btrfs_ordered_inode_tree *tree;
        struct rb_node *node;
        struct btrfs_ordered_extent *entry;
-       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        int ret;
 
        tree = &BTRFS_I(inode)->ordered_tree;
        mutex_lock(&tree->mutex);
-       clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1,
-                            GFP_NOFS);
        node = tree_search(tree, file_offset);
        if (!node) {
                ret = 1;
@@ -260,11 +253,16 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
                goto out;
        }
 
-       ret = test_range_bit(io_tree, entry->file_offset,
-                            entry->file_offset + entry->len - 1,
-                            EXTENT_ORDERED, 0);
-       if (ret == 0)
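+       /* completion is now tracked via bytes_left instead of EXTENT_ORDERED */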
+       if (io_size > entry->bytes_left) {
+               printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
+                      (unsigned long long)entry->bytes_left,
+                      (unsigned long long)io_size);
+       }
+       entry->bytes_left -= io_size;
+       if (entry->bytes_left == 0)
                ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
+       else
+               ret = 1;
 out:
        mutex_unlock(&tree->mutex);
        return ret == 0;
@@ -476,6 +474,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
        u64 orig_end;
        u64 wait_end;
        struct btrfs_ordered_extent *ordered;
+       int found;
 
        if (start + len < start) {
                orig_end = INT_LIMIT(loff_t);
@@ -502,6 +501,7 @@ again:
                                           orig_end >> PAGE_CACHE_SHIFT);
 
        end = orig_end;
+       found = 0;
        while (1) {
                ordered = btrfs_lookup_first_ordered_extent(inode, end);
                if (!ordered)
@@ -514,6 +514,7 @@ again:
                        btrfs_put_ordered_extent(ordered);
                        break;
                }
+               found++;
                btrfs_start_ordered_extent(inode, ordered, 1);
                end = ordered->file_offset;
                btrfs_put_ordered_extent(ordered);
@@ -521,8 +522,8 @@ again:
                        break;
                end--;
        }
-       if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
-                          EXTENT_ORDERED | EXTENT_DELALLOC, 0)) {
+       if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
+                          EXTENT_DELALLOC, 0, NULL)) {
                schedule_timeout(1);
                goto again;
        }
@@ -613,7 +614,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
         */
        if (test_range_bit(io_tree, disk_i_size,
                           ordered->file_offset + ordered->len - 1,
-                          EXTENT_DELALLOC, 0)) {
+                          EXTENT_DELALLOC, 0, NULL)) {
                goto out;
        }
        /*
@@ -664,7 +665,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
         */
        if (i_size_test > entry_end(ordered) &&
            !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1,
-                          EXTENT_DELALLOC, 0)) {
+                          EXTENT_DELALLOC, 0, NULL)) {
                new_i_size = min_t(u64, i_size_test, i_size_read(inode));
        }
        BTRFS_I(inode)->disk_i_size = new_i_size;
index 3d31c88..993a7ea 100644
@@ -85,6 +85,9 @@ struct btrfs_ordered_extent {
        /* extent length on disk */
        u64 disk_len;
 
+       /* number of bytes that still need writing */
+       u64 bytes_left;
+
        /* flags (described above) */
        unsigned long flags;
 
index 3c0d52a..79cba5f 100644
@@ -65,3 +65,23 @@ out:
        btrfs_free_path(path);
        return ret;
 }
+
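+/* look up an orphan item; returns 0 if found, > 0 if not, < 0 on error */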
+int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset)
+{
+       struct btrfs_path *path;
+       struct btrfs_key key;
+       int ret;
+
+       key.objectid = BTRFS_ORPHAN_OBJECTID;
+       key.type = BTRFS_ORPHAN_ITEM_KEY;
+       key.offset = offset;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+
+       btrfs_free_path(path);
+       return ret;
+}
index c04f7f2..361ad32 100644
@@ -121,6 +121,15 @@ struct inodevec {
        int nr;
 };
 
+#define MAX_EXTENTS 128
+
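+/*
+ * a run of adjacent file extents that are relocated together;
+ * boundary[] records where each extent in the run starts
+ */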
+struct file_extent_cluster {
+       u64 start;
+       u64 end;
+       u64 boundary[MAX_EXTENTS];
+       unsigned int nr;
+};
+
 struct reloc_control {
        /* block group to relocate */
        struct btrfs_block_group_cache *block_group;
@@ -2180,7 +2189,7 @@ static int tree_block_processed(u64 bytenr, u32 blocksize,
                                struct reloc_control *rc)
 {
        if (test_range_bit(&rc->processed_blocks, bytenr,
-                          bytenr + blocksize - 1, EXTENT_DIRTY, 1))
+                          bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
                return 1;
        return 0;
 }
@@ -2529,56 +2538,94 @@ out:
 }
 
 static noinline_for_stack
-int relocate_inode_pages(struct inode *inode, u64 start, u64 len)
+int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
+                        u64 block_start)
+{
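+       /*
+        * map the cluster's file range to the data's current disk location
+        * and pin the mapping while the pages are read in and dirtied
+        */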
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct extent_map *em;
+       int ret = 0;
+
+       em = alloc_extent_map(GFP_NOFS);
+       if (!em)
+               return -ENOMEM;
+
+       em->start = start;
+       em->len = end + 1 - start;
+       em->block_len = em->len;
+       em->block_start = block_start;
+       em->bdev = root->fs_info->fs_devices->latest_bdev;
+       set_bit(EXTENT_FLAG_PINNED, &em->flags);
+
+       lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+       while (1) {
+               write_lock(&em_tree->lock);
+               ret = add_extent_mapping(em_tree, em);
+               write_unlock(&em_tree->lock);
+               if (ret != -EEXIST) {
+                       free_extent_map(em);
+                       break;
+               }
+               btrfs_drop_extent_cache(inode, start, end, 0);
+       }
+       unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
+       return ret;
+}
+
+static int relocate_file_extent_cluster(struct inode *inode,
+                                       struct file_extent_cluster *cluster)
 {
        u64 page_start;
        u64 page_end;
-       unsigned long i;
-       unsigned long first_index;
+       u64 offset = BTRFS_I(inode)->index_cnt;
+       unsigned long index;
        unsigned long last_index;
-       unsigned int total_read = 0;
-       unsigned int total_dirty = 0;
+       unsigned int dirty_page = 0;
        struct page *page;
        struct file_ra_state *ra;
-       struct btrfs_ordered_extent *ordered;
-       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+       int nr = 0;
        int ret = 0;
 
+       if (!cluster->nr)
+               return 0;
+
        ra = kzalloc(sizeof(*ra), GFP_NOFS);
        if (!ra)
                return -ENOMEM;
 
+       index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
+       last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
+
        mutex_lock(&inode->i_mutex);
-       first_index = start >> PAGE_CACHE_SHIFT;
-       last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
 
-       /* make sure the dirty trick played by the caller work */
-       while (1) {
-               ret = invalidate_inode_pages2_range(inode->i_mapping,
-                                                   first_index, last_index);
-               if (ret != -EBUSY)
-                       break;
-               schedule_timeout(HZ/10);
-       }
+       i_size_write(inode, cluster->end + 1 - offset);
+       ret = setup_extent_mapping(inode, cluster->start - offset,
+                                  cluster->end - offset, cluster->start);
        if (ret)
                goto out_unlock;
 
        file_ra_state_init(ra, inode->i_mapping);
 
-       for (i = first_index ; i <= last_index; i++) {
-               if (total_read % ra->ra_pages == 0) {
-                       btrfs_force_ra(inode->i_mapping, ra, NULL, i,
-                               min(last_index, ra->ra_pages + i - 1));
-               }
-               total_read++;
-again:
-               if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
-                       BUG_ON(1);
-               page = grab_cache_page(inode->i_mapping, i);
+       WARN_ON(cluster->start != cluster->boundary[0]);
+       while (index <= last_index) {
+               page = find_lock_page(inode->i_mapping, index);
                if (!page) {
-                       ret = -ENOMEM;
-                       goto out_unlock;
+                       page_cache_sync_readahead(inode->i_mapping,
+                                                 ra, NULL, index,
+                                                 last_index + 1 - index);
+                       page = grab_cache_page(inode->i_mapping, index);
+                       if (!page) {
+                               ret = -ENOMEM;
+                               goto out_unlock;
+                       }
+               }
+
+               if (PageReadahead(page)) {
+                       page_cache_async_readahead(inode->i_mapping,
+                                                  ra, NULL, page, index,
+                                                  last_index + 1 - index);
                }
+
                if (!PageUptodate(page)) {
                        btrfs_readpage(NULL, page);
                        lock_page(page);
@@ -2589,75 +2636,79 @@ again:
                                goto out_unlock;
                        }
                }
-               wait_on_page_writeback(page);
 
                page_start = (u64)page->index << PAGE_CACHE_SHIFT;
                page_end = page_start + PAGE_CACHE_SIZE - 1;
-               lock_extent(io_tree, page_start, page_end, GFP_NOFS);
-
-               ordered = btrfs_lookup_ordered_extent(inode, page_start);
-               if (ordered) {
-                       unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
-                       unlock_page(page);
-                       page_cache_release(page);
-                       btrfs_start_ordered_extent(inode, ordered, 1);
-                       btrfs_put_ordered_extent(ordered);
-                       goto again;
-               }
+
+               lock_extent(&BTRFS_I(inode)->io_tree,
+                           page_start, page_end, GFP_NOFS);
+
                set_page_extent_mapped(page);
 
-               if (i == first_index)
-                       set_extent_bits(io_tree, page_start, page_end,
+               if (nr < cluster->nr &&
+                   page_start + offset == cluster->boundary[nr]) {
+                       set_extent_bits(&BTRFS_I(inode)->io_tree,
+                                       page_start, page_end,
                                        EXTENT_BOUNDARY, GFP_NOFS);
+                       nr++;
+               }
                btrfs_set_extent_delalloc(inode, page_start, page_end);
 
                set_page_dirty(page);
-               total_dirty++;
+               dirty_page++;
 
-               unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
+               unlock_extent(&BTRFS_I(inode)->io_tree,
+                             page_start, page_end, GFP_NOFS);
                unlock_page(page);
                page_cache_release(page);
+
+               index++;
+               if (nr < cluster->nr &&
+                   page_end + 1 + offset == cluster->boundary[nr]) {
+                       balance_dirty_pages_ratelimited_nr(inode->i_mapping,
+                                                          dirty_page);
+                       dirty_page = 0;
+               }
+       }
+       if (dirty_page) {
+               balance_dirty_pages_ratelimited_nr(inode->i_mapping,
+                                                  dirty_page);
        }
+       WARN_ON(nr != cluster->nr);
 out_unlock:
        mutex_unlock(&inode->i_mutex);
        kfree(ra);
-       balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
        return ret;
 }
 
 static noinline_for_stack
-int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key)
+int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
+                        struct file_extent_cluster *cluster)
 {
-       struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
-       struct extent_map *em;
-       u64 start = extent_key->objectid - BTRFS_I(inode)->index_cnt;
-       u64 end = start + extent_key->offset - 1;
-
-       em = alloc_extent_map(GFP_NOFS);
-       em->start = start;
-       em->len = extent_key->offset;
-       em->block_len = extent_key->offset;
-       em->block_start = extent_key->objectid;
-       em->bdev = root->fs_info->fs_devices->latest_bdev;
-       set_bit(EXTENT_FLAG_PINNED, &em->flags);
+       int ret;
 
-       /* setup extent map to cheat btrfs_readpage */
-       lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
-       while (1) {
-               int ret;
-               spin_lock(&em_tree->lock);
-               ret = add_extent_mapping(em_tree, em);
-               spin_unlock(&em_tree->lock);
-               if (ret != -EEXIST) {
-                       free_extent_map(em);
-                       break;
-               }
-               btrfs_drop_extent_cache(inode, start, end, 0);
+       if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
+               ret = relocate_file_extent_cluster(inode, cluster);
+               if (ret)
+                       return ret;
+               cluster->nr = 0;
        }
-       unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
 
-       return relocate_inode_pages(inode, start, extent_key->offset);
+       if (!cluster->nr)
+               cluster->start = extent_key->objectid;
+       else
+               BUG_ON(cluster->nr >= MAX_EXTENTS);
+       cluster->end = extent_key->objectid + extent_key->offset - 1;
+       cluster->boundary[cluster->nr] = extent_key->objectid;
+       cluster->nr++;
+
+       if (cluster->nr >= MAX_EXTENTS) {
+               ret = relocate_file_extent_cluster(inode, cluster);
+               if (ret)
+                       return ret;
+               cluster->nr = 0;
+       }
+       return 0;
 }
 
 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
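
Taken together, relocate_data_extent() and relocate_file_extent_cluster() replace the old one-extent-at-a-time copy with batching: adjacent extents accumulate in the cluster until a gap appears or MAX_EXTENTS is reached, then the whole run is copied through the page cache in one readahead-friendly pass. A minimal userspace sketch of the batching policy (not the kernel code; flush() stands in for relocate_file_extent_cluster()):

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_EXTENTS 128

    struct cluster {
            uint64_t start, end;
            uint64_t boundary[MAX_EXTENTS];
            unsigned int nr;
    };

    static void flush(struct cluster *c)
    {
            if (c->nr)
                    printf("copy [%llu..%llu], %u extents\n",
                           (unsigned long long)c->start,
                           (unsigned long long)c->end, c->nr);
            c->nr = 0;
    }

    static void add_extent(struct cluster *c, uint64_t objectid, uint64_t len)
    {
            if (c->nr > 0 && objectid != c->end + 1)
                    flush(c);               /* not contiguous: flush first */
            if (!c->nr)
                    c->start = objectid;
            c->end = objectid + len - 1;
            c->boundary[c->nr++] = objectid;
            if (c->nr >= MAX_EXTENTS)
                    flush(c);               /* batch is full */
    }

The boundary[] array records where each source extent begins so the writeback path can set EXTENT_BOUNDARY there and keep the relocated extents from being merged back together.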
@@ -3203,10 +3254,12 @@ static int check_extent_flags(u64 flags)
        return 0;
 }
 
+
 static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 {
        struct rb_root blocks = RB_ROOT;
        struct btrfs_key key;
+       struct file_extent_cluster *cluster;
        struct btrfs_trans_handle *trans = NULL;
        struct btrfs_path *path;
        struct btrfs_extent_item *ei;
@@ -3216,10 +3269,19 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
        int ret;
        int err = 0;
 
+       cluster = kzalloc(sizeof(*cluster), GFP_NOFS);
+       if (!cluster)
+               return -ENOMEM;
+
        path = btrfs_alloc_path();
-       if (!path)
+       if (!path) {
+               kfree(cluster);
                return -ENOMEM;
+       }
 
+       rc->extents_found = 0;
+       rc->extents_skipped = 0;
+
        rc->search_start = rc->block_group->key.objectid;
        clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
                          GFP_NOFS);
@@ -3306,14 +3366,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
                }
 
                nr = trans->blocks_used;
-               btrfs_end_transaction_throttle(trans, rc->extent_root);
+               btrfs_end_transaction(trans, rc->extent_root);
                trans = NULL;
                btrfs_btree_balance_dirty(rc->extent_root, nr);
 
                if (rc->stage == MOVE_DATA_EXTENTS &&
                    (flags & BTRFS_EXTENT_FLAG_DATA)) {
                        rc->found_file_extent = 1;
-                       ret = relocate_data_extent(rc->data_inode, &key);
+                       ret = relocate_data_extent(rc->data_inode,
+                                                  &key, cluster);
                        if (ret < 0) {
                                err = ret;
                                break;
@@ -3328,6 +3389,14 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
                btrfs_btree_balance_dirty(rc->extent_root, nr);
        }
 
+       if (!err) {
+               ret = relocate_file_extent_cluster(rc->data_inode, cluster);
+               if (ret < 0)
+                       err = ret;
+       }
+
+       kfree(cluster);
+
        rc->create_reloc_root = 0;
        smp_mb();
 
@@ -3348,8 +3417,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 }
 
 static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
-                                struct btrfs_root *root,
-                                u64 objectid, u64 size)
+                                struct btrfs_root *root, u64 objectid)
 {
        struct btrfs_path *path;
        struct btrfs_inode_item *item;
@@ -3368,7 +3436,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
        item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
        memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
        btrfs_set_inode_generation(leaf, item, 1);
-       btrfs_set_inode_size(leaf, item, size);
+       btrfs_set_inode_size(leaf, item, 0);
        btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
        btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
        btrfs_mark_buffer_dirty(leaf);
@@ -3404,12 +3472,7 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
        if (err)
                goto out;
 
-       err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
-       BUG_ON(err);
-
-       err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
-                                      group->key.offset, 0, group->key.offset,
-                                      0, 0, 0);
+       err = __insert_orphan_inode(trans, root, objectid);
        BUG_ON(err);
 
        key.objectid = objectid;
@@ -3475,14 +3538,15 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
        btrfs_wait_ordered_extents(fs_info->tree_root, 0);
 
        while (1) {
-               mutex_lock(&fs_info->cleaner_mutex);
-               btrfs_clean_old_snapshots(fs_info->tree_root);
-               mutex_unlock(&fs_info->cleaner_mutex);
-
                rc->extents_found = 0;
                rc->extents_skipped = 0;
 
+               mutex_lock(&fs_info->cleaner_mutex);
+
+               btrfs_clean_old_snapshots(fs_info->tree_root);
                ret = relocate_block_group(rc);
+
+               mutex_unlock(&fs_info->cleaner_mutex);
                if (ret < 0) {
                        err = ret;
                        break;
@@ -3514,10 +3578,10 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
                }
        }
 
-       filemap_fdatawrite_range(fs_info->btree_inode->i_mapping,
-                                rc->block_group->key.objectid,
-                                rc->block_group->key.objectid +
-                                rc->block_group->key.offset - 1);
+       filemap_write_and_wait_range(fs_info->btree_inode->i_mapping,
+                                    rc->block_group->key.objectid,
+                                    rc->block_group->key.objectid +
+                                    rc->block_group->key.offset - 1);
 
        WARN_ON(rc->block_group->pinned > 0);
        WARN_ON(rc->block_group->reserved > 0);
@@ -3530,6 +3594,26 @@ out:
        return err;
 }
 
+static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
+{
+       struct btrfs_trans_handle *trans;
+       int ret;
+
+       trans = btrfs_start_transaction(root->fs_info->tree_root, 1);
+
+       memset(&root->root_item.drop_progress, 0,
+               sizeof(root->root_item.drop_progress));
+       root->root_item.drop_level = 0;
+       btrfs_set_root_refs(&root->root_item, 0);
+       ret = btrfs_update_root(trans, root->fs_info->tree_root,
+                               &root->root_key, &root->root_item);
+       BUG_ON(ret);
+
+       ret = btrfs_end_transaction(trans, root->fs_info->tree_root);
+       BUG_ON(ret);
+       return 0;
+}
+
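
mark_garbage_root() handles reloc roots whose file system root has already disappeared (the -ENOENT case below): rather than aborting recovery, it zeroes the drop progress and sets the root's ref count to 0, leaving the actual deletion to the regular dead-root cleanup. The zero-refs convention is what consumers elsewhere in this merge test for:

    /* sketch: refs == 0 means "scheduled for deletion" */
    if (btrfs_root_refs(&root->root_item) == 0) {
            /* skip this root; the cleaner will delete it */
    }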
 /*
  * recover relocation interrupted by system crash.
  *
@@ -3589,8 +3673,12 @@ int btrfs_recover_relocation(struct btrfs_root *root)
                        fs_root = read_fs_root(root->fs_info,
                                               reloc_root->root_key.offset);
                        if (IS_ERR(fs_root)) {
-                               err = PTR_ERR(fs_root);
-                               goto out;
+                               ret = PTR_ERR(fs_root);
+                               if (ret != -ENOENT) {
+                                       err = ret;
+                                       goto out;
+                               }
+                               mark_garbage_root(reloc_root);
                        }
                }
 
index 0ddc6d6..9351428 100644 (file)
@@ -94,17 +94,23 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
                goto out;
 
        BUG_ON(ret == 0);
+       if (path->slots[0] == 0) {
+               ret = 1;
+               goto out;
+       }
        l = path->nodes[0];
-       BUG_ON(path->slots[0] == 0);
        slot = path->slots[0] - 1;
        btrfs_item_key_to_cpu(l, &found_key, slot);
-       if (found_key.objectid != objectid) {
+       if (found_key.objectid != objectid ||
+           found_key.type != BTRFS_ROOT_ITEM_KEY) {
                ret = 1;
                goto out;
        }
-       read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot),
-                          sizeof(*item));
-       memcpy(key, &found_key, sizeof(found_key));
+       if (item)
+               read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot),
+                                  sizeof(*item));
+       if (key)
+               memcpy(key, &found_key, sizeof(found_key));
        ret = 0;
 out:
        btrfs_free_path(path);
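
btrfs_find_last_root() now tolerates NULL output parameters, so callers that only need to know whether a root item exists can do an existence probe (sketch):

    ret = btrfs_find_last_root(tree_root, objectid, NULL, NULL);
    if (ret == 0) {
            /* a root item for objectid exists; nothing was copied out */
    }

The added slots[0] == 0 and key-type checks matter because, with root refs and backrefs in the same tree, other key types can share the objectid prefix and the search may land on one of them.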
@@ -249,6 +255,59 @@ err:
        return ret;
 }
 
+int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
+{
+       struct extent_buffer *leaf;
+       struct btrfs_path *path;
+       struct btrfs_key key;
+       int err = 0;
+       int ret;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       key.objectid = BTRFS_ORPHAN_OBJECTID;
+       key.type = BTRFS_ORPHAN_ITEM_KEY;
+       key.offset = 0;
+
+       while (1) {
+               ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
+               if (ret < 0) {
+                       err = ret;
+                       break;
+               }
+
+               leaf = path->nodes[0];
+               if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+                       ret = btrfs_next_leaf(tree_root, path);
+                       if (ret < 0)
+                               err = ret;
+                       if (ret != 0)
+                               break;
+                       leaf = path->nodes[0];
+               }
+
+               btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+               btrfs_release_path(tree_root, path);
+
+               if (key.objectid != BTRFS_ORPHAN_OBJECTID ||
+                   key.type != BTRFS_ORPHAN_ITEM_KEY)
+                       break;
+
+               ret = btrfs_find_dead_roots(tree_root, key.offset);
+               if (ret) {
+                       err = ret;
+                       break;
+               }
+
+               key.offset++;
+       }
+
+       btrfs_free_path(path);
+       return err;
+}
+
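
btrfs_find_orphan_roots() scans the tree root for orphan markers left by unfinished subvolume deletion. The markers all live under one well-known objectid, with the victim root's id in the offset field:

    key.objectid = BTRFS_ORPHAN_OBJECTID;  /* shared orphan namespace */
    key.type     = BTRFS_ORPHAN_ITEM_KEY;
    key.offset   = dead_root_objectid;     /* which root is orphaned */

The key.offset++ plus re-search in the loop is the standard btrfs cursor idiom for walking every item with a common (objectid, type) prefix without holding the path across iterations.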
 /* drop the root item for 'key' from 'root' */
 int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                   struct btrfs_key *key)
@@ -278,31 +337,57 @@ out:
        return ret;
 }
 
-#if 0 /* this will get used when snapshot deletion is implemented */
 int btrfs_del_root_ref(struct btrfs_trans_handle *trans,
                       struct btrfs_root *tree_root,
-                      u64 root_id, u8 type, u64 ref_id)
+                      u64 root_id, u64 ref_id, u64 dirid, u64 *sequence,
+                      const char *name, int name_len)
+
 {
+       struct btrfs_path *path;
+       struct btrfs_root_ref *ref;
+       struct extent_buffer *leaf;
        struct btrfs_key key;
+       unsigned long ptr;
+       int err = 0;
        int ret;
-       struct btrfs_path *path;
 
        path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
 
        key.objectid = root_id;
-       key.type = type;
+       key.type = BTRFS_ROOT_BACKREF_KEY;
        key.offset = ref_id;
-
+again:
        ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
-       BUG_ON(ret);
-
-       ret = btrfs_del_item(trans, tree_root, path);
-       BUG_ON(ret);
+       BUG_ON(ret < 0);
+       if (ret == 0) {
+               leaf = path->nodes[0];
+               ref = btrfs_item_ptr(leaf, path->slots[0],
+                                    struct btrfs_root_ref);
+
+               WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid);
+               WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len);
+               ptr = (unsigned long)(ref + 1);
+               WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len));
+               *sequence = btrfs_root_ref_sequence(leaf, ref);
+
+               ret = btrfs_del_item(trans, tree_root, path);
+               BUG_ON(ret);
+       } else
+               err = -ENOENT;
+
+       if (key.type == BTRFS_ROOT_BACKREF_KEY) {
+               btrfs_release_path(tree_root, path);
+               key.objectid = ref_id;
+               key.type = BTRFS_ROOT_REF_KEY;
+               key.offset = root_id;
+               goto again;
+       }
 
        btrfs_free_path(path);
-       return ret;
+       return err;
 }
-#endif
 
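btrfs_del_root_ref() (and btrfs_add_root_ref() below) now maintain both directions of a root reference in a single call: the first pass operates on the backref key, then the key is flipped and the goto runs a second pass on the forward ref. Sketch of the flip that drives the second pass:

    /* pass 1: backref, stored under the child root */
    key.objectid = root_id;
    key.type     = BTRFS_ROOT_BACKREF_KEY;
    key.offset   = ref_id;
    /* pass 2: forward ref, stored under the parent root */
    key.objectid = ref_id;
    key.type     = BTRFS_ROOT_REF_KEY;
    key.offset   = root_id;

Keeping the pair in one function lets callers such as create_pending_snapshot() (see the transaction.c hunks below) drop their second, hand-rolled btrfs_add_root_ref() call.
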
 int btrfs_find_root_ref(struct btrfs_root *tree_root,
                   struct btrfs_path *path,
@@ -319,7 +404,6 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root,
        return ret;
 }
 
-
 /*
  * add a btrfs_root_ref item.  type is either BTRFS_ROOT_REF_KEY
  * or BTRFS_ROOT_BACKREF_KEY.
@@ -335,8 +419,7 @@ int btrfs_find_root_ref(struct btrfs_root *tree_root,
  */
 int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
                       struct btrfs_root *tree_root,
-                      u64 root_id, u8 type, u64 ref_id,
-                      u64 dirid, u64 sequence,
+                      u64 root_id, u64 ref_id, u64 dirid, u64 sequence,
                       const char *name, int name_len)
 {
        struct btrfs_key key;
@@ -346,13 +429,14 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
        struct extent_buffer *leaf;
        unsigned long ptr;
 
-
        path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
 
        key.objectid = root_id;
-       key.type = type;
+       key.type = BTRFS_ROOT_BACKREF_KEY;
        key.offset = ref_id;
-
+again:
        ret = btrfs_insert_empty_item(trans, tree_root, path, &key,
                                      sizeof(*ref) + name_len);
        BUG_ON(ret);
@@ -366,6 +450,14 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans,
        write_extent_buffer(leaf, name, ptr, name_len);
        btrfs_mark_buffer_dirty(leaf);
 
+       if (key.type == BTRFS_ROOT_BACKREF_KEY) {
+               btrfs_release_path(tree_root, path);
+               key.objectid = ref_id;
+               key.type = BTRFS_ROOT_REF_KEY;
+               key.offset = root_id;
+               goto again;
+       }
+
        btrfs_free_path(path);
-       return ret;
+       return 0;
 }
index 2db17cd..6703538 100644 (file)
@@ -676,6 +676,7 @@ static int btrfs_unfreeze(struct super_block *sb)
 }
 
 static const struct super_operations btrfs_super_ops = {
+       .drop_inode     = btrfs_drop_inode,
        .delete_inode   = btrfs_delete_inode,
        .put_super      = btrfs_put_super,
        .sync_fs        = btrfs_sync_fs,
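
The new .drop_inode hook lets btrfs evict inodes of a subvolume queued for deletion immediately instead of keeping them cached. btrfs_drop_inode() is not part of this hunk; a plausible sketch, consistent with the zero-refs dead-root convention used throughout this merge:

    /* assumption: sketch only, not the exact kernel source */
    static void btrfs_drop_inode(struct inode *inode)
    {
            struct btrfs_root *root = BTRFS_I(inode)->root;

            if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0)
                    generic_delete_inode(inode);    /* root is dying: evict now */
            else
                    generic_drop_inode(inode);      /* normal cached behaviour */
    }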
index cdbb502..88f866f 100644 (file)
@@ -104,7 +104,6 @@ static noinline int record_root_in_trans(struct btrfs_trans_handle *trans,
 {
        if (root->ref_cows && root->last_trans < trans->transid) {
                WARN_ON(root == root->fs_info->extent_root);
-               WARN_ON(root->root_item.refs == 0);
                WARN_ON(root->commit_root != root->node);
 
                radix_tree_tag_set(&root->fs_info->fs_roots_radix,
@@ -720,7 +719,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
 
        key.objectid = objectid;
-       key.offset = 0;
+       /* record when the snapshot was created in key.offset */
+       key.offset = trans->transid;
        btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
 
        old = btrfs_lock_root_node(root);
@@ -778,24 +778,14 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
        ret = btrfs_update_inode(trans, parent_root, parent_inode);
        BUG_ON(ret);
 
-       /* add the backref first */
        ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
                                 pending->root_key.objectid,
-                                BTRFS_ROOT_BACKREF_KEY,
                                 parent_root->root_key.objectid,
                                 parent_inode->i_ino, index, pending->name,
                                 namelen);
 
        BUG_ON(ret);
 
-       /* now add the forward ref */
-       ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
-                                parent_root->root_key.objectid,
-                                BTRFS_ROOT_REF_KEY,
-                                pending->root_key.objectid,
-                                parent_inode->i_ino, index, pending->name,
-                                namelen);
-
        inode = btrfs_lookup_dentry(parent_inode, pending->dentry);
        d_instantiate(pending->dentry, inode);
 fail:
@@ -874,7 +864,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        unsigned long timeout = 1;
        struct btrfs_transaction *cur_trans;
        struct btrfs_transaction *prev_trans = NULL;
-       struct extent_io_tree *pinned_copy;
        DEFINE_WAIT(wait);
        int ret;
        int should_grow = 0;
@@ -915,13 +904,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                return 0;
        }
 
-       pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS);
-       if (!pinned_copy)
-               return -ENOMEM;
-
-       extent_io_tree_init(pinned_copy,
-                            root->fs_info->btree_inode->i_mapping, GFP_NOFS);
-
        trans->transaction->in_commit = 1;
        trans->transaction->blocked = 1;
        if (cur_trans->list.prev != &root->fs_info->trans_list) {
@@ -1019,6 +1001,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        ret = commit_cowonly_roots(trans, root);
        BUG_ON(ret);
 
+       btrfs_prepare_extent_commit(trans, root);
+
        cur_trans = root->fs_info->running_transaction;
        spin_lock(&root->fs_info->new_trans_lock);
        root->fs_info->running_transaction = NULL;
@@ -1042,8 +1026,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy,
               sizeof(root->fs_info->super_copy));
 
-       btrfs_copy_pinned(root, pinned_copy);
-
        trans->transaction->blocked = 0;
 
        wake_up(&root->fs_info->transaction_wait);
@@ -1059,8 +1041,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
         */
        mutex_unlock(&root->fs_info->tree_log_mutex);
 
-       btrfs_finish_extent_commit(trans, root, pinned_copy);
-       kfree(pinned_copy);
+       btrfs_finish_extent_commit(trans, root);
 
        /* do the directory inserts of any pending snapshot creations */
        finish_pending_snapshots(trans, root->fs_info);
@@ -1096,8 +1077,13 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
 
        while (!list_empty(&list)) {
                root = list_entry(list.next, struct btrfs_root, root_list);
-               list_del_init(&root->root_list);
-               btrfs_drop_snapshot(root, 0);
+               list_del(&root->root_list);
+
+               if (btrfs_header_backref_rev(root->node) <
+                   BTRFS_MIXED_BACKREF_REV)
+                       btrfs_drop_snapshot(root, 0);
+               else
+                       btrfs_drop_snapshot(root, 1);
        }
        return 0;
 }
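
The second argument of btrfs_drop_snapshot() is an update_ref flag: it selects whether block references are updated while the tree is torn down, and roots written before the mixed-backref format must pass 0 because their reference scheme predates it. The hedged prototype:

    int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref);

Note also that roots are now taken off the dead-roots list with plain list_del() rather than list_del_init(), since nothing re-links them afterwards.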
index 30c0d45..7827841 100644 (file)
@@ -263,8 +263,8 @@ static int process_one_buffer(struct btrfs_root *log,
                              struct walk_control *wc, u64 gen)
 {
        if (wc->pin)
-               btrfs_update_pinned_extents(log->fs_info->extent_root,
-                                           eb->start, eb->len, 1);
+               btrfs_pin_extent(log->fs_info->extent_root,
+                                eb->start, eb->len, 0);
 
        if (btrfs_buffer_uptodate(eb, gen)) {
                if (wc->write)
@@ -534,7 +534,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
        saved_nbytes = inode_get_bytes(inode);
        /* drop any overlapping extents */
        ret = btrfs_drop_extents(trans, root, inode,
-                        start, extent_end, extent_end, start, &alloc_hint);
+                        start, extent_end, extent_end, start, &alloc_hint, 1);
        BUG_ON(ret);
 
        if (found_type == BTRFS_FILE_EXTENT_REG ||
@@ -2841,7 +2841,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
                if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
                        break;
 
-               if (parent == sb->s_root)
+               if (IS_ROOT(parent))
                        break;
 
                parent = parent->d_parent;
@@ -2880,6 +2880,12 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
                goto end_no_trans;
        }
 
+       if (root != BTRFS_I(inode)->root ||
+           btrfs_root_refs(&root->root_item) == 0) {
+               ret = 1;
+               goto end_no_trans;
+       }
+
        ret = check_parent_dirs_for_sync(trans, inode, parent,
                                         sb, last_committed);
        if (ret)
@@ -2907,12 +2913,15 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
                        break;
 
                inode = parent->d_inode;
+               if (root != BTRFS_I(inode)->root)
+                       break;
+
                if (BTRFS_I(inode)->generation >
                    root->fs_info->last_trans_committed) {
                        ret = btrfs_log_inode(trans, root, inode, inode_only);
                        BUG_ON(ret);
                }
-               if (parent == sb->s_root)
+               if (IS_ROOT(parent))
                        break;
 
                parent = parent->d_parent;
@@ -2951,7 +2960,6 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
        struct btrfs_key tmp_key;
        struct btrfs_root *log;
        struct btrfs_fs_info *fs_info = log_root_tree->fs_info;
-       u64 highest_inode;
        struct walk_control wc = {
                .process_func = process_one_buffer,
                .stage = 0,
@@ -3010,11 +3018,6 @@ again:
                                                      path);
                        BUG_ON(ret);
                }
-               ret = btrfs_find_highest_inode(wc.replay_dest, &highest_inode);
-               if (ret == 0) {
-                       wc.replay_dest->highest_inode = highest_inode;
-                       wc.replay_dest->last_inode_alloc = highest_inode;
-               }
 
                key.offset = found_key.offset - 1;
                wc.replay_dest->log_root = NULL;
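
Two related tightenings in the log-tree code: the walk up the dentry chain now stops at any dentry that is its own parent rather than only at sb->s_root, and inodes belonging to a different root (or to a root with zero refs) are refused. IS_ROOT() comes from dcache.h:

    #define IS_ROOT(x) ((x) == (x)->d_parent)

This also catches subvolume roots and disconnected dentries that are not the superblock root, so fsync logging no longer walks across subvolume boundaries.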
index 5cf405b..23e7d36 100644 (file)
@@ -276,7 +276,7 @@ loop_lock:
                 * is now congested.  Back off and let other work structs
                 * run instead
                 */
-               if (pending && bdi_write_congested(bdi) && batch_run > 32 &&
+               if (pending && bdi_write_congested(bdi) && batch_run > 8 &&
                    fs_info->fs_devices->open_devices > 1) {
                        struct io_context *ioc;
 
@@ -719,10 +719,9 @@ error:
  * called very infrequently and that a given device has a small number
  * of extents
  */
-static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
-                                        struct btrfs_device *device,
-                                        u64 num_bytes, u64 *start,
-                                        u64 *max_avail)
+int find_free_dev_extent(struct btrfs_trans_handle *trans,
+                        struct btrfs_device *device, u64 num_bytes,
+                        u64 *start, u64 *max_avail)
 {
        struct btrfs_key key;
        struct btrfs_root *root = device->dev_root;
@@ -1736,6 +1735,10 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
        extent_root = root->fs_info->extent_root;
        em_tree = &root->fs_info->mapping_tree.map_tree;
 
+       ret = btrfs_can_relocate(extent_root, chunk_offset);
+       if (ret)
+               return -ENOSPC;
+
        /* step one, relocate all the extents inside this chunk */
        ret = btrfs_relocate_block_group(extent_root, chunk_offset);
        BUG_ON(ret);
@@ -1749,9 +1752,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
         * step two, delete the device extents and the
         * chunk tree entries
         */
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, chunk_offset, 1);
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
 
        BUG_ON(em->start > chunk_offset ||
               em->start + em->len < chunk_offset);
@@ -1780,9 +1783,9 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
        ret = btrfs_remove_block_group(trans, extent_root, chunk_offset);
        BUG_ON(ret);
 
-       spin_lock(&em_tree->lock);
+       write_lock(&em_tree->lock);
        remove_extent_mapping(em_tree, em);
-       spin_unlock(&em_tree->lock);
+       write_unlock(&em_tree->lock);
 
        kfree(map);
        em->bdev = NULL;
@@ -1807,12 +1810,15 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
        struct btrfs_key found_key;
        u64 chunk_tree = chunk_root->root_key.objectid;
        u64 chunk_type;
+       bool retried = false;
+       int failed = 0;
        int ret;
 
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
 
+again:
        key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
        key.offset = (u64)-1;
        key.type = BTRFS_CHUNK_ITEM_KEY;
@@ -1842,7 +1848,10 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
                        ret = btrfs_relocate_chunk(chunk_root, chunk_tree,
                                                   found_key.objectid,
                                                   found_key.offset);
-                       BUG_ON(ret);
+                       if (ret == -ENOSPC)
+                               failed++;
+                       else if (ret)
+                               BUG();
                }
 
                if (found_key.offset == 0)
@@ -1850,6 +1859,14 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
                key.offset = found_key.offset - 1;
        }
        ret = 0;
+       if (failed && !retried) {
+               failed = 0;
+               retried = true;
+               goto again;
+       } else if (failed && retried) {
+               WARN_ON(1);
+               ret = -ENOSPC;
+       }
 error:
        btrfs_free_path(path);
        return ret;
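
btrfs_relocate_sys_chunks() (and btrfs_shrink_device() below) adopt a retry-once idiom for -ENOSPC: a first pass can fail because space freed by earlier relocations in the same pass is not yet reusable, so the whole scan is repeated once before giving up. A compact sketch of the shape, with relocate_pass() as a hypothetical stand-in for one full scan:

    #include <errno.h>
    #include <stdbool.h>

    int relocate_pass(void);    /* hypothetical: returns 0 or -ENOSPC */

    int relocate_with_retry(void)
    {
            bool retried = false;
            int failed;
    again:
            failed = 0;
            if (relocate_pass() == -ENOSPC)
                    failed++;
            if (failed && !retried) {
                    retried = true;
                    goto again;             /* one more full pass */
            }
            return failed ? -ENOSPC : 0;
    }

btrfs_shrink_device() additionally rolls back device->total_bytes and total_rw_bytes when even the retry fails, so a shrink that cannot complete leaves the device size untouched.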
@@ -1894,6 +1911,8 @@ int btrfs_balance(struct btrfs_root *dev_root)
                        continue;
 
                ret = btrfs_shrink_device(device, old_size - size_to_free);
+               if (ret == -ENOSPC)
+                       break;
                BUG_ON(ret);
 
                trans = btrfs_start_transaction(dev_root, 1);
@@ -1938,9 +1957,8 @@ int btrfs_balance(struct btrfs_root *dev_root)
                chunk = btrfs_item_ptr(path->nodes[0],
                                       path->slots[0],
                                       struct btrfs_chunk);
-               key.offset = found_key.offset;
                /* chunk zero is special */
-               if (key.offset == 0)
+               if (found_key.offset == 0)
                        break;
 
                btrfs_release_path(chunk_root, path);
@@ -1948,7 +1966,8 @@ int btrfs_balance(struct btrfs_root *dev_root)
                                           chunk_root->root_key.objectid,
                                           found_key.objectid,
                                           found_key.offset);
-               BUG_ON(ret);
+               BUG_ON(ret && ret != -ENOSPC);
+               key.offset = found_key.offset - 1;
        }
        ret = 0;
 error:
@@ -1974,10 +1993,13 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
        u64 chunk_offset;
        int ret;
        int slot;
+       int failed = 0;
+       bool retried = false;
        struct extent_buffer *l;
        struct btrfs_key key;
        struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
        u64 old_total = btrfs_super_total_bytes(super_copy);
+       u64 old_size = device->total_bytes;
        u64 diff = device->total_bytes - new_size;
 
        if (new_size >= device->total_bytes)
@@ -1987,12 +2009,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
        if (!path)
                return -ENOMEM;
 
-       trans = btrfs_start_transaction(root, 1);
-       if (!trans) {
-               ret = -ENOMEM;
-               goto done;
-       }
-
        path->reada = 2;
 
        lock_chunks(root);
@@ -2001,8 +2017,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
        if (device->writeable)
                device->fs_devices->total_rw_bytes -= diff;
        unlock_chunks(root);
-       btrfs_end_transaction(trans, root);
 
+again:
        key.objectid = device->devid;
        key.offset = (u64)-1;
        key.type = BTRFS_DEV_EXTENT_KEY;
@@ -2017,6 +2033,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
                        goto done;
                if (ret) {
                        ret = 0;
+                       btrfs_release_path(root, path);
                        break;
                }
 
@@ -2024,14 +2041,18 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
                slot = path->slots[0];
                btrfs_item_key_to_cpu(l, &key, path->slots[0]);
 
-               if (key.objectid != device->devid)
+               if (key.objectid != device->devid) {
+                       btrfs_release_path(root, path);
                        break;
+               }
 
                dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
                length = btrfs_dev_extent_length(l, dev_extent);
 
-               if (key.offset + length <= new_size)
+               if (key.offset + length <= new_size) {
+                       btrfs_release_path(root, path);
                        break;
+               }
 
                chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
                chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
@@ -2040,8 +2061,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
 
                ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
                                           chunk_offset);
-               if (ret)
+               if (ret && ret != -ENOSPC)
                        goto done;
+               if (ret == -ENOSPC)
+                       failed++;
+               key.offset -= 1;
+       }
+
+       if (failed && !retried) {
+               failed = 0;
+               retried = true;
+               goto again;
+       } else if (failed && retried) {
+               ret = -ENOSPC;
+               lock_chunks(root);
+
+               device->total_bytes = old_size;
+               if (device->writeable)
+                       device->fs_devices->total_rw_bytes += diff;
+               unlock_chunks(root);
+               goto done;
        }
 
        /* Shrinking succeeded, else we would be at "done". */
@@ -2294,9 +2333,9 @@ again:
        em->block_len = em->len;
 
        em_tree = &extent_root->fs_info->mapping_tree.map_tree;
-       spin_lock(&em_tree->lock);
+       write_lock(&em_tree->lock);
        ret = add_extent_mapping(em_tree, em);
-       spin_unlock(&em_tree->lock);
+       write_unlock(&em_tree->lock);
        BUG_ON(ret);
        free_extent_map(em);
 
@@ -2491,9 +2530,9 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
        int readonly = 0;
        int i;
 
-       spin_lock(&map_tree->map_tree.lock);
+       read_lock(&map_tree->map_tree.lock);
        em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
-       spin_unlock(&map_tree->map_tree.lock);
+       read_unlock(&map_tree->map_tree.lock);
        if (!em)
                return 1;
 
@@ -2518,11 +2557,11 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
        struct extent_map *em;
 
        while (1) {
-               spin_lock(&tree->map_tree.lock);
+               write_lock(&tree->map_tree.lock);
                em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
                if (em)
                        remove_extent_mapping(&tree->map_tree, em);
-               spin_unlock(&tree->map_tree.lock);
+               write_unlock(&tree->map_tree.lock);
                if (!em)
                        break;
                kfree(em->bdev);
@@ -2540,9 +2579,9 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
        struct extent_map_tree *em_tree = &map_tree->map_tree;
        int ret;
 
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, logical, len);
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
        BUG_ON(!em);
 
        BUG_ON(em->start > logical || em->start + em->len < logical);
@@ -2604,9 +2643,9 @@ again:
                atomic_set(&multi->error, 0);
        }
 
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, logical, *length);
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
 
        if (!em && unplug_page)
                return 0;
@@ -2763,9 +2802,9 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
        u64 stripe_nr;
        int i, j, nr = 0;
 
-       spin_lock(&em_tree->lock);
+       read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, chunk_start, 1);
-       spin_unlock(&em_tree->lock);
+       read_unlock(&em_tree->lock);
 
        BUG_ON(!em || em->start != chunk_start);
        map = (struct map_lookup *)em->bdev;
@@ -3053,9 +3092,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
        logical = key->offset;
        length = btrfs_chunk_length(leaf, chunk);
 
-       spin_lock(&map_tree->map_tree.lock);
+       read_lock(&map_tree->map_tree.lock);
        em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
-       spin_unlock(&map_tree->map_tree.lock);
+       read_unlock(&map_tree->map_tree.lock);
 
        /* already mapped? */
        if (em && em->start <= logical && em->start + em->len > logical) {
@@ -3114,9 +3153,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
                map->stripes[i].dev->in_fs_metadata = 1;
        }
 
-       spin_lock(&map_tree->map_tree.lock);
+       write_lock(&map_tree->map_tree.lock);
        ret = add_extent_mapping(&map_tree->map_tree, em);
-       spin_unlock(&map_tree->map_tree.lock);
+       write_unlock(&map_tree->map_tree.lock);
        BUG_ON(ret);
        free_extent_map(em);
 
index 5139a83..31b0fab 100644 (file)
@@ -181,4 +181,7 @@ int btrfs_balance(struct btrfs_root *dev_root);
 void btrfs_unlock_volumes(void);
 void btrfs_lock_volumes(void);
 int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
+int find_free_dev_extent(struct btrfs_trans_handle *trans,
+                        struct btrfs_device *device, u64 num_bytes,
+                        u64 *start, u64 *max_avail);
 #endif
index 209f7f1..24afd74 100644 (file)
@@ -2239,16 +2239,10 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size)
        struct address_space *mapping = inode->i_mapping;
        struct page *page;
        void *fsdata;
-       unsigned long limit;
        int err;
 
-       err = -EFBIG;
-        limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-       if (limit != RLIM_INFINITY && size > (loff_t)limit) {
-               send_sig(SIGXFSZ, current, 0);
-               goto out;
-       }
-       if (size > inode->i_sb->s_maxbytes)
+       err = inode_newsize_ok(inode, size);
+       if (err)
                goto out;
 
        err = pagecache_write_begin(NULL, mapping, size, 0,
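
generic_cont_expand_simple() drops its open-coded RLIMIT_FSIZE and s_maxbytes checks in favour of the new inode_newsize_ok() helper, which centralizes the policy, including sending SIGXFSZ when the rlimit is exceeded. A sketch close to the helper's behaviour (the authoritative version lives elsewhere in the VFS):

    int inode_newsize_ok(const struct inode *inode, loff_t offset)
    {
            if (inode->i_size < offset) {
                    unsigned long limit;

                    limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
                    if (limit != RLIM_INFINITY && offset > limit) {
                            send_sig(SIGXFSZ, current, 0);
                            return -EFBIG;
                    }
                    if (offset > inode->i_sb->s_maxbytes)
                            return -EFBIG;
            } else {
                    /* shrinking an in-use swapfile would corrupt it */
                    if (IS_SWAPFILE(inode))
                            return -ETXTBSY;
            }
            return 0;
    }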
index 3cbc57f..d6db933 100644 (file)
@@ -264,7 +264,6 @@ int __register_chrdev(unsigned int major, unsigned int baseminor,
 {
        struct char_device_struct *cd;
        struct cdev *cdev;
-       char *s;
        int err = -ENOMEM;
 
        cd = __register_chrdev_region(major, baseminor, count, name);
@@ -278,8 +277,6 @@ int __register_chrdev(unsigned int major, unsigned int baseminor,
        cdev->owner = fops->owner;
        cdev->ops = fops;
        kobject_set_name(&cdev->kobj, "%s", name);
-       for (s = strchr(kobject_name(&cdev->kobj),'/'); s; s = strchr(s, '/'))
-               *s = '!';
                
        err = cdev_add(cdev, MKDEV(cd->major, baseminor), count);
        if (err)
index d79ce2e..90c5b39 100644 (file)
@@ -185,8 +185,7 @@ out_mount_failed:
                        cifs_sb->mountdata = NULL;
                }
 #endif
-               if (cifs_sb->local_nls)
-                       unload_nls(cifs_sb->local_nls);
+               unload_nls(cifs_sb->local_nls);
                kfree(cifs_sb);
        }
        return rc;
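
The dropped NULL check reflects unload_nls() itself becoming NULL-safe in this merge, so every caller can lose the guard. The helper now amounts to (sketch):

    void unload_nls(struct nls_table *nls)
    {
            if (nls)
                    module_put(nls->owner);
    }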
index 1f09c76..5e24925 100644 (file)
@@ -1557,57 +1557,24 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from)
 
 static int cifs_vmtruncate(struct inode *inode, loff_t offset)
 {
-       struct address_space *mapping = inode->i_mapping;
-       unsigned long limit;
+       loff_t oldsize;
+       int err;
 
        spin_lock(&inode->i_lock);
-       if (inode->i_size < offset)
-               goto do_expand;
-       /*
-        * truncation of in-use swapfiles is disallowed - it would cause
-        * subsequent swapout to scribble on the now-freed blocks.
-        */
-       if (IS_SWAPFILE(inode)) {
-               spin_unlock(&inode->i_lock);
-               goto out_busy;
-       }
-       i_size_write(inode, offset);
-       spin_unlock(&inode->i_lock);
-       /*
-        * unmap_mapping_range is called twice, first simply for efficiency
-        * so that truncate_inode_pages does fewer single-page unmaps. However
-        * after this first call, and before truncate_inode_pages finishes,
-        * it is possible for private pages to be COWed, which remain after
-        * truncate_inode_pages finishes, hence the second unmap_mapping_range
-        * call must be made for correctness.
-        */
-       unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-       truncate_inode_pages(mapping, offset);
-       unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-       goto out_truncate;
-
-do_expand:
-       limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-       if (limit != RLIM_INFINITY && offset > limit) {
+       err = inode_newsize_ok(inode, offset);
+       if (err) {
                spin_unlock(&inode->i_lock);
-               goto out_sig;
-       }
-       if (offset > inode->i_sb->s_maxbytes) {
-               spin_unlock(&inode->i_lock);
-               goto out_big;
+               goto out;
        }
+
+       oldsize = inode->i_size;
        i_size_write(inode, offset);
        spin_unlock(&inode->i_lock);
-out_truncate:
+       truncate_pagecache(inode, oldsize, offset);
        if (inode->i_op->truncate)
                inode->i_op->truncate(inode);
-       return 0;
-out_sig:
-       send_sig(SIGXFSZ, current, 0);
-out_big:
-       return -EFBIG;
-out_busy:
-       return -ETXTBSY;
+out:
+       return err;
 }
 
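cifs_vmtruncate() is rebuilt on the new VFS truncate helpers: inode_newsize_ok() performs the limit and swapfile checks, and truncate_pagecache() hides the unmap/truncate/unmap dance that the deleted comment used to explain. The resulting canonical sequence:

    /* sketch of the new-style truncate path */
    oldsize = inode->i_size;
    i_size_write(inode, newsize);
    truncate_pagecache(inode, oldsize, newsize);   /* unmap + drop pages */
    if (inode->i_op->truncate)
            inode->i_op->truncate(inode);
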
 static int
index 8ccd5ed..d99860a 100644 (file)
@@ -2,6 +2,7 @@
 #define _CODA_INT_
 
 struct dentry;
+struct file;
 
 extern struct file_system_type coda_fs_type;
 extern unsigned long coda_timeout;
index 3aa4883..d576b55 100644 (file)
@@ -768,13 +768,13 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
                                 char __user * type, unsigned long flags,
                                 void __user * data)
 {
-       unsigned long type_page;
+       char *kernel_type;
        unsigned long data_page;
-       unsigned long dev_page;
+       char *kernel_dev;
        char *dir_page;
        int retval;
 
-       retval = copy_mount_options (type, &type_page);
+       retval = copy_mount_string(type, &kernel_type);
        if (retval < 0)
                goto out;
 
@@ -783,38 +783,38 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
        if (IS_ERR(dir_page))
                goto out1;
 
-       retval = copy_mount_options (dev_name, &dev_page);
+       retval = copy_mount_string(dev_name, &kernel_dev);
        if (retval < 0)
                goto out2;
 
-       retval = copy_mount_options (data, &data_page);
+       retval = copy_mount_options(data, &data_page);
        if (retval < 0)
                goto out3;
 
        retval = -EINVAL;
 
-       if (type_page && data_page) {
-               if (!strcmp((char *)type_page, SMBFS_NAME)) {
+       if (kernel_type && data_page) {
+               if (!strcmp(kernel_type, SMBFS_NAME)) {
                        do_smb_super_data_conv((void *)data_page);
-               } else if (!strcmp((char *)type_page, NCPFS_NAME)) {
+               } else if (!strcmp(kernel_type, NCPFS_NAME)) {
                        do_ncp_super_data_conv((void *)data_page);
-               } else if (!strcmp((char *)type_page, NFS4_NAME)) {
+               } else if (!strcmp(kernel_type, NFS4_NAME)) {
                        if (do_nfs4_super_data_conv((void *) data_page))
                                goto out4;
                }
        }
 
-       retval = do_mount((char*)dev_page, dir_page, (char*)type_page,
+       retval = do_mount(kernel_dev, dir_page, kernel_type,
                        flags, (void*)data_page);
 
  out4:
        free_page(data_page);
  out3:
-       free_page(dev_page);
+       kfree(kernel_dev);
  out2:
        putname(dir_page);
  out1:
-       free_page(type_page);
+       kfree(kernel_type);
  out:
        return retval;
 }
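
compat_sys_mount() now distinguishes string arguments from the data blob: the type and device names travel through the new copy_mount_string() (a kmalloc'd C string, released with kfree), while data keeps the full-page copy_mount_options() treatment (released with free_page) because filesystems may interpret it as binary. A sketch of what the string helper amounts to (assumption: simplified):

    static int copy_mount_string(const void __user *data, char **where)
    {
            char *tmp;

            if (!data) {
                    *where = NULL;
                    return 0;
            }
            tmp = strndup_user(data, PAGE_SIZE);
            if (IS_ERR(tmp))
                    return PTR_ERR(tmp);
            *where = tmp;
            return 0;
    }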
index a2edb79..31f4b0e 100644 (file)
@@ -63,9 +63,9 @@ static void drop_slab(void)
 }
 
 int drop_caches_sysctl_handler(ctl_table *table, int write,
-       struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+       void __user *buffer, size_t *length, loff_t *ppos)
 {
-       proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+       proc_dointvec_minmax(table, write, buffer, length, ppos);
        if (write) {
                if (sysctl_drop_caches & 1)
                        drop_pagecache();
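
This is one instance of a tree-wide sysctl cleanup in the merge: proc handlers lose the unused struct file * argument, so the proc_* helpers are now called with the shorter signature, e.g.:

    int proc_dointvec_minmax(struct ctl_table *table, int write,
                             void __user *buffer, size_t *lenp, loff_t *ppos);

Every ctl_table handler in the tree changes shape the same way.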
index 0c754e6..8aadb99 100644 (file)
@@ -1,6 +1,8 @@
 config ECRYPT_FS
        tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
-       depends on EXPERIMENTAL && KEYS && CRYPTO && NET
+       depends on EXPERIMENTAL && KEYS && NET
+       select CRYPTO_ECB
+       select CRYPTO_CBC
        help
          Encrypted filesystem that operates on the VFS layer.  See
          <file:Documentation/filesystems/ecryptfs.txt> to learn more about
index b91851f..fbb6e5e 100644 (file)
@@ -245,13 +245,11 @@ void ecryptfs_destroy_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
                crypto_free_blkcipher(crypt_stat->tfm);
        if (crypt_stat->hash_tfm)
                crypto_free_hash(crypt_stat->hash_tfm);
-       mutex_lock(&crypt_stat->keysig_list_mutex);
        list_for_each_entry_safe(key_sig, key_sig_tmp,
                                 &crypt_stat->keysig_list, crypt_stat_list) {
                list_del(&key_sig->crypt_stat_list);
                kmem_cache_free(ecryptfs_key_sig_cache, key_sig);
        }
-       mutex_unlock(&crypt_stat->keysig_list_mutex);
        memset(crypt_stat, 0, sizeof(struct ecryptfs_crypt_stat));
 }
 
@@ -511,13 +509,14 @@ int ecryptfs_encrypt_page(struct page *page)
                                  + extent_offset), crypt_stat);
                rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt,
                                          offset, crypt_stat->extent_size);
-               if (rc) {
+               if (rc < 0) {
                        ecryptfs_printk(KERN_ERR, "Error attempting "
                                        "to write lower page; rc = [%d]"
                                        "\n", rc);
                        goto out;
                }
        }
+       rc = 0;
 out:
        if (enc_extent_page) {
                kunmap(enc_extent_page);
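
The rc checks in this file change from if (rc) to if (rc < 0) because ecryptfs_write_lower() and ecryptfs_read_lower() now return the number of bytes transferred on success rather than 0. Callers therefore normalize before returning:

    rc = ecryptfs_write_lower(ecryptfs_inode, virt, offset, size);
    if (rc < 0)
            goto out;       /* negative errno: real failure */
    rc = 0;                 /* positive byte count: success */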
@@ -633,7 +632,7 @@ int ecryptfs_decrypt_page(struct page *page)
                rc = ecryptfs_read_lower(enc_extent_virt, offset,
                                         crypt_stat->extent_size,
                                         ecryptfs_inode);
-               if (rc) {
+               if (rc < 0) {
                        ecryptfs_printk(KERN_ERR, "Error attempting "
                                        "to read lower page; rc = [%d]"
                                        "\n", rc);
@@ -797,6 +796,7 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
        kfree(full_alg_name);
        if (IS_ERR(crypt_stat->tfm)) {
                rc = PTR_ERR(crypt_stat->tfm);
+               crypt_stat->tfm = NULL;
                ecryptfs_printk(KERN_ERR, "cryptfs: init_crypt_ctx(): "
                                "Error initializing cipher [%s]\n",
                                crypt_stat->cipher);
@@ -925,7 +925,9 @@ static int ecryptfs_copy_mount_wide_sigs_to_inode_sigs(
        struct ecryptfs_global_auth_tok *global_auth_tok;
        int rc = 0;
 
+       mutex_lock(&crypt_stat->keysig_list_mutex);
        mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);
+
        list_for_each_entry(global_auth_tok,
                            &mount_crypt_stat->global_auth_tok_list,
                            mount_crypt_stat_list) {
@@ -934,13 +936,13 @@ static int ecryptfs_copy_mount_wide_sigs_to_inode_sigs(
                rc = ecryptfs_add_keysig(crypt_stat, global_auth_tok->sig);
                if (rc) {
                        printk(KERN_ERR "Error adding keysig; rc = [%d]\n", rc);
-                       mutex_unlock(
-                               &mount_crypt_stat->global_auth_tok_list_mutex);
                        goto out;
                }
        }
-       mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
+
 out:
+       mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
+       mutex_unlock(&crypt_stat->keysig_list_mutex);
        return rc;
 }
 
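The keysig-list locking moves out to the caller and gains a fixed order: keysig_list_mutex is taken first, global_auth_tok_list_mutex second, and both are dropped on the single exit path (removing the unlock-then-goto on the old error branch). The contract after this change:

    /* caller-side locking contract (sketch) */
    mutex_lock(&crypt_stat->keysig_list_mutex);                 /* outer */
    mutex_lock(&mount_crypt_stat->global_auth_tok_list_mutex);  /* inner */
    rc = ecryptfs_add_keysig(crypt_stat, global_auth_tok->sig);
    mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
    mutex_unlock(&crypt_stat->keysig_list_mutex);

ecryptfs_add_keysig() itself (see the keystore.c hunk below) no longer locks anything and documents that the caller must hold keysig_list_mutex; the same reasoning removes the now-redundant locking from ecryptfs_destroy_crypt_stat(), which only runs once no other user can hold a reference.
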
@@ -1212,14 +1214,15 @@ int ecryptfs_read_and_validate_header_region(char *data,
                crypt_stat->extent_size = ECRYPTFS_DEFAULT_EXTENT_SIZE;
        rc = ecryptfs_read_lower(data, 0, crypt_stat->extent_size,
                                 ecryptfs_inode);
-       if (rc) {
+       if (rc < 0) {
                printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n",
                       __func__, rc);
                goto out;
        }
        if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) {
                rc = -EINVAL;
-       }
+       } else
+               rc = 0;
 out:
        return rc;
 }
@@ -1314,10 +1317,11 @@ ecryptfs_write_metadata_to_contents(struct dentry *ecryptfs_dentry,
 
        rc = ecryptfs_write_lower(ecryptfs_dentry->d_inode, virt,
                                  0, virt_len);
-       if (rc)
+       if (rc < 0)
                printk(KERN_ERR "%s: Error attempting to write header "
-                      "information to lower file; rc = [%d]\n", __func__,
-                      rc);
+                      "information to lower file; rc = [%d]\n", __func__, rc);
+       else
+               rc = 0;
        return rc;
 }
 
@@ -1597,7 +1601,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
        }
        rc = ecryptfs_read_lower(page_virt, 0, crypt_stat->extent_size,
                                 ecryptfs_inode);
-       if (!rc)
+       if (rc >= 0)
                rc = ecryptfs_read_headers_virt(page_virt, crypt_stat,
                                                ecryptfs_dentry,
                                                ECRYPTFS_VALIDATE_HEADER_SIZE);
@@ -1702,7 +1706,7 @@ ecryptfs_encrypt_filename(struct ecryptfs_filename *filename,
        } else {
                printk(KERN_ERR "%s: No support for requested filename "
                       "encryption method in this release\n", __func__);
-               rc = -ENOTSUPP;
+               rc = -EOPNOTSUPP;
                goto out;
        }
 out:
@@ -1763,7 +1767,7 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
        if (IS_ERR(*key_tfm)) {
                rc = PTR_ERR(*key_tfm);
                printk(KERN_ERR "Unable to allocate crypto cipher with name "
-                      "[%s]; rc = [%d]\n", cipher_name, rc);
+                      "[%s]; rc = [%d]\n", full_alg_name, rc);
                goto out;
        }
        crypto_blkcipher_set_flags(*key_tfm, CRYPTO_TFM_REQ_WEAK_KEY);
@@ -1776,7 +1780,8 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
        rc = crypto_blkcipher_setkey(*key_tfm, dummy_key, *key_size);
        if (rc) {
                printk(KERN_ERR "Error attempting to set key of size [%zd] for "
-                      "cipher [%s]; rc = [%d]\n", *key_size, cipher_name, rc);
+                      "cipher [%s]; rc = [%d]\n", *key_size, full_alg_name,
+                      rc);
                rc = -EINVAL;
                goto out;
        }
@@ -2166,7 +2171,7 @@ int ecryptfs_encrypt_and_encode_filename(
                        (*encoded_name)[(*encoded_name_size)] = '\0';
                        (*encoded_name_size)++;
                } else {
-                       rc = -ENOTSUPP;
+                       rc = -EOPNOTSUPP;
                }
                if (rc) {
                        printk(KERN_ERR "%s: Error attempting to encode "
index 2f0945d..056fed6 100644 (file)
@@ -476,6 +476,7 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
        struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir);
        struct dentry *lower_dir_dentry;
 
+       dget(lower_dentry);
        lower_dir_dentry = lock_parent(lower_dentry);
        rc = vfs_unlink(lower_dir_inode, lower_dentry);
        if (rc) {
@@ -489,6 +490,7 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
        d_drop(dentry);
 out_unlock:
        unlock_dir(lower_dir_dentry);
+       dput(lower_dentry);
        return rc;
 }
 
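The dget/dput pair pins lower_dentry across the operation: vfs_unlink() may d_delete() the victim, and without an extra reference the dentry (and its inode) could be released while ecryptfs is still updating the lower directory inode and the upper dentry. The bracket pattern:

    dget(lower_dentry);             /* hold a ref across vfs_unlink() */
    /* lock parent, vfs_unlink(), fix up inode attributes */
    dput(lower_dentry);             /* only now may it go away */
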
index 259525c..a0a7847 100644 (file)
@@ -416,7 +416,9 @@ ecryptfs_find_global_auth_tok_for_sig(
                            &mount_crypt_stat->global_auth_tok_list,
                            mount_crypt_stat_list) {
                if (memcmp(walker->sig, sig, ECRYPTFS_SIG_SIZE_HEX) == 0) {
-                       (*global_auth_tok) = walker;
+                       rc = key_validate(walker->global_auth_tok_key);
+                       if (!rc)
+                               (*global_auth_tok) = walker;
                        goto out;
                }
        }
@@ -612,7 +614,12 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
        }
        /* TODO: Support other key modules than passphrase for
         * filename encryption */
-       BUG_ON(s->auth_tok->token_type != ECRYPTFS_PASSWORD);
+       if (s->auth_tok->token_type != ECRYPTFS_PASSWORD) {
+               rc = -EOPNOTSUPP;
+               printk(KERN_INFO "%s: Filename encryption only supports "
+                      "password tokens\n", __func__);
+               goto out_free_unlock;
+       }
        sg_init_one(
                &s->hash_sg,
                (u8 *)s->auth_tok->token.password.session_key_encryption_key,
@@ -910,7 +917,12 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
        }
        /* TODO: Support other key modules than passphrase for
         * filename encryption */
-       BUG_ON(s->auth_tok->token_type != ECRYPTFS_PASSWORD);
+       if (s->auth_tok->token_type != ECRYPTFS_PASSWORD) {
+               rc = -EOPNOTSUPP;
+               printk(KERN_INFO "%s: Filename encryption only supports "
+                      "password tokens\n", __func__);
+               goto out_free_unlock;
+       }
        rc = crypto_blkcipher_setkey(
                s->desc.tfm,
                s->auth_tok->token.password.session_key_encryption_key,
@@ -1316,8 +1328,10 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
                rc = -EINVAL;
                goto out_free;
        }
-       ecryptfs_cipher_code_to_string(crypt_stat->cipher,
-                                      (u16)data[(*packet_size)]);
+       rc = ecryptfs_cipher_code_to_string(crypt_stat->cipher,
+                                           (u16)data[(*packet_size)]);
+       if (rc)
+               goto out_free;
        /* A little extra work to differentiate among the AES key
         * sizes; see RFC2440 */
        switch(data[(*packet_size)++]) {
@@ -1328,7 +1342,9 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
                crypt_stat->key_size =
                        (*new_auth_tok)->session_key.encrypted_key_size;
        }
-       ecryptfs_init_crypt_ctx(crypt_stat);
+       rc = ecryptfs_init_crypt_ctx(crypt_stat);
+       if (rc)
+               goto out_free;
        if (unlikely(data[(*packet_size)++] != 0x03)) {
                printk(KERN_WARNING "Only S2K ID 3 is currently supported\n");
                rc = -ENOSYS;
@@ -2366,21 +2382,18 @@ struct kmem_cache *ecryptfs_key_sig_cache;
 int ecryptfs_add_keysig(struct ecryptfs_crypt_stat *crypt_stat, char *sig)
 {
        struct ecryptfs_key_sig *new_key_sig;
-       int rc = 0;
 
        new_key_sig = kmem_cache_alloc(ecryptfs_key_sig_cache, GFP_KERNEL);
        if (!new_key_sig) {
-               rc = -ENOMEM;
                printk(KERN_ERR
                       "Error allocating from ecryptfs_key_sig_cache\n");
-               goto out;
+               return -ENOMEM;
        }
        memcpy(new_key_sig->keysig, sig, ECRYPTFS_SIG_SIZE_HEX);
-       mutex_lock(&crypt_stat->keysig_list_mutex);
+       /* Caller must hold keysig_list_mutex */
        list_add(&new_key_sig->crypt_stat_list, &crypt_stat->keysig_list);
-       mutex_unlock(&crypt_stat->keysig_list_mutex);
-out:
-       return rc;
+
+       return 0;
 }
 
 struct kmem_cache *ecryptfs_global_auth_tok_cache;
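
With the locking hoisted out of ecryptfs_add_keysig(), call sites are expected to take keysig_list_mutex around the call themselves. A hedged sketch of the resulting caller pattern (the error label is illustrative):

    mutex_lock(&crypt_stat->keysig_list_mutex);
    rc = ecryptfs_add_keysig(crypt_stat, sig);
    mutex_unlock(&crypt_stat->keysig_list_mutex);
    if (rc)
            goto out;       /* -ENOMEM from the slab allocation */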
index c6d7a4d..e14cf7e 100644 (file)
@@ -136,6 +136,7 @@ int ecryptfs_privileged_open(struct file **lower_file,
                             const struct cred *cred)
 {
        struct ecryptfs_open_req *req;
+       int flags = O_LARGEFILE;
        int rc = 0;
 
        /* Corresponding dput() and mntput() are done when the
@@ -143,10 +144,14 @@ int ecryptfs_privileged_open(struct file **lower_file,
         * destroyed. */
        dget(lower_dentry);
        mntget(lower_mnt);
-       (*lower_file) = dentry_open(lower_dentry, lower_mnt,
-                                   (O_RDWR | O_LARGEFILE), cred);
+       flags |= IS_RDONLY(lower_dentry->d_inode) ? O_RDONLY : O_RDWR;
+       (*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred);
        if (!IS_ERR(*lower_file))
                goto out;
+       if (flags & O_RDONLY) {
+               rc = PTR_ERR((*lower_file));
+               goto out;
+       }
        req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL);
        if (!req) {
                rc = -ENOMEM;
@@ -180,21 +185,8 @@ int ecryptfs_privileged_open(struct file **lower_file,
                       __func__);
                goto out_unlock;
        }
-       if (IS_ERR(*req->lower_file)) {
+       if (IS_ERR(*req->lower_file))
                rc = PTR_ERR(*req->lower_file);
-               dget(lower_dentry);
-               mntget(lower_mnt);
-               (*lower_file) = dentry_open(lower_dentry, lower_mnt,
-                                           (O_RDONLY | O_LARGEFILE), cred);
-               if (IS_ERR(*lower_file)) {
-                       rc = PTR_ERR(*req->lower_file);
-                       (*lower_file) = NULL;
-                       printk(KERN_WARNING "%s: Error attempting privileged "
-                              "open of lower file with either RW or RO "
-                              "perms; rc = [%d]. Giving up.\n",
-                              __func__, rc);
-               }
-       }
 out_unlock:
        mutex_unlock(&req->mux);
 out_free:
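
The open path now degrades to read-only up front when the lower inode is read-only, instead of retrying O_RDONLY from the kernel thread after an O_RDWR failure. A minimal sketch of the flag selection the hunk performs inline; the helper name is hypothetical:

    static inline int lower_open_flags(struct inode *lower_inode)
    {
            int flags = O_LARGEFILE;

            /* read-only lower fs: do not even attempt O_RDWR */
            flags |= IS_RDONLY(lower_inode) ? O_RDONLY : O_RDWR;
            return flags;
    }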
index 9f0aa98..101fe4c 100644 (file)
@@ -129,11 +129,10 @@ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
                lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
                rc = ecryptfs_privileged_open(&inode_info->lower_file,
                                              lower_dentry, lower_mnt, cred);
-               if (rc || IS_ERR(inode_info->lower_file)) {
+               if (rc) {
                        printk(KERN_ERR "Error opening lower persistent file "
                               "for lower_dentry [0x%p] and lower_mnt [0x%p]; "
                               "rc = [%d]\n", lower_dentry, lower_mnt, rc);
-                       rc = PTR_ERR(inode_info->lower_file);
                        inode_info->lower_file = NULL;
                }
        }
index 05772ae..df4ce99 100644 (file)
@@ -396,9 +396,11 @@ static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode)
        rc = ecryptfs_write_lower(ecryptfs_inode, file_size_virt, 0,
                                  sizeof(u64));
        kfree(file_size_virt);
-       if (rc)
+       if (rc < 0)
                printk(KERN_ERR "%s: Error writing file size to header; "
                       "rc = [%d]\n", __func__, rc);
+       else
+               rc = 0;
 out:
        return rc;
 }
index a137c6e..0cc4faf 100644 (file)
  *
  * Write data to the lower file.
  *
- * Returns zero on success; non-zero on error
+ * Returns bytes written on success; less than zero on error
  */
 int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
                         loff_t offset, size_t size)
 {
        struct ecryptfs_inode_info *inode_info;
-       ssize_t octets_written;
        mm_segment_t fs_save;
-       int rc = 0;
+       ssize_t rc;
 
        inode_info = ecryptfs_inode_to_private(ecryptfs_inode);
        mutex_lock(&inode_info->lower_file_mutex);
@@ -50,14 +49,9 @@ int ecryptfs_write_lower(struct inode *ecryptfs_inode, char *data,
        inode_info->lower_file->f_pos = offset;
        fs_save = get_fs();
        set_fs(get_ds());
-       octets_written = vfs_write(inode_info->lower_file, data, size,
-                                  &inode_info->lower_file->f_pos);
+       rc = vfs_write(inode_info->lower_file, data, size,
+                      &inode_info->lower_file->f_pos);
        set_fs(fs_save);
-       if (octets_written < 0) {
-               printk(KERN_ERR "%s: octets_written = [%td]; "
-                      "expected [%td]\n", __func__, octets_written, size);
-               rc = -EINVAL;
-       }
        mutex_unlock(&inode_info->lower_file_mutex);
        mark_inode_dirty_sync(ecryptfs_inode);
        return rc;
@@ -91,6 +85,8 @@ int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode,
                  + offset_in_page);
        virt = kmap(page_for_lower);
        rc = ecryptfs_write_lower(ecryptfs_inode, virt, offset, size);
+       if (rc > 0)
+               rc = 0;
        kunmap(page_for_lower);
        return rc;
 }
@@ -229,30 +225,24 @@ out:
  * Read @size bytes of data at byte offset @offset from the lower
  * inode into memory location @data.
  *
- * Returns zero on success; non-zero on error
+ * Returns bytes read on success; 0 on EOF; less than zero on error
  */
 int ecryptfs_read_lower(char *data, loff_t offset, size_t size,
                        struct inode *ecryptfs_inode)
 {
        struct ecryptfs_inode_info *inode_info =
                ecryptfs_inode_to_private(ecryptfs_inode);
-       ssize_t octets_read;
        mm_segment_t fs_save;
-       int rc = 0;
+       ssize_t rc;
 
        mutex_lock(&inode_info->lower_file_mutex);
        BUG_ON(!inode_info->lower_file);
        inode_info->lower_file->f_pos = offset;
        fs_save = get_fs();
        set_fs(get_ds());
-       octets_read = vfs_read(inode_info->lower_file, data, size,
-                              &inode_info->lower_file->f_pos);
+       rc = vfs_read(inode_info->lower_file, data, size,
+                     &inode_info->lower_file->f_pos);
        set_fs(fs_save);
-       if (octets_read < 0) {
-               printk(KERN_ERR "%s: octets_read = [%td]; "
-                      "expected [%td]\n", __func__, octets_read, size);
-               rc = -EINVAL;
-       }
        mutex_unlock(&inode_info->lower_file_mutex);
        return rc;
 }
@@ -284,6 +274,8 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs,
        offset = ((((loff_t)page_index) << PAGE_CACHE_SHIFT) + offset_in_page);
        virt = kmap(page_for_ecryptfs);
        rc = ecryptfs_read_lower(virt, offset, size, ecryptfs_inode);
+       if (rc > 0)
+               rc = 0;
        kunmap(page_for_ecryptfs);
        flush_dcache_page(page_for_ecryptfs);
        return rc;
index 12d6496..b15a43a 100644 (file)
@@ -77,7 +77,6 @@ static void ecryptfs_destroy_inode(struct inode *inode)
        struct ecryptfs_inode_info *inode_info;
 
        inode_info = ecryptfs_inode_to_private(inode);
-       mutex_lock(&inode_info->lower_file_mutex);
        if (inode_info->lower_file) {
                struct dentry *lower_dentry =
                        inode_info->lower_file->f_dentry;
@@ -89,7 +88,6 @@ static void ecryptfs_destroy_inode(struct inode *inode)
                        d_drop(lower_dentry);
                }
        }
-       mutex_unlock(&inode_info->lower_file_mutex);
        ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat);
        kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
 }
index 5c833c1..d49be6b 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -55,6 +55,7 @@
 #include <linux/kmod.h>
 #include <linux/fsnotify.h>
 #include <linux/fs_struct.h>
+#include <linux/pipe_fs_i.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -63,6 +64,7 @@
 
 int core_uses_pid;
 char core_pattern[CORENAME_MAX_SIZE] = "core";
+unsigned int core_pipe_limit;
 int suid_dumpable = 0;
 
 /* The maximal length of core_pattern is also specified in sysctl.c */
@@ -1393,18 +1395,16 @@ out_ret:
        return retval;
 }
 
-int set_binfmt(struct linux_binfmt *new)
+void set_binfmt(struct linux_binfmt *new)
 {
-       struct linux_binfmt *old = current->binfmt;
+       struct mm_struct *mm = current->mm;
 
-       if (new) {
-               if (!try_module_get(new->module))
-                       return -1;
-       }
-       current->binfmt = new;
-       if (old)
-               module_put(old->module);
-       return 0;
+       if (mm->binfmt)
+               module_put(mm->binfmt->module);
+
+       mm->binfmt = new;
+       if (new)
+               __module_get(new->module);
 }
 
 EXPORT_SYMBOL(set_binfmt);
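
Since set_binfmt() now takes its module reference unconditionally with __module_get() and stores the binfmt on mm_struct, it can no longer fail and returns void. A hedged before/after sketch; the error handling shown is illustrative rather than lifted from a real caller:

    /* before: try_module_get() could fail, so callers had to check */
    if (set_binfmt(&elf_format) < 0)
            send_sig(SIGKILL, current, 0);

    /* after: void return; the reference is taken with __module_get() */
    set_binfmt(&elf_format);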
@@ -1728,6 +1728,29 @@ int get_dumpable(struct mm_struct *mm)
        return (ret >= 2) ? 2 : ret;
 }
 
+static void wait_for_dump_helpers(struct file *file)
+{
+       struct pipe_inode_info *pipe;
+
+       pipe = file->f_path.dentry->d_inode->i_pipe;
+
+       pipe_lock(pipe);
+       pipe->readers++;
+       pipe->writers--;
+
+       while ((pipe->readers > 1) && (!signal_pending(current))) {
+               wake_up_interruptible_sync(&pipe->wait);
+               kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+               pipe_wait(pipe);
+       }
+
+       pipe->readers--;
+       pipe->writers++;
+       pipe_unlock(pipe);
+
+}
+
+
 void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 {
        struct core_state core_state;
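
wait_for_dump_helpers() keeps the crashing task alive until the usermode helper has drained the pipe: the dumper flips itself from writer to reader, so pipe->readers only drops back to 1 once the helper side has closed. A hedged restatement of the core loop:

    /* readers == 2 while the helper still holds the read end;
     * exit when it closes (readers -> 1) or we take a signal */
    while (pipe->readers > 1 && !signal_pending(current))
            pipe_wait(pipe);        /* woken by the helper's reads/close */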
@@ -1744,11 +1767,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
        unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
        char **helper_argv = NULL;
        int helper_argc = 0;
-       char *delimit;
+       int dump_count = 0;
+       static atomic_t core_dump_count = ATOMIC_INIT(0);
 
        audit_core_dumps(signr);
 
-       binfmt = current->binfmt;
+       binfmt = mm->binfmt;
        if (!binfmt || !binfmt->core_dump)
                goto fail;
 
@@ -1799,54 +1823,63 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
        lock_kernel();
        ispipe = format_corename(corename, signr);
        unlock_kernel();
-       /*
-        * Don't bother to check the RLIMIT_CORE value if core_pattern points
-        * to a pipe.  Since we're not writing directly to the filesystem
-        * RLIMIT_CORE doesn't really apply, as no actual core file will be
-        * created unless the pipe reader choses to write out the core file
-        * at which point file size limits and permissions will be imposed
-        * as it does with any other process
-        */
+
        if ((!ispipe) && (core_limit < binfmt->min_coredump))
                goto fail_unlock;
 
        if (ispipe) {
+               if (core_limit == 0) {
+                       /*
+                        * Normally core limits are irrelevant to pipes, since
+                        * we're not writing to the file system, but we use
+                        * core_limit of 0 here as a special value. Any
+                        * non-zero limit gets set to RLIM_INFINITY below, but
+                        * a limit of 0 skips the dump.  This is a consistent
+                        * way to catch recursive crashes.  We can still crash
+                        * if the core_pattern binary sets RLIMIT_CORE to a
+                        * non-zero value, since it runs as root anyway.
+                        * Note that we use task_tgid_vnr here to grab the pid
+                        * of the process group leader.  That way we get the
+                        * right pid if a thread in a multi-threaded
+                        * core_pattern process dies.
+                        */
+                       printk(KERN_WARNING
+                               "Process %d(%s) has RLIMIT_CORE set to 0\n",
+                               task_tgid_vnr(current), current->comm);
+                       printk(KERN_WARNING "Aborting core\n");
+                       goto fail_unlock;
+               }
+
+               dump_count = atomic_inc_return(&core_dump_count);
+               if (core_pipe_limit && (core_pipe_limit < dump_count)) {
+                       printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
+                              task_tgid_vnr(current), current->comm);
+                       printk(KERN_WARNING "Skipping core dump\n");
+                       goto fail_dropcount;
+               }
+
                helper_argv = argv_split(GFP_KERNEL, corename+1, &helper_argc);
                if (!helper_argv) {
                        printk(KERN_WARNING "%s failed to allocate memory\n",
                               __func__);
-                       goto fail_unlock;
-               }
-               /* Terminate the string before the first option */
-               delimit = strchr(corename, ' ');
-               if (delimit)
-                       *delimit = '\0';
-               delimit = strrchr(helper_argv[0], '/');
-               if (delimit)
-                       delimit++;
-               else
-                       delimit = helper_argv[0];
-               if (!strcmp(delimit, current->comm)) {
-                       printk(KERN_NOTICE "Recursive core dump detected, "
-                                       "aborting\n");
-                       goto fail_unlock;
+                       goto fail_dropcount;
                }
 
                core_limit = RLIM_INFINITY;
 
                /* SIGPIPE can happen, but it's just never processed */
-               if (call_usermodehelper_pipe(corename+1, helper_argv, NULL,
+               if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
                                &file)) {
                        printk(KERN_INFO "Core dump to %s pipe failed\n",
                               corename);
-                       goto fail_unlock;
+                       goto fail_dropcount;
                }
        } else
                file = filp_open(corename,
                                 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
                                 0600);
        if (IS_ERR(file))
-               goto fail_unlock;
+               goto fail_dropcount;
        inode = file->f_path.dentry->d_inode;
        if (inode->i_nlink > 1)
                goto close_fail;        /* multiple links - don't dump */
@@ -1875,7 +1908,12 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
        if (retval)
                current->signal->group_exit_code |= 0x80;
 close_fail:
+       if (ispipe && core_pipe_limit)
+               wait_for_dump_helpers(file);
        filp_close(file, NULL);
+fail_dropcount:
+       if (dump_count)
+               atomic_dec(&core_dump_count);
 fail_unlock:
        if (helper_argv)
                argv_free(helper_argv);
index 5ab10c3..9f500de 100644 (file)
@@ -214,7 +214,6 @@ int exofs_sync_fs(struct super_block *sb, int wait)
        }
 
        lock_super(sb);
-       lock_kernel();
        sbi = sb->s_fs_info;
        fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
        fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles);
@@ -245,7 +244,6 @@ int exofs_sync_fs(struct super_block *sb, int wait)
 out:
        if (or)
                osd_end_request(or);
-       unlock_kernel();
        unlock_super(sb);
        kfree(fscb);
        return ret;
@@ -268,8 +266,6 @@ static void exofs_put_super(struct super_block *sb)
        int num_pend;
        struct exofs_sb_info *sbi = sb->s_fs_info;
 
-       lock_kernel();
-
        if (sb->s_dirt)
                exofs_write_super(sb);
 
@@ -286,8 +282,6 @@ static void exofs_put_super(struct super_block *sb)
        osduld_put_device(sbi->s_dev);
        kfree(sb->s_fs_info);
        sb->s_fs_info = NULL;
-
-       unlock_kernel();
 }
 
 /*
index 1c1638f..ade6340 100644 (file)
@@ -819,6 +819,7 @@ const struct address_space_operations ext2_aops = {
        .writepages             = ext2_writepages,
        .migratepage            = buffer_migrate_page,
        .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
 };
 
 const struct address_space_operations ext2_aops_xip = {
@@ -837,6 +838,7 @@ const struct address_space_operations ext2_nobh_aops = {
        .direct_IO              = ext2_direct_IO,
        .writepages             = ext2_writepages,
        .migratepage            = buffer_migrate_page,
+       .error_remove_page      = generic_error_remove_page,
 };
 
 /*
index cd098a7..acf1b14 100644 (file)
@@ -1830,6 +1830,7 @@ static const struct address_space_operations ext3_ordered_aops = {
        .direct_IO              = ext3_direct_IO,
        .migratepage            = buffer_migrate_page,
        .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
 };
 
 static const struct address_space_operations ext3_writeback_aops = {
@@ -1845,6 +1846,7 @@ static const struct address_space_operations ext3_writeback_aops = {
        .direct_IO              = ext3_direct_IO,
        .migratepage            = buffer_migrate_page,
        .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
 };
 
 static const struct address_space_operations ext3_journalled_aops = {
@@ -1859,6 +1861,7 @@ static const struct address_space_operations ext3_journalled_aops = {
        .invalidatepage         = ext3_invalidatepage,
        .releasepage            = ext3_releasepage,
        .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
 };
 
 void ext3_set_aops(struct inode *inode)
index 3a79873..064746f 100644 (file)
@@ -3386,6 +3386,7 @@ static const struct address_space_operations ext4_ordered_aops = {
        .direct_IO              = ext4_direct_IO,
        .migratepage            = buffer_migrate_page,
        .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
 };
 
 static const struct address_space_operations ext4_writeback_aops = {
@@ -3401,6 +3402,7 @@ static const struct address_space_operations ext4_writeback_aops = {
        .direct_IO              = ext4_direct_IO,
        .migratepage            = buffer_migrate_page,
        .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
 };
 
 static const struct address_space_operations ext4_journalled_aops = {
@@ -3415,6 +3417,7 @@ static const struct address_space_operations ext4_journalled_aops = {
        .invalidatepage         = ext4_invalidatepage,
        .releasepage            = ext4_releasepage,
        .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
 };
 
 static const struct address_space_operations ext4_da_aops = {
@@ -3431,6 +3434,7 @@ static const struct address_space_operations ext4_da_aops = {
        .direct_IO              = ext4_direct_IO,
        .migratepage            = buffer_migrate_page,
        .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
 };
 
 void ext4_set_aops(struct inode *inode)
index 8970d8c..04629d1 100644 (file)
@@ -470,19 +470,11 @@ static void fat_put_super(struct super_block *sb)
 
        iput(sbi->fat_inode);
 
-       if (sbi->nls_disk) {
-               unload_nls(sbi->nls_disk);
-               sbi->nls_disk = NULL;
-               sbi->options.codepage = fat_default_codepage;
-       }
-       if (sbi->nls_io) {
-               unload_nls(sbi->nls_io);
-               sbi->nls_io = NULL;
-       }
-       if (sbi->options.iocharset != fat_default_iocharset) {
+       unload_nls(sbi->nls_disk);
+       unload_nls(sbi->nls_io);
+
+       if (sbi->options.iocharset != fat_default_iocharset)
                kfree(sbi->options.iocharset);
-               sbi->options.iocharset = fat_default_iocharset;
-       }
 
        sb->s_fs_info = NULL;
        kfree(sbi);
index ae41308..fc089f2 100644 (file)
@@ -263,6 +263,79 @@ pid_t f_getown(struct file *filp)
        return pid;
 }
 
+static int f_setown_ex(struct file *filp, unsigned long arg)
+{
+       struct f_owner_ex __user *owner_p = (void __user *)arg;
+       struct f_owner_ex owner;
+       struct pid *pid;
+       int type;
+       int ret;
+
+       ret = copy_from_user(&owner, owner_p, sizeof(owner));
+       if (ret)
+               return -EFAULT;
+
+       switch (owner.type) {
+       case F_OWNER_TID:
+               type = PIDTYPE_MAX;
+               break;
+
+       case F_OWNER_PID:
+               type = PIDTYPE_PID;
+               break;
+
+       case F_OWNER_GID:
+               type = PIDTYPE_PGID;
+               break;
+
+       default:
+               return -EINVAL;
+       }
+
+       rcu_read_lock();
+       pid = find_vpid(owner.pid);
+       if (owner.pid && !pid)
+               ret = -ESRCH;
+       else
+               ret = __f_setown(filp, pid, type, 1);
+       rcu_read_unlock();
+
+       return ret;
+}
+
+static int f_getown_ex(struct file *filp, unsigned long arg)
+{
+       struct f_owner_ex __user *owner_p = (void __user *)arg;
+       struct f_owner_ex owner;
+       int ret = 0;
+
+       read_lock(&filp->f_owner.lock);
+       owner.pid = pid_vnr(filp->f_owner.pid);
+       switch (filp->f_owner.pid_type) {
+       case PIDTYPE_MAX:
+               owner.type = F_OWNER_TID;
+               break;
+
+       case PIDTYPE_PID:
+               owner.type = F_OWNER_PID;
+               break;
+
+       case PIDTYPE_PGID:
+               owner.type = F_OWNER_GID;
+               break;
+
+       default:
+               WARN_ON(1);
+               ret = -EINVAL;
+               break;
+       }
+       read_unlock(&filp->f_owner.lock);
+
+       if (!ret && copy_to_user(owner_p, &owner, sizeof(owner)))
+               ret = -EFAULT;
+       return ret;
+}
+
 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
                struct file *filp)
 {
@@ -313,6 +386,12 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
        case F_SETOWN:
                err = f_setown(filp, arg, 1);
                break;
+       case F_GETOWN_EX:
+               err = f_getown_ex(filp, arg);
+               break;
+       case F_SETOWN_EX:
+               err = f_setown_ex(filp, arg);
+               break;
        case F_GETSIG:
                err = filp->f_owner.signum;
                break;
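
F_SETOWN_EX/F_GETOWN_EX let userspace direct SIGIO at a single thread (F_OWNER_TID) rather than a whole process or process group. A self-contained userspace sketch, assuming a libc new enough to expose the new constants and a gettid() wrapper (older libcs need syscall(2) fallbacks for both):

    #include <fcntl.h>
    #include <unistd.h>

    static int direct_sigio_to_this_thread(int fd)
    {
            struct f_owner_ex ex = {
                    .type = F_OWNER_TID,    /* thread, not process/group */
                    .pid  = gettid(),
            };

            return fcntl(fd, F_SETOWN_EX, &ex);
    }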
@@ -428,8 +507,7 @@ static inline int sigio_perm(struct task_struct *p,
 
 static void send_sigio_to_task(struct task_struct *p,
                               struct fown_struct *fown,
-                              int fd,
-                              int reason)
+                              int fd, int reason, int group)
 {
        /*
         * F_SETSIG can change ->signum lockless in parallel, make
@@ -461,11 +539,11 @@ static void send_sigio_to_task(struct task_struct *p,
                        else
                                si.si_band = band_table[reason - POLL_IN];
                        si.si_fd    = fd;
-                       if (!group_send_sig_info(signum, &si, p))
+                       if (!do_send_sig_info(signum, &si, p, group))
                                break;
                /* fall-through: fall back on the old plain SIGIO signal */
                case 0:
-                       group_send_sig_info(SIGIO, SEND_SIG_PRIV, p);
+                       do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group);
        }
 }
 
@@ -474,16 +552,23 @@ void send_sigio(struct fown_struct *fown, int fd, int band)
        struct task_struct *p;
        enum pid_type type;
        struct pid *pid;
+       int group = 1;
        
        read_lock(&fown->lock);
+
        type = fown->pid_type;
+       if (type == PIDTYPE_MAX) {
+               group = 0;
+               type = PIDTYPE_PID;
+       }
+
        pid = fown->pid;
        if (!pid)
                goto out_unlock_fown;
        
        read_lock(&tasklist_lock);
        do_each_pid_task(pid, type, p) {
-               send_sigio_to_task(p, fown, fd, band);
+               send_sigio_to_task(p, fown, fd, band, group);
        } while_each_pid_task(pid, type, p);
        read_unlock(&tasklist_lock);
  out_unlock_fown:
@@ -491,10 +576,10 @@ void send_sigio(struct fown_struct *fown, int fd, int band)
 }
 
 static void send_sigurg_to_task(struct task_struct *p,
-                                struct fown_struct *fown)
+                               struct fown_struct *fown, int group)
 {
        if (sigio_perm(p, fown, SIGURG))
-               group_send_sig_info(SIGURG, SEND_SIG_PRIV, p);
+               do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group);
 }
 
 int send_sigurg(struct fown_struct *fown)
@@ -502,10 +587,17 @@ int send_sigurg(struct fown_struct *fown)
        struct task_struct *p;
        enum pid_type type;
        struct pid *pid;
+       int group = 1;
        int ret = 0;
        
        read_lock(&fown->lock);
+
        type = fown->pid_type;
+       if (type == PIDTYPE_MAX) {
+               group = 0;
+               type = PIDTYPE_PID;
+       }
+
        pid = fown->pid;
        if (!pid)
                goto out_unlock_fown;
@@ -514,7 +606,7 @@ int send_sigurg(struct fown_struct *fown)
        
        read_lock(&tasklist_lock);
        do_each_pid_task(pid, type, p) {
-               send_sigurg_to_task(p, fown);
+               send_sigurg_to_task(p, fown, group);
        } while_each_pid_task(pid, type, p);
        read_unlock(&tasklist_lock);
  out_unlock_fown:
index 334ce39..8eb4404 100644 (file)
@@ -74,14 +74,14 @@ EXPORT_SYMBOL_GPL(get_max_files);
  * Handle nr_files sysctl
  */
 #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
-int proc_nr_files(ctl_table *table, int write, struct file *filp,
+int proc_nr_files(ctl_table *table, int write,
                      void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        files_stat.nr_files = get_nr_files();
-       return proc_dointvec(table, write, filp, buffer, lenp, ppos);
+       return proc_dointvec(table, write, buffer, lenp, ppos);
 }
 #else
-int proc_nr_files(ctl_table *table, int write, struct file *filp,
+int proc_nr_files(ctl_table *table, int write,
                      void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        return -ENOSYS;
index e703654..992f6c9 100644 (file)
@@ -1276,14 +1276,9 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
                return 0;
 
        if (attr->ia_valid & ATTR_SIZE) {
-               unsigned long limit;
-               if (IS_SWAPFILE(inode))
-                       return -ETXTBSY;
-               limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-               if (limit != RLIM_INFINITY && attr->ia_size > (loff_t) limit) {
-                       send_sig(SIGXFSZ, current, 0);
-                       return -EFBIG;
-               }
+               err = inode_newsize_ok(inode, attr->ia_size);
+               if (err)
+                       return err;
                is_truncate = true;
        }
 
@@ -1350,8 +1345,7 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
         * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
         */
        if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
-               if (outarg.attr.size < oldsize)
-                       fuse_truncate(inode->i_mapping, outarg.attr.size);
+               truncate_pagecache(inode, oldsize, outarg.attr.size);
                invalidate_inode_pages2(inode->i_mapping);
        }
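
fuse here, and nfs and ramfs further down, all converge on the same pair of helpers: inode_newsize_ok() for the RLIMIT_FSIZE/s_maxbytes/swapfile checks, and truncate_pagecache() for the unmap-truncate-unmap dance that used to be open-coded. A hedged sketch of the canonical sequence (the label and locals are illustrative):

    err = inode_newsize_ok(inode, newsize); /* limits, s_maxbytes, swapfile */
    if (err)
            goto out;

    oldsize = inode->i_size;
    i_size_write(inode, newsize);
    truncate_pagecache(inode, oldsize, newsize);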
 
index fc9c79f..01cc462 100644 (file)
@@ -606,8 +606,6 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
                                   u64 attr_valid);
 
-void fuse_truncate(struct address_space *mapping, loff_t offset);
-
 /**
  * Initialize the client device
  */
index 6da947d..1a822ce 100644 (file)
@@ -140,14 +140,6 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
        return 0;
 }
 
-void fuse_truncate(struct address_space *mapping, loff_t offset)
-{
-       /* See vmtruncate() */
-       unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-       truncate_inode_pages(mapping, offset);
-       unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-}
-
 void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
                                   u64 attr_valid)
 {
@@ -205,8 +197,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
        spin_unlock(&fc->lock);
 
        if (S_ISREG(inode->i_mode) && oldsize != attr->size) {
-               if (attr->size < oldsize)
-                       fuse_truncate(inode->i_mapping, attr->size);
+               truncate_pagecache(inode, oldsize, attr->size);
                invalidate_inode_pages2(inode->i_mapping);
        }
 }
index 7ebae9a..694b5d4 100644 (file)
@@ -1135,6 +1135,7 @@ static const struct address_space_operations gfs2_writeback_aops = {
        .direct_IO = gfs2_direct_IO,
        .migratepage = buffer_migrate_page,
        .is_partially_uptodate = block_is_partially_uptodate,
+       .error_remove_page = generic_error_remove_page,
 };
 
 static const struct address_space_operations gfs2_ordered_aops = {
@@ -1151,6 +1152,7 @@ static const struct address_space_operations gfs2_ordered_aops = {
        .direct_IO = gfs2_direct_IO,
        .migratepage = buffer_migrate_page,
        .is_partially_uptodate = block_is_partially_uptodate,
+       .error_remove_page = generic_error_remove_page,
 };
 
 static const struct address_space_operations gfs2_jdata_aops = {
@@ -1166,6 +1168,7 @@ static const struct address_space_operations gfs2_jdata_aops = {
        .invalidatepage = gfs2_invalidatepage,
        .releasepage = gfs2_releasepage,
        .is_partially_uptodate = block_is_partially_uptodate,
+       .error_remove_page = generic_error_remove_page,
 };
 
 void gfs2_set_aops(struct inode *inode)
index 7b6165f..8bbe03c 100644 (file)
@@ -344,10 +344,8 @@ void hfs_mdb_put(struct super_block *sb)
        brelse(HFS_SB(sb)->mdb_bh);
        brelse(HFS_SB(sb)->alt_mdb_bh);
 
-       if (HFS_SB(sb)->nls_io)
-               unload_nls(HFS_SB(sb)->nls_io);
-       if (HFS_SB(sb)->nls_disk)
-               unload_nls(HFS_SB(sb)->nls_disk);
+       unload_nls(HFS_SB(sb)->nls_io);
+       unload_nls(HFS_SB(sb)->nls_disk);
 
        free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0);
        kfree(HFS_SB(sb));
index c0759fe..43022f3 100644 (file)
@@ -229,8 +229,7 @@ static void hfsplus_put_super(struct super_block *sb)
        iput(HFSPLUS_SB(sb).alloc_file);
        iput(HFSPLUS_SB(sb).hidden_dir);
        brelse(HFSPLUS_SB(sb).s_vhbh);
-       if (HFSPLUS_SB(sb).nls)
-               unload_nls(HFSPLUS_SB(sb).nls);
+       unload_nls(HFSPLUS_SB(sb).nls);
        kfree(sb->s_fs_info);
        sb->s_fs_info = NULL;
 
@@ -464,8 +463,7 @@ out:
 
 cleanup:
        hfsplus_put_super(sb);
-       if (nls)
-               unload_nls(nls);
+       unload_nls(nls);
        return err;
 }
 
index eba6d55..87a1258 100644 (file)
@@ -380,36 +380,11 @@ static void hugetlbfs_delete_inode(struct inode *inode)
 
 static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock)
 {
-       struct super_block *sb = inode->i_sb;
-
-       if (!hlist_unhashed(&inode->i_hash)) {
-               if (!(inode->i_state & (I_DIRTY|I_SYNC)))
-                       list_move(&inode->i_list, &inode_unused);
-               inodes_stat.nr_unused++;
-               if (!sb || (sb->s_flags & MS_ACTIVE)) {
-                       spin_unlock(&inode_lock);
-                       return;
-               }
-               inode->i_state |= I_WILL_FREE;
-               spin_unlock(&inode_lock);
-               /*
-                * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK
-                * in our backing_dev_info.
-                */
-               write_inode_now(inode, 1);
-               spin_lock(&inode_lock);
-               inode->i_state &= ~I_WILL_FREE;
-               inodes_stat.nr_unused--;
-               hlist_del_init(&inode->i_hash);
+       if (generic_detach_inode(inode)) {
+               truncate_hugepages(inode, 0);
+               clear_inode(inode);
+               destroy_inode(inode);
        }
-       list_del_init(&inode->i_list);
-       list_del_init(&inode->i_sb_list);
-       inode->i_state |= I_FREEING;
-       inodes_stat.nr_inodes--;
-       spin_unlock(&inode_lock);
-       truncate_hugepages(inode, 0);
-       clear_inode(inode);
-       destroy_inode(inode);
 }
 
 static void hugetlbfs_drop_inode(struct inode *inode)
@@ -936,15 +911,9 @@ static struct file_system_type hugetlbfs_fs_type = {
 
 static struct vfsmount *hugetlbfs_vfsmount;
 
-static int can_do_hugetlb_shm(int creat_flags)
+static int can_do_hugetlb_shm(void)
 {
-       if (creat_flags != HUGETLB_SHMFS_INODE)
-               return 0;
-       if (capable(CAP_IPC_LOCK))
-               return 1;
-       if (in_group_p(sysctl_hugetlb_shm_group))
-               return 1;
-       return 0;
+       return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
 }
 
 struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
@@ -960,7 +929,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
        if (!hugetlbfs_vfsmount)
                return ERR_PTR(-ENOENT);
 
-       if (!can_do_hugetlb_shm(creat_flags)) {
+       if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
                *user = current_user();
                if (user_shm_lock(size, *user)) {
                        WARN_ONCE(1,
index 76582b0..4d8e3be 100644 (file)
@@ -1241,7 +1241,16 @@ void generic_delete_inode(struct inode *inode)
 }
 EXPORT_SYMBOL(generic_delete_inode);
 
-static void generic_forget_inode(struct inode *inode)
+/**
+ *     generic_detach_inode - remove inode from inode lists
+ *     @inode: inode to remove
+ *
+ *     Remove inode from inode lists, write it if it's dirty. This is just an
+ *     internal VFS helper exported for hugetlbfs. Do not use!
+ *
+ *     Returns 1 if inode should be completely destroyed.
+ */
+int generic_detach_inode(struct inode *inode)
 {
        struct super_block *sb = inode->i_sb;
 
@@ -1251,7 +1260,7 @@ static void generic_forget_inode(struct inode *inode)
                inodes_stat.nr_unused++;
                if (sb->s_flags & MS_ACTIVE) {
                        spin_unlock(&inode_lock);
-                       return;
+                       return 0;
                }
                WARN_ON(inode->i_state & I_NEW);
                inode->i_state |= I_WILL_FREE;
@@ -1269,6 +1278,14 @@ static void generic_forget_inode(struct inode *inode)
        inode->i_state |= I_FREEING;
        inodes_stat.nr_inodes--;
        spin_unlock(&inode_lock);
+       return 1;
+}
+EXPORT_SYMBOL_GPL(generic_detach_inode);
+
+static void generic_forget_inode(struct inode *inode)
+{
+       if (!generic_detach_inode(inode))
+               return;
        if (inode->i_data.nrpages)
                truncate_inode_pages(&inode->i_data, 0);
        clear_inode(inode);
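
After the refactor, generic_forget_inode() is a thin wrapper and hugetlbfs (above) reuses only the detach step, supplying its own teardown. A hedged sketch of the consumer pattern:

    if (!generic_detach_inode(inode))       /* inode kept on a list: done */
            return;
    /* fs-specific teardown goes here, e.g. hugetlbfs' truncate_hugepages() */
    clear_inode(inode);
    destroy_inode(inode);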
@@ -1399,31 +1416,31 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
        struct inode *inode = dentry->d_inode;
        struct timespec now;
 
-       if (mnt_want_write(mnt))
-               return;
        if (inode->i_flags & S_NOATIME)
-               goto out;
+               return;
        if (IS_NOATIME(inode))
-               goto out;
+               return;
        if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
-               goto out;
+               return;
 
        if (mnt->mnt_flags & MNT_NOATIME)
-               goto out;
+               return;
        if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
-               goto out;
+               return;
 
        now = current_fs_time(inode->i_sb);
 
        if (!relatime_need_update(mnt, inode, now))
-               goto out;
+               return;
 
        if (timespec_equal(&inode->i_atime, &now))
-               goto out;
+               return;
+
+       if (mnt_want_write(mnt))
+               return;
 
        inode->i_atime = now;
        mark_inode_dirty_sync(inode);
-out:
        mnt_drop_write(mnt);
 }
 EXPORT_SYMBOL(touch_atime);
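
The reordering means mnt_want_write() -- which takes a per-mount write reference -- is only paid for once every cheap "no atime update needed" test has passed, so read-mostly workloads skip it entirely. The shape of the new fast path, as a sketch (the combined predicate is hypothetical):

    if (!atime_update_needed(mnt, inode, now))      /* all cheap checks */
            return;
    if (mnt_want_write(mnt))        /* only now take the write ref */
            return;
    inode->i_atime = now;
    mark_inode_dirty_sync(inode);
    mnt_drop_write(mnt);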
@@ -1444,34 +1461,37 @@ void file_update_time(struct file *file)
 {
        struct inode *inode = file->f_path.dentry->d_inode;
        struct timespec now;
-       int sync_it = 0;
-       int err;
+       enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;
 
+       /* First try to exhaust all avenues to not sync */
        if (IS_NOCMTIME(inode))
                return;
 
-       err = mnt_want_write_file(file);
-       if (err)
-               return;
-
        now = current_fs_time(inode->i_sb);
-       if (!timespec_equal(&inode->i_mtime, &now)) {
-               inode->i_mtime = now;
-               sync_it = 1;
-       }
+       if (!timespec_equal(&inode->i_mtime, &now))
+               sync_it = S_MTIME;
 
-       if (!timespec_equal(&inode->i_ctime, &now)) {
-               inode->i_ctime = now;
-               sync_it = 1;
-       }
+       if (!timespec_equal(&inode->i_ctime, &now))
+               sync_it |= S_CTIME;
 
-       if (IS_I_VERSION(inode)) {
-               inode_inc_iversion(inode);
-               sync_it = 1;
-       }
+       if (IS_I_VERSION(inode))
+               sync_it |= S_VERSION;
+
+       if (!sync_it)
+               return;
 
-       if (sync_it)
-               mark_inode_dirty_sync(inode);
+       /* Finally allowed to write? Takes lock. */
+       if (mnt_want_write_file(file))
+               return;
+
+       /* Only change inode inside the lock region */
+       if (sync_it & S_VERSION)
+               inode_inc_iversion(inode);
+       if (sync_it & S_CTIME)
+               inode->i_ctime = now;
+       if (sync_it & S_MTIME)
+               inode->i_mtime = now;
+       mark_inode_dirty_sync(inode);
        mnt_drop_write(file->f_path.mnt);
 }
 EXPORT_SYMBOL(file_update_time);
@@ -1599,7 +1619,8 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
        else if (S_ISSOCK(mode))
                inode->i_fop = &bad_sock_fops;
        else
-               printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n",
-                      mode);
+               printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o) for"
+                                 " inode %s:%lu\n", mode, inode->i_sb->s_id,
+                                 inode->i_ino);
 }
 EXPORT_SYMBOL(init_special_inode);
index d55ef56..515175b 100644 (file)
@@ -57,6 +57,7 @@ extern int check_unsafe_exec(struct linux_binprm *);
  * namespace.c
  */
 extern int copy_mount_options(const void __user *, unsigned long *);
+extern int copy_mount_string(const void __user *, char **);
 
 extern void free_vfsmnt(struct vfsmount *);
 extern struct vfsmount *alloc_vfsmnt(const char *);
index 5612880..7b17a14 100644 (file)
@@ -162,20 +162,21 @@ EXPORT_SYMBOL(fiemap_check_flags);
 static int fiemap_check_ranges(struct super_block *sb,
                               u64 start, u64 len, u64 *new_len)
 {
+       u64 maxbytes = (u64) sb->s_maxbytes;
+
        *new_len = len;
 
        if (len == 0)
                return -EINVAL;
 
-       if (start > sb->s_maxbytes)
+       if (start > maxbytes)
                return -EFBIG;
 
        /*
         * Shrink request scope to what the fs can actually handle.
         */
-       if ((len > sb->s_maxbytes) ||
-           (sb->s_maxbytes - len) < start)
-               *new_len = sb->s_maxbytes - start;
+       if (len > maxbytes || (maxbytes - len) < start)
+               *new_len = maxbytes - start;
 
        return 0;
 }
index 85f96bc..6b4dcd4 100644 (file)
@@ -46,10 +46,7 @@ static void isofs_put_super(struct super_block *sb)
 #ifdef CONFIG_JOLIET
        lock_kernel();
 
-       if (sbi->s_nls_iocharset) {
-               unload_nls(sbi->s_nls_iocharset);
-               sbi->s_nls_iocharset = NULL;
-       }
+       unload_nls(sbi->s_nls_iocharset);
 
        unlock_kernel();
 #endif
@@ -912,8 +909,7 @@ out_no_root:
                printk(KERN_WARNING "%s: get root inode failed\n", __func__);
 out_no_inode:
 #ifdef CONFIG_JOLIET
-       if (sbi->s_nls_iocharset)
-               unload_nls(sbi->s_nls_iocharset);
+       unload_nls(sbi->s_nls_iocharset);
 #endif
        goto out_freesbi;
 out_no_read:
index 37e6dcd..2234c73 100644 (file)
@@ -178,13 +178,11 @@ static void jfs_put_super(struct super_block *sb)
        rc = jfs_umount(sb);
        if (rc)
                jfs_err("jfs_umount failed with return code %d", rc);
-       if (sbi->nls_tab)
-               unload_nls(sbi->nls_tab);
-       sbi->nls_tab = NULL;
+
+       unload_nls(sbi->nls_tab);
 
        truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
        iput(sbi->direct_inode);
-       sbi->direct_inode = NULL;
 
        kfree(sbi);
 
@@ -347,8 +345,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
 
        if (nls_map != (void *) -1) {
                /* Discard old (if remount) */
-               if (sbi->nls_tab)
-                       unload_nls(sbi->nls_tab);
+               unload_nls(sbi->nls_tab);
                sbi->nls_tab = nls_map;
        }
        return 1;
index dcec3d3..219576c 100644 (file)
@@ -527,14 +527,18 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
                                const void *from, size_t available)
 {
        loff_t pos = *ppos;
+       size_t ret;
+
        if (pos < 0)
                return -EINVAL;
-       if (pos >= available)
+       if (pos >= available || !count)
                return 0;
        if (count > available - pos)
                count = available - pos;
-       if (copy_to_user(to, from + pos, count))
+       ret = copy_to_user(to, from + pos, count);
+       if (ret == count)
                return -EFAULT;
+       count -= ret;
        *ppos = pos + count;
        return count;
 }
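
copy_to_user() returns the number of bytes it could not copy, so the old code turned any partial copy into -EFAULT. The fixed helper only fails when nothing was copied, and otherwise returns the short count, matching read(2) semantics. A hedged usage sketch ('msg' and the op name are illustrative):

    static const char msg[] = "hello\n";

    static ssize_t my_read(struct file *f, char __user *buf,
                           size_t count, loff_t *ppos)
    {
            return simple_read_from_buffer(buf, count, ppos,
                                           msg, sizeof(msg) - 1);
    }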
@@ -735,10 +739,11 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
        if (copy_from_user(attr->set_buf, buf, size))
                goto out;
 
-       ret = len; /* claim we got the whole input */
        attr->set_buf[size] = '\0';
        val = simple_strtol(attr->set_buf, NULL, 0);
-       attr->set(attr->data, val);
+       ret = attr->set(attr->data, val);
+       if (ret == 0)
+               ret = len; /* on success, claim we got the whole input */
 out:
        mutex_unlock(&attr->mutex);
        return ret;
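
With simple_attr_write() propagating the ->set() return value, an attribute's validation errors finally reach the writer instead of being masked by the unconditional "ret = len". A sketch of a set callback that benefits (the name and bound are illustrative):

    static int my_attr_set(void *data, u64 val)
    {
            if (val > 15)           /* arbitrary illustrative bound */
                    return -ERANGE; /* now visible to write(2) */
            *(u64 *)data = val;
            return 0;
    }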
index 7230787..bdc3cb4 100644 (file)
@@ -1640,7 +1640,7 @@ static int do_new_mount(struct path *path, char *type, int flags,
 {
        struct vfsmount *mnt;
 
-       if (!type || !memchr(type, 0, PAGE_SIZE))
+       if (!type)
                return -EINVAL;
 
        /* we need capabilities... */
@@ -1871,6 +1871,23 @@ int copy_mount_options(const void __user * data, unsigned long *where)
        return 0;
 }
 
+int copy_mount_string(const void __user *data, char **where)
+{
+       char *tmp;
+
+       if (!data) {
+               *where = NULL;
+               return 0;
+       }
+
+       tmp = strndup_user(data, PAGE_SIZE);
+       if (IS_ERR(tmp))
+               return PTR_ERR(tmp);
+
+       *where = tmp;
+       return 0;
+}
+
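
copy_mount_string() replaces the whole-page copy_mount_options() buffers for the type and device strings with strndup_user() allocations, and passes NULL through untouched. A hedged sketch of the contract at a call site ('utype' is illustrative):

    char *type;
    int err = copy_mount_string(utype, &type);  /* NULL in -> NULL out */

    if (err)        /* -EFAULT/-EINVAL/-ENOMEM via strndup_user() */
            return err;
    /* ... use type ... */
    kfree(type);    /* kfree(NULL) is fine, matching the pass-through */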
 /*
  * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
  * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
@@ -1900,8 +1917,6 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
 
        if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
                return -EINVAL;
-       if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
-               return -EINVAL;
 
        if (data_page)
                ((char *)data_page)[PAGE_SIZE - 1] = 0;
@@ -2070,40 +2085,42 @@ EXPORT_SYMBOL(create_mnt_ns);
 SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
                char __user *, type, unsigned long, flags, void __user *, data)
 {
-       int retval;
+       int ret;
+       char *kernel_type;
+       char *kernel_dir;
+       char *kernel_dev;
        unsigned long data_page;
-       unsigned long type_page;
-       unsigned long dev_page;
-       char *dir_page;
 
-       retval = copy_mount_options(type, &type_page);
-       if (retval < 0)
-               return retval;
+       ret = copy_mount_string(type, &kernel_type);
+       if (ret < 0)
+               goto out_type;
 
-       dir_page = getname(dir_name);
-       retval = PTR_ERR(dir_page);
-       if (IS_ERR(dir_page))
-               goto out1;
+       kernel_dir = getname(dir_name);
+       if (IS_ERR(kernel_dir)) {
+               ret = PTR_ERR(kernel_dir);
+               goto out_dir;
+       }
 
-       retval = copy_mount_options(dev_name, &dev_page);
-       if (retval < 0)
-               goto out2;
+       ret = copy_mount_string(dev_name, &kernel_dev);
+       if (ret < 0)
+               goto out_dev;
 
-       retval = copy_mount_options(data, &data_page);
-       if (retval < 0)
-               goto out3;
+       ret = copy_mount_options(data, &data_page);
+       if (ret < 0)
+               goto out_data;
 
-       retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
-                         flags, (void *)data_page);
-       free_page(data_page);
+       ret = do_mount(kernel_dev, kernel_dir, kernel_type, flags,
+               (void *) data_page);
 
-out3:
-       free_page(dev_page);
-out2:
-       putname(dir_page);
-out1:
-       free_page(type_page);
-       return retval;
+       free_page(data_page);
+out_data:
+       kfree(kernel_dev);
+out_dev:
+       putname(kernel_dir);
+out_dir:
+       kfree(kernel_type);
+out_type:
+       return ret;
 }
 
 /*
index b99ce20..cf98da1 100644 (file)
@@ -746,16 +746,8 @@ static void ncp_put_super(struct super_block *sb)
 
 #ifdef CONFIG_NCPFS_NLS
        /* unload the NLS charsets */
-       if (server->nls_vol)
-       {
-               unload_nls(server->nls_vol);
-               server->nls_vol = NULL;
-       }
-       if (server->nls_io)
-       {
-               unload_nls(server->nls_io);
-               server->nls_io = NULL;
-       }
+       unload_nls(server->nls_vol);
+       unload_nls(server->nls_io);
 #endif /* CONFIG_NCPFS_NLS */
 
        if (server->info_filp)
index 53a7ed7..0d58caf 100644 (file)
@@ -223,10 +223,8 @@ ncp_set_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg)
        oldset_io = server->nls_io;
        server->nls_io = iocharset;
 
-       if (oldset_cp)
-               unload_nls(oldset_cp);
-       if (oldset_io)
-               unload_nls(oldset_io);
+       unload_nls(oldset_cp);
+       unload_nls(oldset_io);
 
        return 0;
 }
index 5021b75..86d6b4d 100644 (file)
@@ -525,6 +525,7 @@ const struct address_space_operations nfs_file_aops = {
        .direct_IO = nfs_direct_IO,
        .migratepage = nfs_migrate_page,
        .launder_page = nfs_launder_page,
+       .error_remove_page = generic_error_remove_page,
 };
 
 /*
index 060022b..faa0918 100644 (file)
@@ -458,49 +458,21 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
  */
 static int nfs_vmtruncate(struct inode * inode, loff_t offset)
 {
-       if (i_size_read(inode) < offset) {
-               unsigned long limit;
-
-               limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-               if (limit != RLIM_INFINITY && offset > limit)
-                       goto out_sig;
-               if (offset > inode->i_sb->s_maxbytes)
-                       goto out_big;
-               spin_lock(&inode->i_lock);
-               i_size_write(inode, offset);
-               spin_unlock(&inode->i_lock);
-       } else {
-               struct address_space *mapping = inode->i_mapping;
+       loff_t oldsize;
+       int err;
 
-               /*
-                * truncation of in-use swapfiles is disallowed - it would
-                * cause subsequent swapout to scribble on the now-freed
-                * blocks.
-                */
-               if (IS_SWAPFILE(inode))
-                       return -ETXTBSY;
-               spin_lock(&inode->i_lock);
-               i_size_write(inode, offset);
-               spin_unlock(&inode->i_lock);
+       err = inode_newsize_ok(inode, offset);
+       if (err)
+               goto out;
 
-               /*
-                * unmap_mapping_range is called twice, first simply for
-                * efficiency so that truncate_inode_pages does fewer
-                * single-page unmaps.  However after this first call, and
-                * before truncate_inode_pages finishes, it is possible for
-                * private pages to be COWed, which remain after
-                * truncate_inode_pages finishes, hence the second
-                * unmap_mapping_range call must be made for correctness.
-                */
-               unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-               truncate_inode_pages(mapping, offset);
-               unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-       }
-       return 0;
-out_sig:
-       send_sig(SIGXFSZ, current, 0);
-out_big:
-       return -EFBIG;
+       spin_lock(&inode->i_lock);
+       oldsize = inode->i_size;
+       i_size_write(inode, offset);
+       spin_unlock(&inode->i_lock);
+
+       truncate_pagecache(inode, oldsize, offset);
+out:
+       return err;
 }
 
 /**
index 810770f..29786d3 100644 (file)
@@ -1711,6 +1711,8 @@ static int nfs_validate_mount_data(void *options,
 
                if (!(data->flags & NFS_MOUNT_TCP))
                        args->nfs_server.protocol = XPRT_TRANSPORT_UDP;
+               else
+                       args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
                /* N.B. caller will free nfs_server.hostname in all cases */
                args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL);
                args->namlen            = data->namlen;
index 477d37d..2224b4d 100644 (file)
@@ -270,7 +270,8 @@ struct nls_table *load_nls(char *charset)
 
 void unload_nls(struct nls_table *nls)
 {
-       module_put(nls->owner);
+       if (nls)
+               module_put(nls->owner);
 }
 
 static const wchar_t charset2uni[256] = {
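
Making unload_nls() tolerate NULL -- like kfree() -- is what lets every caller touched in this series (fat, hfs, hfsplus, isofs, jfs, ncpfs, ntfs) drop its "if (table)" guard. Before and after at a typical call site, as a sketch:

    /* before: every caller guarded the call */
    if (sbi->nls_tab)
            unload_nls(sbi->nls_tab);

    /* after: the helper tolerates NULL */
    unload_nls(sbi->nls_tab);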
index b38f944..cfce53c 100644 (file)
@@ -1550,6 +1550,7 @@ const struct address_space_operations ntfs_aops = {
        .migratepage    = buffer_migrate_page,  /* Move a page cache page from
                                                   one physical page to an
                                                   other. */
+       .error_remove_page = generic_error_remove_page,
 };
 
 /**
@@ -1569,6 +1570,7 @@ const struct address_space_operations ntfs_mst_aops = {
        .migratepage    = buffer_migrate_page,  /* Move a page cache page from
                                                   one physical page to an
                                                   other. */
+       .error_remove_page = generic_error_remove_page,
 };
 
 #ifdef NTFS_RW
index abaaa1c..80b0477 100644 (file)
@@ -201,8 +201,7 @@ use_utf8:
                                                v, old_nls->charset);
                                nls_map = old_nls;
                        } else /* nls_map */ {
-                               if (old_nls)
-                                       unload_nls(old_nls);
+                               unload_nls(old_nls);
                        }
                } else if (!strcmp(p, "utf8")) {
                        bool val = false;
@@ -2427,10 +2426,9 @@ static void ntfs_put_super(struct super_block *sb)
                ntfs_free(vol->upcase);
                vol->upcase = NULL;
        }
-       if (vol->nls_map) {
-               unload_nls(vol->nls_map);
-               vol->nls_map = NULL;
-       }
+
+       unload_nls(vol->nls_map);
+
        sb->s_fs_info = NULL;
        kfree(vol);
 
index 72e7606..deb2b13 100644 (file)
@@ -2022,4 +2022,5 @@ const struct address_space_operations ocfs2_aops = {
        .releasepage            = ocfs2_releasepage,
        .migratepage            = buffer_migrate_page,
        .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
 };
index 0c6bc60..07f77a7 100644 (file)
@@ -322,6 +322,8 @@ static inline void task_context_switch_counts(struct seq_file *m,
                        p->nivcsw);
 }
 
+#ifdef CONFIG_MMU
+
 struct stack_stats {
        struct vm_area_struct *vma;
        unsigned long   startpage;
@@ -402,6 +404,11 @@ static inline void task_show_stack_usage(struct seq_file *m,
                mmput(mm);
        }
 }
+#else
+static void task_show_stack_usage(struct seq_file *m, struct task_struct *task)
+{
+}
+#endif         /* CONFIG_MMU */
 
 int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
                        struct pid *pid, struct task_struct *task)
index 171e052..c7bff4f 100644 (file)
@@ -97,7 +97,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
                "Committed_AS:   %8lu kB\n"
                "VmallocTotal:   %8lu kB\n"
                "VmallocUsed:    %8lu kB\n"
-               "VmallocChunk:   %8lu kB\n",
+               "VmallocChunk:   %8lu kB\n"
+#ifdef CONFIG_MEMORY_FAILURE
+               "HardwareCorrupted: %8lu kB\n"
+#endif
+               ,
                K(i.totalram),
                K(i.freeram),
                K(i.bufferram),
@@ -144,6 +148,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
                (unsigned long)VMALLOC_TOTAL >> 10,
                vmi.used >> 10,
                vmi.largest_chunk >> 10
+#ifdef CONFIG_MEMORY_FAILURE
+               ,atomic_long_read(&mce_bad_pages) << (PAGE_SHIFT - 10)
+#endif
                );
 
        hugetlb_report_meminfo(m);
index 9b1e4e9..f667e8a 100644 (file)
@@ -153,7 +153,7 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
 
        /* careful: calling conventions are nasty here */
        res = count;
-       error = table->proc_handler(table, write, filp, buf, &res, ppos);
+       error = table->proc_handler(table, write, buf, &res, ppos);
        if (!error)
                error = res;
 out:
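
This hunk is part of a tree-wide removal of the struct file * argument from sysctl handlers. A sketch of a handler in the new five-argument form; the handler name is hypothetical:

        static int example_proc_handler(struct ctl_table *table, int write,
                                        void __user *buffer, size_t *lenp,
                                        loff_t *ppos)
        {
                /* delegate to the stock integer handler, minus the filp */
                return proc_dointvec(table, write, buffer, lenp, ppos);
        }
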
index 0c10a0b..766b1d4 100644 (file)
@@ -4,13 +4,18 @@
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <linux/time.h>
+#include <linux/kernel_stat.h>
 #include <asm/cputime.h>
 
 static int uptime_proc_show(struct seq_file *m, void *v)
 {
        struct timespec uptime;
        struct timespec idle;
-       cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
+       int i;
+       cputime_t idletime = cputime_zero;
+
+       for_each_possible_cpu(i)
+               idletime = cputime64_add(idletime, kstat_cpu(i).cpustat.idle);
 
        do_posix_clock_monotonic_gettime(&uptime);
        monotonic_to_bootbased(&uptime);
index 11f0c06..32fae40 100644 (file)
@@ -69,14 +69,11 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
        /* make various checks */
        order = get_order(newsize);
        if (unlikely(order >= MAX_ORDER))
-               goto too_big;
+               return -EFBIG;
 
-       limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-       if (limit != RLIM_INFINITY && newsize > limit)
-               goto fsize_exceeded;
-
-       if (newsize > inode->i_sb->s_maxbytes)
-               goto too_big;
+       ret = inode_newsize_ok(inode, newsize);
+       if (ret)
+               return ret;
 
        i_size_write(inode, newsize);
 
@@ -118,12 +115,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
 
        return 0;
 
- fsize_exceeded:
-       send_sig(SIGXFSZ, current, 0);
- too_big:
-       return -EFBIG;
-
- add_error:
+add_error:
        while (loop < npages)
                __free_page(pages + loop++);
        return ret;
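
inode_newsize_ok() centralizes the RLIMIT_FSIZE and s_maxbytes checks, including sending SIGXFSZ alongside -EFBIG, which is why the fsize_exceeded label disappears above. A sketch of the resulting expand path; the function name is hypothetical:

        static int example_expand(struct inode *inode, loff_t newsize)
        {
                int ret;

                ret = inode_newsize_ok(inode, newsize); /* limits + SIGXFSZ */
                if (ret)
                        return ret;

                i_size_write(inode, newsize);
                return 0;
        }
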
index 6c8c55d..3ac2898 100644 (file)
@@ -839,9 +839,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
                max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
 
        pos = *ppos;
-       retval = -EINVAL;
-       if (unlikely(pos < 0))
-               goto fput_out;
        if (unlikely(pos + count > max)) {
                retval = -EOVERFLOW;
                if (pos >= max)
index 47f132d..c117fa8 100644 (file)
@@ -528,7 +528,7 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
        pos = (ROMFH_SIZE + len + 1 + ROMFH_PAD) & ROMFH_MASK;
 
        root = romfs_iget(sb, pos);
-       if (!root)
+       if (IS_ERR(root))
                goto error;
 
        sb->s_root = d_alloc_root(root);
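
The romfs fix above corrects a check against the wrong failure convention: romfs_iget() reports errors with ERR_PTR(), never NULL, so the old !root test could not fire. The general idiom, with a hypothetical iget helper:

        inode = example_iget(sb, pos);      /* returns ERR_PTR() on failure */
        if (IS_ERR(inode))
                return PTR_ERR(inode);      /* !inode would never trigger */
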
index 6c95927..eae7d9d 100644 (file)
@@ -429,20 +429,21 @@ EXPORT_SYMBOL(mangle_path);
  */
 int seq_path(struct seq_file *m, struct path *path, char *esc)
 {
-       if (m->count < m->size) {
-               char *s = m->buf + m->count;
-               char *p = d_path(path, s, m->size - m->count);
+       char *buf;
+       size_t size = seq_get_buf(m, &buf);
+       int res = -1;
+
+       if (size) {
+               char *p = d_path(path, buf, size);
                if (!IS_ERR(p)) {
-                       s = mangle_path(s, p, esc);
-                       if (s) {
-                               p = m->buf + m->count;
-                               m->count = s - m->buf;
-                               return s - p;
-                       }
+                       char *end = mangle_path(buf, p, esc);
+                       if (end)
+                               res = end - buf;
                }
        }
-       m->count = m->size;
-       return -1;
+       seq_commit(m, res);
+
+       return res;
 }
 EXPORT_SYMBOL(seq_path);
 
@@ -454,26 +455,28 @@ EXPORT_SYMBOL(seq_path);
 int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
                  char *esc)
 {
-       int err = -ENAMETOOLONG;
-       if (m->count < m->size) {
-               char *s = m->buf + m->count;
+       char *buf;
+       size_t size = seq_get_buf(m, &buf);
+       int res = -ENAMETOOLONG;
+
+       if (size) {
                char *p;
 
                spin_lock(&dcache_lock);
-               p = __d_path(path, root, s, m->size - m->count);
+               p = __d_path(path, root, buf, size);
                spin_unlock(&dcache_lock);
-               err = PTR_ERR(p);
+               res = PTR_ERR(p);
                if (!IS_ERR(p)) {
-                       s = mangle_path(s, p, esc);
-                       if (s) {
-                               p = m->buf + m->count;
-                               m->count = s - m->buf;
-                               return 0;
-                       }
+                       char *end = mangle_path(buf, p, esc);
+                       if (end)
+                               res = end - buf;
+                       else
+                               res = -ENAMETOOLONG;
                }
        }
-       m->count = m->size;
-       return err;
+       seq_commit(m, res);
+
+       return res < 0 ? res : 0;
 }
 
 /*
@@ -481,20 +484,21 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
  */
 int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
 {
-       if (m->count < m->size) {
-               char *s = m->buf + m->count;
-               char *p = dentry_path(dentry, s, m->size - m->count);
+       char *buf;
+       size_t size = seq_get_buf(m, &buf);
+       int res = -1;
+
+       if (size) {
+               char *p = dentry_path(dentry, buf, size);
                if (!IS_ERR(p)) {
-                       s = mangle_path(s, p, esc);
-                       if (s) {
-                               p = m->buf + m->count;
-                               m->count = s - m->buf;
-                               return s - p;
-                       }
+                       char *end = mangle_path(buf, p, esc);
+                       if (end)
+                               res = end - buf;
                }
        }
-       m->count = m->size;
-       return -1;
+       seq_commit(m, res);
+
+       return res;
 }
 
 int seq_bitmap(struct seq_file *m, const unsigned long *bits,
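
The seq_file conversions above all follow the new borrow/commit protocol: seq_get_buf() hands out the unused tail of the output buffer, and seq_commit() either accounts the bytes written or, given a negative count, marks the buffer as overflowed so seq_read() retries with a larger one. A minimal sketch of a show routine using it; the names are hypothetical:

        static int example_show(struct seq_file *m, void *v)
        {
                char *buf;
                size_t size = seq_get_buf(m, &buf);  /* borrow free space */
                int res = -1;

                if (size)
                        res = scnprintf(buf, size, "state: %d\n", 42);

                seq_commit(m, res);  /* res < 0 flags overflow to seq_read */
                return 0;
        }
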
index 1402d2d..1c4c8f0 100644 (file)
@@ -459,14 +459,8 @@ smb_show_options(struct seq_file *s, struct vfsmount *m)
 static void
 smb_unload_nls(struct smb_sb_info *server)
 {
-       if (server->remote_nls) {
-               unload_nls(server->remote_nls);
-               server->remote_nls = NULL;
-       }
-       if (server->local_nls) {
-               unload_nls(server->local_nls);
-               server->local_nls = NULL;
-       }
+       unload_nls(server->remote_nls);
+       unload_nls(server->local_nls);
 }
 
 static void
index 0e7207b..19eb70b 100644 (file)
@@ -465,6 +465,48 @@ rescan:
 }
 
 EXPORT_SYMBOL(get_super);
+
+/**
+ * get_active_super - get an active reference to the superblock of a device
+ * @bdev: device to get the superblock for
+ *
+ * Scans the superblock list and finds the superblock of the file system
+ * mounted on the device given.  Returns the superblock with an active
+ * reference and s_umount held exclusively or %NULL if none was found.
+ */
+struct super_block *get_active_super(struct block_device *bdev)
+{
+       struct super_block *sb;
+
+       if (!bdev)
+               return NULL;
+
+       spin_lock(&sb_lock);
+       list_for_each_entry(sb, &super_blocks, s_list) {
+               if (sb->s_bdev != bdev)
+                       continue;
+
+               sb->s_count++;
+               spin_unlock(&sb_lock);
+               down_write(&sb->s_umount);
+               if (sb->s_root) {
+                       spin_lock(&sb_lock);
+                       if (sb->s_count > S_BIAS) {
+                               atomic_inc(&sb->s_active);
+                               sb->s_count--;
+                               spin_unlock(&sb_lock);
+                               return sb;
+                       }
+                       spin_unlock(&sb_lock);
+               }
+               up_write(&sb->s_umount);
+               put_super(sb);
+               yield();
+               spin_lock(&sb_lock);
+       }
+       spin_unlock(&sb_lock);
+       return NULL;
+}
  
 struct super_block * user_get_super(dev_t dev)
 {
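
Per the kernel-doc above, a successful get_active_super() returns with s_active elevated and s_umount held exclusively, so the caller owns both until it drops them. A usage sketch under that assumption; example_operate is hypothetical:

        struct super_block *sb = get_active_super(bdev);

        if (sb) {
                /* safe: sb is active and exclusively locked here */
                example_operate(sb);
                deactivate_locked_super(sb);    /* drops lock and reference */
        }
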
@@ -527,11 +569,15 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 {
        int retval;
        int remount_rw;
-       
+
+       if (sb->s_frozen != SB_UNFROZEN)
+               return -EBUSY;
+
 #ifdef CONFIG_BLOCK
        if (!(flags & MS_RDONLY) && bdev_read_only(sb->s_bdev))
                return -EACCES;
 #endif
+
        if (flags & MS_RDONLY)
                acct_auto_close(sb);
        shrink_dcache_sb(sb);
@@ -743,9 +789,14 @@ int get_sb_bdev(struct file_system_type *fs_type,
         * will protect the lockfs code from trying to start a snapshot
         * while we are mounting
         */
-       down(&bdev->bd_mount_sem);
+       mutex_lock(&bdev->bd_fsfreeze_mutex);
+       if (bdev->bd_fsfreeze_count > 0) {
+               mutex_unlock(&bdev->bd_fsfreeze_mutex);
+               error = -EBUSY;
+               goto error_bdev;
+       }
        s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
-       up(&bdev->bd_mount_sem);
+       mutex_unlock(&bdev->bd_fsfreeze_mutex);
        if (IS_ERR(s))
                goto error_s;
 
@@ -892,6 +943,16 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
        if (error)
                goto out_sb;
 
+       /*
+        * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
+        * but s_maxbytes was an unsigned long long for many releases. Throw
+        * this warning for a little while to try and catch filesystems that
+        * violate this rule. This warning should be either removed or
+        * converted to a BUG() in 2.6.34.
+        */
+       WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
+               "negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes);
+
        mnt->mnt_mountpoint = mnt->mnt_root;
        mnt->mnt_parent = mnt;
        up_write(&mnt->mnt_sb->s_umount);
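
The WARN above pairs with the fs.h change later in this merge that turns s_maxbytes into a signed loff_t: a filesystem that computes a limit past MAX_LFS_FILESIZE now yields a negative value. A one-line sketch of the clamp a fill_super implementation would apply; example_limit is hypothetical:

        sb->s_maxbytes = min_t(loff_t, example_limit, MAX_LFS_FILESIZE);
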
index d5e5559..3818544 100644 (file)
@@ -1635,4 +1635,5 @@ const struct address_space_operations xfs_address_space_operations = {
        .direct_IO              = xfs_vm_direct_IO,
        .migratepage            = buffer_migrate_page,
        .is_partially_uptodate  = block_is_partially_uptodate,
+       .error_remove_page      = generic_error_remove_page,
 };
index 916c0ff..c5bc67c 100644 (file)
@@ -26,7 +26,6 @@ STATIC int
 xfs_stats_clear_proc_handler(
        ctl_table       *ctl,
        int             write,
-       struct file     *filp,
        void            __user *buffer,
        size_t          *lenp,
        loff_t          *ppos)
@@ -34,7 +33,7 @@ xfs_stats_clear_proc_handler(
        int             c, ret, *valp = ctl->data;
        __uint32_t      vn_active;
 
-       ret = proc_dointvec_minmax(ctl, write, filp, buffer, lenp, ppos);
+       ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
 
        if (!ret && write && *valp) {
                printk("XFS Clearing xfsstats\n");
diff --git a/include/acpi/button.h b/include/acpi/button.h
new file mode 100644 (file)
index 0000000..97eea0e
--- /dev/null
@@ -0,0 +1,25 @@
+#ifndef ACPI_BUTTON_H
+#define ACPI_BUTTON_H
+
+#include <linux/notifier.h>
+
+#if defined(CONFIG_ACPI_BUTTON) || defined(CONFIG_ACPI_BUTTON_MODULE)
+extern int acpi_lid_notifier_register(struct notifier_block *nb);
+extern int acpi_lid_notifier_unregister(struct notifier_block *nb);
+extern int acpi_lid_open(void);
+#else
+static inline int acpi_lid_notifier_register(struct notifier_block *nb)
+{
+       return 0;
+}
+static inline int acpi_lid_notifier_unregister(struct notifier_block *nb)
+{
+       return 0;
+}
+static inline int acpi_lid_open(void)
+{
+       return 1;
+}
+#endif /* defined(CONFIG_ACPI_BUTTON) || defined(CONFIG_ACPI_BUTTON_MODULE) */
+
+#endif /* ACPI_BUTTON_H */
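
A sketch of how a consumer such as a panel driver might use the new lid interface; all names prefixed example_ are hypothetical:

        static int example_lid_notify(struct notifier_block *nb,
                                      unsigned long val, void *unused)
        {
                /* re-check panel state when the lid toggles */
                return NOTIFY_OK;
        }

        static struct notifier_block example_lid_nb = {
                .notifier_call = example_lid_notify,
        };

        /* at probe time */
        acpi_lid_notifier_register(&example_lid_nb);
        if (!acpi_lid_open())
                example_mark_panel_off();       /* hypothetical */
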
index 4d3e483..0c3dd86 100644 (file)
 #define F_SETSIG       10      /* for sockets. */
 #define F_GETSIG       11      /* for sockets. */
 #endif
+#ifndef F_SETOWN_EX
+#define F_SETOWN_EX    12
+#define F_GETOWN_EX    13
+#endif
+
+#define F_OWNER_TID    0
+#define F_OWNER_PID    1
+#define F_OWNER_GID    2
+
+struct f_owner_ex {
+       int     type;
+       pid_t   pid;
+};
 
 /* for F_[GET|SET]FL */
 #define FD_CLOEXEC     1       /* actually anything with low bit set goes */
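
F_SETOWN_EX extends F_SETOWN so SIGIO/SIGURG can be directed at a single thread rather than a whole thread group. A userspace sketch; my_tid is assumed to hold a gettid() result:

        struct f_owner_ex owner = {
                .type = F_OWNER_TID,    /* target one thread, not the group */
                .pid  = my_tid,
        };

        if (fcntl(sock_fd, F_SETOWN_EX, &owner) < 0)
                perror("F_SETOWN_EX");
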
index dd63bd3..5ee13b2 100644 (file)
@@ -34,6 +34,7 @@
 #define MADV_REMOVE    9               /* remove these pages & resources */
 #define MADV_DONTFORK  10              /* don't inherit across fork */
 #define MADV_DOFORK    11              /* do inherit across fork */
+#define MADV_HWPOISON  100             /* poison a page for testing */
 
 #define MADV_MERGEABLE   12            /* KSM may merge identical pages */
 #define MADV_UNMERGEABLE 13            /* KSM may not merge identical pages */
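
MADV_HWPOISON exists purely to exercise the new memory-failure paths. A userspace sketch, assuming CAP_SYS_ADMIN and a kernel built with CONFIG_MEMORY_FAILURE:

        long pagesize = sysconf(_SC_PAGESIZE);
        void *p = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        memset(p, 0, pagesize);
        if (madvise(p, pagesize, MADV_HWPOISON) < 0)
                perror("MADV_HWPOISON");
        /* touching p now delivers SIGBUS with one of the BUS_MCEERR codes */
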
index c840719..942d30b 100644 (file)
@@ -82,6 +82,7 @@ typedef struct siginfo {
 #ifdef __ARCH_SI_TRAPNO
                        int _trapno;    /* TRAP # which caused the signal */
 #endif
+                       short _addr_lsb; /* LSB of the reported address */
                } _sigfault;
 
                /* SIGPOLL */
@@ -112,6 +113,7 @@ typedef struct siginfo {
 #ifdef __ARCH_SI_TRAPNO
 #define si_trapno      _sifields._sigfault._trapno
 #endif
+#define si_addr_lsb    _sifields._sigfault._addr_lsb
 #define si_band                _sifields._sigpoll._band
 #define si_fd          _sifields._sigpoll._fd
 
@@ -192,7 +194,11 @@ typedef struct siginfo {
 #define BUS_ADRALN     (__SI_FAULT|1)  /* invalid address alignment */
 #define BUS_ADRERR     (__SI_FAULT|2)  /* non-existent physical address */
 #define BUS_OBJERR     (__SI_FAULT|3)  /* object specific hardware error */
-#define NSIGBUS                3
+/* hardware memory error consumed on a machine check: action required */
+#define BUS_MCEERR_AR  (__SI_FAULT|4)
+/* hardware memory error detected in process but not consumed: action optional */
+#define BUS_MCEERR_AO  (__SI_FAULT|5)
+#define NSIGBUS                5
 
 /*
  * SIGTRAP si_codes
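
The two BUS_MCEERR codes reach userspace via a SIGBUS handler installed with SA_SIGINFO, with si_addr_lsb giving log2 of the corrupted granule (typically PAGE_SHIFT for a whole poisoned page). A hedged handler sketch; the recovery policy shown is hypothetical:

        static void example_sigbus(int sig, siginfo_t *si, void *uctx)
        {
                if (si->si_code == BUS_MCEERR_AO ||
                    si->si_code == BUS_MCEERR_AR) {
                        size_t len = (size_t)1 << si->si_addr_lsb;

                        /* give the damaged range back to the kernel */
                        munmap(si->si_addr, len);
                }
        }
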
index 29ca8f5..b6e818f 100644 (file)
        . = ALIGN(PAGE_SIZE);                                           \
        .data : AT(ADDR(.data) - LOAD_OFFSET) {                         \
                INIT_TASK_DATA(inittask)                                \
+               NOSAVE_DATA                                             \
+               PAGE_ALIGNED_DATA(pagealigned)                          \
                CACHELINE_ALIGNED_DATA(cacheline)                       \
                READ_MOSTLY_DATA(cacheline)                             \
                DATA_DATA                                               \
                CONSTRUCTORS                                            \
-               NOSAVE_DATA                                             \
-               PAGE_ALIGNED_DATA(pagealigned)                          \
        }
 
 #define INIT_TEXT_SECTION(inittext_align)                              \
index 8535084..3f6e545 100644 (file)
        {0x8086, 0x2e12, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
        {0x8086, 0x2e22, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
        {0x8086, 0x2e32, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
+       {0x8086, 0x2e42, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
        {0x8086, 0xa001, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
        {0x8086, 0xa011, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
        {0x8086, 0x35e8, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \
index 8e1e925..7e0cb1d 100644 (file)
@@ -185,6 +185,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_GET_APERTURE 0x23
 #define DRM_I915_GEM_MMAP_GTT  0x24
 #define DRM_I915_GET_PIPE_FROM_CRTC_ID 0x25
+#define DRM_I915_GEM_MADVISE   0x26
 
 #define DRM_IOCTL_I915_INIT            DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH           DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -221,6 +222,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_GET_TILING  DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling)
 #define DRM_IOCTL_I915_GEM_GET_APERTURE        DRM_IOR  (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture)
 #define DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_PIPE_FROM_CRTC_ID, struct drm_intel_get_pipe_from_crtc_id)
+#define DRM_IOCTL_I915_GEM_MADVISE     DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -667,4 +669,21 @@ struct drm_i915_get_pipe_from_crtc_id {
        __u32 pipe;
 };
 
+#define I915_MADV_WILLNEED 0
+#define I915_MADV_DONTNEED 1
+#define __I915_MADV_PURGED 2 /* internal state */
+
+struct drm_i915_gem_madvise {
+       /** Handle of the buffer to change the backing store advice */
+       __u32 handle;
+
+       /* Advice: either the buffer will be needed again in the near future,
+        *         or won't be and could be discarded under memory pressure.
+        */
+       __u32 madv;
+
+       /** Whether the backing store still exists. */
+       __u32 retained;
+};
+
 #endif                         /* _I915_DRM_H_ */
diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h
new file mode 100644 (file)
index 0000000..6b42417
--- /dev/null
@@ -0,0 +1,18 @@
+/*
+ *  include/linux/amba/mmci.h
+ */
+#ifndef AMBA_MMCI_H
+#define AMBA_MMCI_H
+
+#include <linux/mmc/host.h>
+
+struct mmci_platform_data {
+       unsigned int ocr_mask;                  /* available voltages */
+       u32 (*translate_vdd)(struct device *, unsigned int);
+       unsigned int (*status)(struct device *);
+       int     gpio_wp;
+       int     gpio_cd;
+       unsigned long capabilities;
+};
+
+#endif
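
A board-file sketch wiring the new fields; all values and the status callback are hypothetical, and the negative GPIO numbers follow the usual "not wired" convention:

        static unsigned int example_card_status(struct device *dev)
        {
                return 1;       /* pretend a card is always present */
        }

        static struct mmci_platform_data example_mmc_data = {
                .ocr_mask     = MMC_VDD_32_33 | MMC_VDD_33_34,
                .status       = example_card_status,
                .gpio_wp      = -1,
                .gpio_cd      = -1,
                .capabilities = MMC_CAP_4_BIT_DATA,
        };
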
index dcad0ff..e4836c6 100644 (file)
@@ -136,12 +136,12 @@ enum ssp_tx_level_trig {
 
 /**
  * enum SPI Clock Phase - clock phase (Motorola SPI interface only)
- * @SSP_CLK_RISING_EDGE: Receive data on rising edge
- * @SSP_CLK_FALLING_EDGE: Receive data on falling edge
+ * @SSP_CLK_FIRST_EDGE: Receive data on first edge transition (actual direction depends on polarity)
+ * @SSP_CLK_SECOND_EDGE: Receive data on second edge transition (actual direction depends on polarity)
  */
 enum ssp_spi_clk_phase {
-       SSP_CLK_RISING_EDGE,
-       SSP_CLK_FALLING_EDGE
+       SSP_CLK_FIRST_EDGE,
+       SSP_CLK_SECOND_EDGE
 };
 
 /**
index 5fc2ef8..a1c486a 100644 (file)
@@ -58,25 +58,60 @@ struct dma_chan_ref {
  * array.
  * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a
  * dependency chain
- * @ASYNC_TX_DEP_ACK: ack the dependency descriptor.  Useful for chaining.
+ * @ASYNC_TX_FENCE: specify that the next operation in the dependency
+ * chain uses this operation's result as an input
  */
 enum async_tx_flags {
        ASYNC_TX_XOR_ZERO_DST    = (1 << 0),
        ASYNC_TX_XOR_DROP_DST    = (1 << 1),
-       ASYNC_TX_ACK             = (1 << 3),
-       ASYNC_TX_DEP_ACK         = (1 << 4),
+       ASYNC_TX_ACK             = (1 << 2),
+       ASYNC_TX_FENCE           = (1 << 3),
+};
+
+/**
+ * struct async_submit_ctl - async_tx submission/completion modifiers
+ * @flags: submission modifiers
+ * @depend_tx: parent dependency of the current operation being submitted
+ * @cb_fn: callback routine to run at operation completion
+ * @cb_param: parameter for the callback routine
+ * @scribble: caller provided space for dma/page address conversions
+ */
+struct async_submit_ctl {
+       enum async_tx_flags flags;
+       struct dma_async_tx_descriptor *depend_tx;
+       dma_async_tx_callback cb_fn;
+       void *cb_param;
+       void *scribble;
 };
 
 #ifdef CONFIG_DMA_ENGINE
 #define async_tx_issue_pending_all dma_issue_pending_all
+
+/**
+ * async_tx_issue_pending - send pending descriptor to the hardware channel
+ * @tx: descriptor handle to retrieve hardware context
+ *
+ * Note: any dependent operations will have already been issued by
+ * async_tx_channel_switch, or (in the case of no channel switch) will
+ * already be pending on this channel.
+ */
+static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx)
+{
+       if (likely(tx)) {
+               struct dma_chan *chan = tx->chan;
+               struct dma_device *dma = chan->device;
+
+               dma->device_issue_pending(chan);
+       }
+}
 #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL
 #include <asm/async_tx.h>
 #else
 #define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \
         __async_tx_find_channel(dep, type)
 struct dma_chan *
-__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
-       enum dma_transaction_type tx_type);
+__async_tx_find_channel(struct async_submit_ctl *submit,
+                       enum dma_transaction_type tx_type);
 #endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */
 #else
 static inline void async_tx_issue_pending_all(void)
@@ -84,10 +119,16 @@ static inline void async_tx_issue_pending_all(void)
        do { } while (0);
 }
 
+static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx)
+{
+       do { } while (0);
+}
+
 static inline struct dma_chan *
-async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
-       enum dma_transaction_type tx_type, struct page **dst, int dst_count,
-       struct page **src, int src_count, size_t len)
+async_tx_find_channel(struct async_submit_ctl *submit,
+                     enum dma_transaction_type tx_type, struct page **dst,
+                     int dst_count, struct page **src, int src_count,
+                     size_t len)
 {
        return NULL;
 }
@@ -99,46 +140,70 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
  * @cb_fn_param: parameter to pass to the callback routine
  */
 static inline void
-async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param)
+async_tx_sync_epilog(struct async_submit_ctl *submit)
 {
-       if (cb_fn)
-               cb_fn(cb_fn_param);
+       if (submit->cb_fn)
+               submit->cb_fn(submit->cb_param);
 }
 
-void
-async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
-       enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_fn_param);
+typedef union {
+       unsigned long addr;
+       struct page *page;
+       dma_addr_t dma;
+} addr_conv_t;
+
+static inline void
+init_async_submit(struct async_submit_ctl *args, enum async_tx_flags flags,
+                 struct dma_async_tx_descriptor *tx,
+                 dma_async_tx_callback cb_fn, void *cb_param,
+                 addr_conv_t *scribble)
+{
+       args->flags = flags;
+       args->depend_tx = tx;
+       args->cb_fn = cb_fn;
+       args->cb_param = cb_param;
+       args->scribble = scribble;
+}
+
+void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
+                    struct async_submit_ctl *submit);
 
 struct dma_async_tx_descriptor *
 async_xor(struct page *dest, struct page **src_list, unsigned int offset,
-       int src_cnt, size_t len, enum async_tx_flags flags,
-       struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_fn_param);
+         int src_cnt, size_t len, struct async_submit_ctl *submit);
 
 struct dma_async_tx_descriptor *
-async_xor_zero_sum(struct page *dest, struct page **src_list,
-       unsigned int offset, int src_cnt, size_t len,
-       u32 *result, enum async_tx_flags flags,
-       struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_fn_param);
+async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
+             int src_cnt, size_t len, enum sum_check_flags *result,
+             struct async_submit_ctl *submit);
 
 struct dma_async_tx_descriptor *
 async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
-       unsigned int src_offset, size_t len, enum async_tx_flags flags,
-       struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_fn_param);
+            unsigned int src_offset, size_t len,
+            struct async_submit_ctl *submit);
 
 struct dma_async_tx_descriptor *
 async_memset(struct page *dest, int val, unsigned int offset,
-       size_t len, enum async_tx_flags flags,
-       struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_fn_param);
+            size_t len, struct async_submit_ctl *submit);
+
+struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit);
+
+struct dma_async_tx_descriptor *
+async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt,
+                  size_t len, struct async_submit_ctl *submit);
+
+struct dma_async_tx_descriptor *
+async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt,
+                  size_t len, enum sum_check_flags *pqres, struct page *spare,
+                  struct async_submit_ctl *submit);
+
+struct dma_async_tx_descriptor *
+async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb,
+                       struct page **ptrs, struct async_submit_ctl *submit);
 
 struct dma_async_tx_descriptor *
-async_trigger_callback(enum async_tx_flags flags,
-       struct dma_async_tx_descriptor *depend_tx,
-       dma_async_tx_callback cb_fn, void *cb_fn_param);
+async_raid6_datap_recov(int src_num, size_t bytes, int faila,
+                       struct page **ptrs, struct async_submit_ctl *submit);
 
 void async_tx_quiesce(struct dma_async_tx_descriptor **tx);
 #endif /* _ASYNC_TX_H_ */
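
The rework above folds the flags, dependency, callback, and scribble pointers into a single struct async_submit_ctl. A minimal sketch of chaining an xor after a memcpy under the new convention; EXAMPLE_SRCS, the pages, and the callback names are hypothetical:

        #define EXAMPLE_SRCS 4

        struct async_submit_ctl submit;
        addr_conv_t scribble[EXAMPLE_SRCS];
        struct dma_async_tx_descriptor *tx;

        init_async_submit(&submit, 0, NULL, NULL, NULL, scribble);
        tx = async_memcpy(dest, src, 0, 0, PAGE_SIZE, &submit);

        /* ASYNC_TX_FENCE: the xor consumes the memcpy result as an input */
        init_async_submit(&submit, ASYNC_TX_FENCE | ASYNC_TX_ACK, tx,
                          example_done, example_cookie, scribble);
        tx = async_xor(dest, srcs, 0, EXAMPLE_SRCS, PAGE_SIZE, &submit);
        async_tx_issue_pending(tx);
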
index 2046b5b..aece486 100644 (file)
@@ -120,7 +120,7 @@ extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm);
 extern int prepare_bprm_creds(struct linux_binprm *bprm);
 extern void install_exec_creds(struct linux_binprm *bprm);
 extern void do_coredump(long signr, int exit_code, struct pt_regs *regs);
-extern int set_binfmt(struct linux_binfmt *new);
+extern void set_binfmt(struct linux_binfmt *new);
 extern void free_bprm(struct linux_binprm *);
 
 #endif /* __KERNEL__ */
index 90bba9e..b62bb92 100644 (file)
@@ -141,6 +141,38 @@ enum {
        CGRP_WAIT_ON_RMDIR,
 };
 
+/* which pidlist file are we talking about? */
+enum cgroup_filetype {
+       CGROUP_FILE_PROCS,
+       CGROUP_FILE_TASKS,
+};
+
+/*
+ * A pidlist is a list of pids that virtually represents the contents of one
+ * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists,
+ * a pair (one each for procs, tasks) for each pid namespace that's relevant
+ * to the cgroup.
+ */
+struct cgroup_pidlist {
+       /*
+        * used to find which pidlist is wanted. doesn't change as long as
+        * this particular list stays in the list.
+        */
+       struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
+       /* array of xids */
+       pid_t *list;
+       /* how many elements the above list has */
+       int length;
+       /* how many files are using the current array */
+       int use_count;
+       /* each of these stored in a list by its cgroup */
+       struct list_head links;
+       /* pointer to the cgroup we belong to, for list removal purposes */
+       struct cgroup *owner;
+       /* protects the other fields */
+       struct rw_semaphore mutex;
+};
+
 struct cgroup {
        unsigned long flags;            /* "unsigned long" so bitops work */
 
@@ -179,11 +211,12 @@ struct cgroup {
         */
        struct list_head release_list;
 
-       /* pids_mutex protects pids_list and cached pid arrays. */
-       struct rw_semaphore pids_mutex;
-
-       /* Linked list of struct cgroup_pids */
-       struct list_head pids_list;
+       /*
+        * list of pidlists, up to two for each namespace (one for procs, one
+        * for tasks); created on demand.
+        */
+       struct list_head pidlists;
+       struct mutex pidlist_mutex;
 
        /* For RCU-protected deletion */
        struct rcu_head rcu_head;
@@ -227,6 +260,9 @@ struct css_set {
         * during subsystem registration (at boot time).
         */
        struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
+
+       /* For RCU-protected deletion */
+       struct rcu_head rcu_head;
 };
 
 /*
@@ -389,10 +425,11 @@ struct cgroup_subsys {
                                                  struct cgroup *cgrp);
        int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
        void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
-       int (*can_attach)(struct cgroup_subsys *ss,
-                         struct cgroup *cgrp, struct task_struct *tsk);
+       int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
+                         struct task_struct *tsk, bool threadgroup);
        void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                       struct cgroup *old_cgrp, struct task_struct *tsk);
+                       struct cgroup *old_cgrp, struct task_struct *tsk,
+                       bool threadgroup);
        void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
        void (*exit)(struct cgroup_subsys *ss, struct task_struct *task);
        int (*populate)(struct cgroup_subsys *ss,
index 7f62777..ddb7a97 100644 (file)
@@ -27,8 +27,8 @@
  *
  * configfs Copyright (C) 2005 Oracle.  All rights reserved.
  *
- * Please read Documentation/filesystems/configfs.txt before using the
- * configfs interface, ESPECIALLY the parts about reference counts and
+ * Please read Documentation/filesystems/configfs/configfs.txt before using
+ * the configfs interface, ESPECIALLY the parts about reference counts and
  * item destructors.
  */
 
index 9c20c7e..d27a7a0 100644 (file)
@@ -20,6 +20,9 @@
  */
 #ifndef DCA_H
 #define DCA_H
+
+#include <linux/pci.h>
+
 /* DCA Provider API */
 
 /* DCA Notifier Interface */
@@ -36,6 +39,12 @@ struct dca_provider {
        int                      id;
 };
 
+struct dca_domain {
+       struct list_head        node;
+       struct list_head        dca_providers;
+       struct pci_bus          *pci_rc;
+};
+
 struct dca_ops {
        int     (*add_requester)    (struct dca_provider *, struct device *);
        int     (*remove_requester) (struct dca_provider *, struct device *);
@@ -47,7 +56,7 @@ struct dca_ops {
 struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size);
 void free_dca_provider(struct dca_provider *dca);
 int register_dca_provider(struct dca_provider *dca, struct device *dev);
-void unregister_dca_provider(struct dca_provider *dca);
+void unregister_dca_provider(struct dca_provider *dca, struct device *dev);
 
 static inline void *dca_priv(struct dca_provider *dca)
 {
index eb5c2ba..fc1b930 100644 (file)
@@ -9,7 +9,7 @@
  *     2 as published by the Free Software Foundation.
  *
  *  debugfs is for people to use instead of /proc or /sys.
- *  See Documentation/DocBook/kernel-api for more details.
+ *  See Documentation/DocBook/filesystems for more details.
  */
 
 #ifndef _DEBUGFS_H_
index ffefba8..2b9f2ac 100644 (file)
@@ -48,19 +48,20 @@ enum dma_status {
 
 /**
  * enum dma_transaction_type - DMA transaction types/indexes
+ *
+ * Note: The DMA_ASYNC_TX capability is not to be set by drivers.  It is
+ * automatically set as dma devices are registered.
  */
 enum dma_transaction_type {
        DMA_MEMCPY,
        DMA_XOR,
-       DMA_PQ_XOR,
-       DMA_DUAL_XOR,
-       DMA_PQ_UPDATE,
-       DMA_ZERO_SUM,
-       DMA_PQ_ZERO_SUM,
+       DMA_PQ,
+       DMA_XOR_VAL,
+       DMA_PQ_VAL,
        DMA_MEMSET,
-       DMA_MEMCPY_CRC32C,
        DMA_INTERRUPT,
        DMA_PRIVATE,
+       DMA_ASYNC_TX,
        DMA_SLAVE,
 };
 
@@ -70,18 +71,25 @@ enum dma_transaction_type {
 
 /**
  * enum dma_ctrl_flags - DMA flags to augment operation preparation,
- *     control completion, and communicate status.
+ *  control completion, and communicate status.
  * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of
- *     this transaction
+ *  this transaction
  * @DMA_CTRL_ACK - the descriptor cannot be reused until the client
- *     acknowledges receipt, i.e. has has a chance to establish any
- *     dependency chains
+ *  acknowledges receipt, i.e. has had a chance to establish any dependency
+ *  chains
  * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
  * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
  * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single
  *     (if not set, do the source dma-unmapping as page)
  * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single
  *     (if not set, do the destination dma-unmapping as page)
+ * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q
+ * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P
+ * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
+ *  sources that were the result of a previous operation, in the case of a PQ
+ *  operation it continues the calculation with new sources
+ * @DMA_PREP_FENCE - tell the driver that subsequent operations depend
+ *  on the result of this operation
  */
 enum dma_ctrl_flags {
        DMA_PREP_INTERRUPT = (1 << 0),
@@ -90,8 +98,31 @@ enum dma_ctrl_flags {
        DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
        DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4),
        DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5),
+       DMA_PREP_PQ_DISABLE_P = (1 << 6),
+       DMA_PREP_PQ_DISABLE_Q = (1 << 7),
+       DMA_PREP_CONTINUE = (1 << 8),
+       DMA_PREP_FENCE = (1 << 9),
 };
 
+/**
+ * enum sum_check_bits - bit positions of sum_check_flags
+ */
+enum sum_check_bits {
+       SUM_CHECK_P = 0,
+       SUM_CHECK_Q = 1,
+};
+
+/**
+ * enum sum_check_flags - result of async_{xor,pq}_zero_sum operations
+ * @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise
+ * @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise
+ */
+enum sum_check_flags {
+       SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P),
+       SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q),
+};
+
+
 /**
  * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t.
  * See linux/cpumask.h
@@ -180,8 +211,6 @@ typedef void (*dma_async_tx_callback)(void *dma_async_param);
  * @flags: flags to augment operation preparation, control completion, and
  *     communicate status
  * @phys: physical address of the descriptor
- * @tx_list: driver common field for operations that require multiple
- *     descriptors
  * @chan: target channel for this operation
  * @tx_submit: set the prepared descriptor(s) to be executed by the engine
  * @callback: routine to call after this operation is complete
@@ -195,7 +224,6 @@ struct dma_async_tx_descriptor {
        dma_cookie_t cookie;
        enum dma_ctrl_flags flags; /* not a 'long' to pack with cookie */
        dma_addr_t phys;
-       struct list_head tx_list;
        struct dma_chan *chan;
        dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx);
        dma_async_tx_callback callback;
@@ -213,6 +241,11 @@ struct dma_async_tx_descriptor {
  * @global_node: list_head for global dma_device_list
  * @cap_mask: one or more dma_capability flags
  * @max_xor: maximum number of xor sources, 0 if no capability
+ * @max_pq: maximum number of PQ sources and PQ-continue capability
+ * @copy_align: alignment shift for memcpy operations
+ * @xor_align: alignment shift for xor operations
+ * @pq_align: alignment shift for pq operations
+ * @fill_align: alignment shift for memset operations
  * @dev_id: unique device ID
  * @dev: struct device reference for dma mapping api
  * @device_alloc_chan_resources: allocate resources and return the
@@ -220,7 +253,9 @@ struct dma_async_tx_descriptor {
  * @device_free_chan_resources: release DMA channel's resources
  * @device_prep_dma_memcpy: prepares a memcpy operation
  * @device_prep_dma_xor: prepares a xor operation
- * @device_prep_dma_zero_sum: prepares a zero_sum operation
+ * @device_prep_dma_xor_val: prepares a xor validation operation
+ * @device_prep_dma_pq: prepares a pq operation
+ * @device_prep_dma_pq_val: prepares a pq validation operation
  * @device_prep_dma_memset: prepares a memset operation
  * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
  * @device_prep_slave_sg: prepares a slave dma operation
@@ -235,7 +270,13 @@ struct dma_device {
        struct list_head channels;
        struct list_head global_node;
        dma_cap_mask_t  cap_mask;
-       int max_xor;
+       unsigned short max_xor;
+       unsigned short max_pq;
+       u8 copy_align;
+       u8 xor_align;
+       u8 pq_align;
+       u8 fill_align;
+       #define DMA_HAS_PQ_CONTINUE (1 << 15)
 
        int dev_id;
        struct device *dev;
@@ -249,9 +290,17 @@ struct dma_device {
        struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
                struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
                unsigned int src_cnt, size_t len, unsigned long flags);
-       struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)(
+       struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)(
                struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt,
-               size_t len, u32 *result, unsigned long flags);
+               size_t len, enum sum_check_flags *result, unsigned long flags);
+       struct dma_async_tx_descriptor *(*device_prep_dma_pq)(
+               struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+               unsigned int src_cnt, const unsigned char *scf,
+               size_t len, unsigned long flags);
+       struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)(
+               struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+               unsigned int src_cnt, const unsigned char *scf, size_t len,
+               enum sum_check_flags *pqres, unsigned long flags);
        struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
                struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
                unsigned long flags);
@@ -270,6 +319,96 @@ struct dma_device {
        void (*device_issue_pending)(struct dma_chan *chan);
 };
 
+static inline bool dmaengine_check_align(u8 align, size_t off1, size_t off2, size_t len)
+{
+       size_t mask;
+
+       if (!align)
+               return true;
+       mask = (1 << align) - 1;
+       if (mask & (off1 | off2 | len))
+               return false;
+       return true;
+}
+
+static inline bool is_dma_copy_aligned(struct dma_device *dev, size_t off1,
+                                      size_t off2, size_t len)
+{
+       return dmaengine_check_align(dev->copy_align, off1, off2, len);
+}
+
+static inline bool is_dma_xor_aligned(struct dma_device *dev, size_t off1,
+                                     size_t off2, size_t len)
+{
+       return dmaengine_check_align(dev->xor_align, off1, off2, len);
+}
+
+static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1,
+                                    size_t off2, size_t len)
+{
+       return dmaengine_check_align(dev->pq_align, off1, off2, len);
+}
+
+static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1,
+                                      size_t off2, size_t len)
+{
+       return dmaengine_check_align(dev->fill_align, off1, off2, len);
+}
+
+static inline void
+dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue)
+{
+       dma->max_pq = maxpq;
+       if (has_pq_continue)
+               dma->max_pq |= DMA_HAS_PQ_CONTINUE;
+}
+
+static inline bool dmaf_continue(enum dma_ctrl_flags flags)
+{
+       return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE;
+}
+
+static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags)
+{
+       enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P;
+
+       return (flags & mask) == mask;
+}
+
+static inline bool dma_dev_has_pq_continue(struct dma_device *dma)
+{
+       return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE;
+}
+
+static inline unsigned short dma_dev_to_maxpq(struct dma_device *dma)
+{
+       return dma->max_pq & ~DMA_HAS_PQ_CONTINUE;
+}
+
+/* dma_maxpq - reduce maxpq in the face of continued operations
+ * @dma - dma device with PQ capability
+ * @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set
+ *
+ * When an engine does not support native continuation we need 3 extra
+ * source slots to reuse P and Q with the following coefficients:
+ * 1/ {00} * P : remove P from Q', but use it as a source for P'
+ * 2/ {01} * Q : use Q to continue Q' calculation
+ * 3/ {00} * Q : subtract Q from P' to cancel (2)
+ *
+ * In the case where P is disabled we only need 1 extra source:
+ * 1/ {01} * Q : use Q to continue Q' calculation
+ */
+static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags)
+{
+       if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags))
+               return dma_dev_to_maxpq(dma);
+       else if (dmaf_p_disabled_continue(flags))
+               return dma_dev_to_maxpq(dma) - 1;
+       else if (dmaf_continue(flags))
+               return dma_dev_to_maxpq(dma) - 3;
+       BUG();
+}
+
 /* --- public DMA engine API --- */
 
 #ifdef CONFIG_DMA_ENGINE
@@ -299,7 +438,11 @@ static inline void net_dmaengine_put(void)
 #ifdef CONFIG_ASYNC_TX_DMA
 #define async_dmaengine_get()  dmaengine_get()
 #define async_dmaengine_put()  dmaengine_put()
+#ifdef CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH
+#define async_dma_find_channel(type) dma_find_channel(DMA_ASYNC_TX)
+#else
 #define async_dma_find_channel(type) dma_find_channel(type)
+#endif /* CONFIG_ASYNC_TX_DISABLE_CHANNEL_SWITCH */
 #else
 static inline void async_dmaengine_get(void)
 {
@@ -312,7 +455,7 @@ async_dma_find_channel(enum dma_transaction_type type)
 {
        return NULL;
 }
-#endif
+#endif /* CONFIG_ASYNC_TX_DMA */
 
 dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
        void *dest, void *src, size_t len);
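
The dma_maxpq() comment above is easiest to check with concrete numbers. A worked sketch for a hypothetical engine advertising 8 PQ source slots and no native continuation support:

        dma_set_maxpq(dma, 8, 0);

        dma_maxpq(dma, 0);                      /* 8: fresh operation */
        dma_maxpq(dma, DMA_PREP_CONTINUE);      /* 5: three slots re-feed
                                                   P and Q as sources */
        dma_maxpq(dma, DMA_PREP_CONTINUE |
                       DMA_PREP_PQ_DISABLE_P);  /* 7: only Q is re-fed */
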
index f847df9..a34bdf5 100644 (file)
@@ -133,6 +133,7 @@ struct dentry;
 #define FB_ACCEL_NEOMAGIC_NM2230 96    /* NeoMagic NM2230              */
 #define FB_ACCEL_NEOMAGIC_NM2360 97    /* NeoMagic NM2360              */
 #define FB_ACCEL_NEOMAGIC_NM2380 98    /* NeoMagic NM2380              */
+#define FB_ACCEL_PXA3XX                 99     /* PXA3xx                       */
 
 #define FB_ACCEL_SAVAGE4        0x80   /* S3 Savage4                   */
 #define FB_ACCEL_SAVAGE3D       0x81   /* S3 Savage3D                  */
index 5180352..2adaa25 100644 (file)
@@ -595,6 +595,7 @@ struct address_space_operations {
        int (*launder_page) (struct page *);
        int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
                                        unsigned long);
+       int (*error_remove_page)(struct address_space *, struct page *);
 };
 
 /*
@@ -640,7 +641,6 @@ struct block_device {
        struct super_block *    bd_super;
        int                     bd_openers;
        struct mutex            bd_mutex;       /* open/close mutex */
-       struct semaphore        bd_mount_sem;
        struct list_head        bd_inodes;
        void *                  bd_holder;
        int                     bd_holders;
@@ -1315,7 +1315,7 @@ struct super_block {
        unsigned long           s_blocksize;
        unsigned char           s_blocksize_bits;
        unsigned char           s_dirt;
-       unsigned long long      s_maxbytes;     /* Max file size */
+       loff_t                  s_maxbytes;     /* Max file size */
        struct file_system_type *s_type;
        const struct super_operations   *s_op;
        const struct dquot_operations   *dq_op;
@@ -2156,6 +2156,7 @@ extern ino_t iunique(struct super_block *, ino_t);
 extern int inode_needs_sync(struct inode *inode);
 extern void generic_delete_inode(struct inode *inode);
 extern void generic_drop_inode(struct inode *inode);
+extern int generic_detach_inode(struct inode *inode);
 
 extern struct inode *ilookup5_nowait(struct super_block *sb,
                unsigned long hashval, int (*test)(struct inode *, void *),
@@ -2334,6 +2335,7 @@ extern void get_filesystem(struct file_system_type *fs);
 extern void put_filesystem(struct file_system_type *fs);
 extern struct file_system_type *get_fs_type(const char *name);
 extern struct super_block *get_super(struct block_device *);
+extern struct super_block *get_active_super(struct block_device *bdev);
 extern struct super_block *user_get_super(dev_t);
 extern void drop_super(struct super_block *sb);
 
@@ -2381,7 +2383,8 @@ extern int buffer_migrate_page(struct address_space *,
 #define buffer_migrate_page NULL
 #endif
 
-extern int inode_change_ok(struct inode *, struct iattr *);
+extern int inode_change_ok(const struct inode *, struct iattr *);
+extern int inode_newsize_ok(const struct inode *, loff_t offset);
 extern int __must_check inode_setattr(struct inode *, struct iattr *);
 
 extern void file_update_time(struct file *file);
@@ -2467,7 +2470,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
                          size_t len, loff_t *ppos);
 
 struct ctl_table;
-int proc_nr_files(struct ctl_table *table, int write, struct file *filp,
+int proc_nr_files(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos);
 
 int __init get_filesystem_list(char *buf);
index 3c0924a..cd3d2ab 100644 (file)
@@ -19,7 +19,7 @@
 extern int ftrace_enabled;
 extern int
 ftrace_enable_sysctl(struct ctl_table *table, int write,
-                    struct file *filp, void __user *buffer, size_t *lenp,
+                    void __user *buffer, size_t *lenp,
                     loff_t *ppos);
 
 typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);
@@ -94,7 +94,7 @@ static inline void ftrace_start(void) { }
 extern int stack_tracer_enabled;
 int
 stack_trace_sysctl(struct ctl_table *table, int write,
-                  struct file *file, void __user *buffer, size_t *lenp,
+                  void __user *buffer, size_t *lenp,
                   loff_t *ppos);
 #endif
 
index 34956c8..8ec1799 100644 (file)
@@ -4,11 +4,6 @@
 #include <linux/compiler.h>
 #include <linux/types.h>
 
-struct inode;
-struct mm_struct;
-struct task_struct;
-union ktime;
-
 /* Second argument to futex syscall */
 
 
@@ -129,6 +124,11 @@ struct robust_list_head {
 #define FUTEX_BITSET_MATCH_ANY 0xffffffff
 
 #ifdef __KERNEL__
+struct inode;
+struct mm_struct;
+struct task_struct;
+union ktime;
+
 long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout,
              u32 __user *uaddr2, u32 val2, u32 val3);
 
index 176e7ee..1693799 100644 (file)
@@ -3,15 +3,15 @@
 
 #include <linux/fs.h>
 
+struct ctl_table;
+struct user_struct;
+
 #ifdef CONFIG_HUGETLB_PAGE
 
 #include <linux/mempolicy.h>
 #include <linux/shm.h>
 #include <asm/tlbflush.h>
 
-struct ctl_table;
-struct user_struct;
-
 int PageHuge(struct page *page);
 
 static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
@@ -20,9 +20,9 @@ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
 }
 
 void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
-int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
-int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
-int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
+int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
+int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
+int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
 int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
                        struct page **, struct vm_area_struct **,
@@ -187,7 +187,11 @@ static inline void set_file_hugepages(struct file *file)
 
 #define is_file_hugepages(file)                        0
 #define set_file_hugepages(file)               BUG()
-#define hugetlb_file_setup(name,size,acct,user,creat)  ERR_PTR(-ENOSYS)
+static inline struct file *hugetlb_file_setup(const char *name, size_t size,
+               int acctflag, struct user_struct **user, int creat_flags)
+{
+       return ERR_PTR(-ENOSYS);
+}
 
 #endif /* !CONFIG_HUGETLBFS */
 
index e46a073..bf9213b 100644 (file)
@@ -118,6 +118,9 @@ static inline bool mem_cgroup_disabled(void)
 
 extern bool mem_cgroup_oom_called(struct task_struct *task);
 void mem_cgroup_update_mapped_file_stat(struct page *page, int val);
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+                                               gfp_t gfp_mask, int nid,
+                                               int zid);
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct mem_cgroup;
 
@@ -276,6 +279,13 @@ static inline void mem_cgroup_update_mapped_file_stat(struct page *page,
 {
 }
 
+static inline
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+                                           gfp_t gfp_mask, int nid, int zid)
+{
+       return 0;
+}
+
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #endif /* _LINUX_MEMCONTROL_H */
index b6eae5e..24c3956 100644 (file)
@@ -288,7 +288,7 @@ static inline int is_vmalloc_addr(const void *x)
 #ifdef CONFIG_MMU
 extern int is_vmalloc_or_module_addr(const void *x);
 #else
-static int is_vmalloc_or_module_addr(const void *x)
+static inline int is_vmalloc_or_module_addr(const void *x)
 {
        return 0;
 }
@@ -695,11 +695,12 @@ static inline int page_mapped(struct page *page)
 #define VM_FAULT_SIGBUS        0x0002
 #define VM_FAULT_MAJOR 0x0004
 #define VM_FAULT_WRITE 0x0008  /* Special case for get_user_pages */
+#define VM_FAULT_HWPOISON 0x0010       /* Hit poisoned page */
 
 #define VM_FAULT_NOPAGE        0x0100  /* ->fault installed the pte, not return page */
 #define VM_FAULT_LOCKED        0x0200  /* ->fault locked the returned page */
 
-#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS)
+#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON)
 
 /*
  * Can be called by the pagefault handler when it gets a VM_FAULT_OOM.
@@ -791,8 +792,14 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
        unmap_mapping_range(mapping, holebegin, holelen, 0);
 }
 
-extern int vmtruncate(struct inode * inode, loff_t offset);
-extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
+extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
+extern int vmtruncate(struct inode *inode, loff_t offset);
+extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end);
+
+int truncate_inode_page(struct address_space *mapping, struct page *page);
+int generic_error_remove_page(struct address_space *mapping, struct page *page);
+
+int invalidate_inode_page(struct page *page);
 
 #ifdef CONFIG_MMU
 extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -1279,7 +1286,7 @@ int in_gate_area_no_task(unsigned long addr);
 #define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);})
 #endif /* __HAVE_ARCH_GATE_AREA */
 
-int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *,
+int drop_caches_sysctl_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
 unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
                        unsigned long lru_pages);
@@ -1308,5 +1315,12 @@ void vmemmap_populate_print_last(void);
 extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
                                 size_t size);
 extern void refund_locked_memory(struct mm_struct *mm, size_t size);
+
+extern void memory_failure(unsigned long pfn, int trapno);
+extern int __memory_failure(unsigned long pfn, int trapno, int ref);
+extern int sysctl_memory_failure_early_kill;
+extern int sysctl_memory_failure_recovery;
+extern atomic_long_t mce_bad_pages;
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
index 0042090..21d6aa4 100644 (file)
@@ -240,6 +240,8 @@ struct mm_struct {
 
        unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
 
+       struct linux_binfmt *binfmt;
+
        cpumask_t cpu_vm_mask;
 
        /* Architecture-specific MM context */
@@ -259,11 +261,10 @@ struct mm_struct {
        unsigned long flags; /* Must use atomic bitops to access the bits */
 
        struct core_state *core_state; /* coredumping support */
-
-       /* aio bits */
+#ifdef CONFIG_AIO
        spinlock_t              ioctx_lock;
        struct hlist_head       ioctx_list;
-
+#endif
 #ifdef CONFIG_MM_OWNER
        /*
         * "owner" points to a task that is regarded as the canonical
index 652ef01..6f75617 100644 (file)
@@ -755,21 +755,20 @@ static inline int is_dma(struct zone *zone)
 
 /* These two functions are used to setup the per zone pages min values */
 struct ctl_table;
-struct file;
-int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *, 
+int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
 extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
-int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
+int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
-int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *,
+int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
 int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
-                       struct file *, void __user *, size_t *, loff_t *);
+                       void __user *, size_t *, loff_t *);
 int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
-                       struct file *, void __user *, size_t *, loff_t *);
+                       void __user *, size_t *, loff_t *);
 
 extern int numa_zonelist_order_handler(struct ctl_table *, int,
-                       struct file *, void __user *, size_t *, loff_t *);
+                       void __user *, size_t *, loff_t *);
 extern char numa_zonelist_order[];
 #define NUMA_ZONELIST_ORDER_LEN 16     /* string buffer size */
 
index 1c755b2..482efc8 100644 (file)
@@ -128,7 +128,10 @@ extern struct module __this_module;
  */
 #define MODULE_LICENSE(_license) MODULE_INFO(license, _license)
 
-/* Author, ideally of form NAME[, NAME]*[ and NAME] */
+/*
+ * Author(s), use "Name <email>" or just "Name", for multiple
+ * authors use multiple MODULE_AUTHOR() statements/lines.
+ */
 #define MODULE_AUTHOR(_author) MODULE_INFO(author, _author)
   
 /* What your module does. */
@@ -308,10 +311,14 @@ struct module
 #endif
 
 #ifdef CONFIG_KALLSYMS
-       /* We keep the symbol and string tables for kallsyms. */
-       Elf_Sym *symtab;
-       unsigned int num_symtab;
-       char *strtab;
+       /*
+        * We keep the symbol and string tables for kallsyms.
+        * The core_* fields below are temporary, loader-only (they
+        * could really be discarded after module init).
+        */
+       Elf_Sym *symtab, *core_symtab;
+       unsigned int num_symtab, core_num_syms;
+       char *strtab, *core_strtab;
 
        /* Section attributes */
        struct module_sect_attrs *sect_attrs;
index 13de789..6b202b1 100644 (file)
@@ -51,6 +51,9 @@
  * PG_buddy is set to indicate that the page is free and in the buddy system
  * (see mm/page_alloc.c).
  *
+ * PG_hwpoison indicates that a page got corrupted in hardware and contains
+ * data with incorrect ECC bits that triggered a machine check. Accessing it
+ * is not safe, since that may cause another machine check. Don't touch!
  */
 
 /*
@@ -101,6 +104,9 @@ enum pageflags {
 #endif
 #ifdef CONFIG_ARCH_USES_PG_UNCACHED
        PG_uncached,            /* Page has been mapped as uncached */
+#endif
+#ifdef CONFIG_MEMORY_FAILURE
+       PG_hwpoison,            /* hardware poisoned page. Don't touch */
 #endif
        __NR_PAGEFLAGS,
 
@@ -269,6 +275,15 @@ PAGEFLAG(Uncached, uncached)
 PAGEFLAG_FALSE(Uncached)
 #endif
 
+#ifdef CONFIG_MEMORY_FAILURE
+PAGEFLAG(HWPoison, hwpoison)
+TESTSETFLAG(HWPoison, hwpoison)
+#define __PG_HWPOISON (1UL << PG_hwpoison)
+#else
+PAGEFLAG_FALSE(HWPoison)
+#define __PG_HWPOISON 0
+#endif
+
 static inline int PageUptodate(struct page *page)
 {
        int ret = test_bit(PG_uptodate, &(page)->flags);
@@ -393,7 +408,7 @@ static inline void __ClearPageTail(struct page *page)
         1 << PG_private | 1 << PG_private_2 | \
         1 << PG_buddy   | 1 << PG_writeback | 1 << PG_reserved | \
         1 << PG_slab    | 1 << PG_swapcache | 1 << PG_active | \
-        1 << PG_unevictable | __PG_MLOCKED)
+        1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON)
 
 /*
  * Flags checked when a page is prepped for return by the page allocator.
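
The PAGEFLAG()/TESTSETFLAG() invocations above generate the usual accessor
family (PageHWPoison(), SetPageHWPoison(), TestSetPageHWPoison(), ...). A
hedged sketch of how the memory-failure code is expected to use them; the
surrounding control flow is paraphrased, not quoted:

    /* Sketch: claim a page frame for poison handling exactly once. */
    if (TestSetPageHWPoison(p)) {
            /* Somebody else already poisoned this page frame. */
            return 0;
    }
    atomic_long_add(1, &mce_bad_pages);
    /* ... later code must check PageHWPoison(p) before touching it ... */
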
index ada779f..4b938d4 100644 (file)
@@ -38,6 +38,7 @@ enum {
        PCG_LOCK,  /* page cgroup is locked */
        PCG_CACHE, /* charged as cache */
        PCG_USED, /* this object is in use. */
+       PCG_ACCT_LRU, /* page has been accounted for */
 };
 
 #define TESTPCGFLAG(uname, lname)                      \
@@ -52,11 +53,23 @@ static inline void SetPageCgroup##uname(struct page_cgroup *pc)\
 static inline void ClearPageCgroup##uname(struct page_cgroup *pc)      \
        { clear_bit(PCG_##lname, &pc->flags);  }
 
+#define TESTCLEARPCGFLAG(uname, lname)                 \
+static inline int TestClearPageCgroup##uname(struct page_cgroup *pc)   \
+       { return test_and_clear_bit(PCG_##lname, &pc->flags);  }
+
 /* Cache flag is set only once (at allocation) */
 TESTPCGFLAG(Cache, CACHE)
+CLEARPCGFLAG(Cache, CACHE)
+SETPCGFLAG(Cache, CACHE)
 
 TESTPCGFLAG(Used, USED)
 CLEARPCGFLAG(Used, USED)
+SETPCGFLAG(Used, USED)
+
+SETPCGFLAG(AcctLRU, ACCT_LRU)
+CLEARPCGFLAG(AcctLRU, ACCT_LRU)
+TESTPCGFLAG(AcctLRU, ACCT_LRU)
+TESTCLEARPCGFLAG(AcctLRU, ACCT_LRU)
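
Each invocation above expands to a one-line inline helper. Written out by
hand, TESTCLEARPCGFLAG(AcctLRU, ACCT_LRU) yields:

    static inline int TestClearPageCgroupAcctLRU(struct page_cgroup *pc)
    {
            return test_and_clear_bit(PCG_ACCT_LRU, &pc->flags);
    }
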
 
 static inline int page_cgroup_nid(struct page_cgroup *pc)
 {
index 7803565..da1fda8 100644 (file)
 #define PCI_DEVICE_ID_INTEL_E7525_MCH  0x359e
 #define PCI_DEVICE_ID_INTEL_IOAT_CNB   0x360b
 #define PCI_DEVICE_ID_INTEL_FBD_CNB    0x360c
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF0  0x3710
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF1  0x3711
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF2  0x3712
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF3  0x3713
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF4  0x3714
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF5  0x3715
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF6  0x3716
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF7  0x3717
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF8  0x3718
+#define PCI_DEVICE_ID_INTEL_IOAT_JSF9  0x3719
 #define PCI_DEVICE_ID_INTEL_ICH10_0    0x3a14
 #define PCI_DEVICE_ID_INTEL_ICH10_1    0x3a16
 #define PCI_DEVICE_ID_INTEL_ICH10_2    0x3a18
index 07bff66..9311505 100644 (file)
@@ -88,4 +88,6 @@
 #define PR_TASK_PERF_EVENTS_DISABLE            31
 #define PR_TASK_PERF_EVENTS_ENABLE             32
 
+#define PR_MCE_KILL    33
+
 #endif /* _LINUX_PRCTL_H */
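
PR_MCE_KILL lets a process pick its own machine-check kill policy (see the
PF_MCE_PROCESS and PF_MCE_EARLY task flags later in this series). A user-space
sketch; the meaning of the second and third arguments (enable the per-process
policy, choose early kill) is an assumption based on the accompanying
implementation, not something this header defines:

    /* Hypothetical usage; argument semantics are assumed. */
    #include <sys/prctl.h>

    #ifndef PR_MCE_KILL
    #define PR_MCE_KILL 33
    #endif

    int main(void)
    {
            /* arg2 = 1: use a per-process policy; arg3 = 1: early kill. */
            return prctl(PR_MCE_KILL, 1, 1, 0, 0);
    }
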
index 953fc05..14a86bc 100644 (file)
@@ -140,7 +140,7 @@ struct rchan_callbacks
         * cause relay_open() to create a single global buffer rather
         * than the default set of per-cpu buffers.
         *
-        * See Documentation/filesystems/relayfs.txt for more info.
+        * See Documentation/filesystems/relay.txt for more info.
         */
        struct dentry *(*create_buf_file)(const char *filename,
                                          struct dentry *parent,
index 511f42f..731af71 100644 (file)
@@ -34,6 +34,10 @@ struct res_counter {
         * the limit that usage cannot exceed
         */
        unsigned long long limit;
+       /*
+        * the limit that usage can exceed
+        */
+       unsigned long long soft_limit;
        /*
         * the number of unsuccessful attempts to consume the resource
         */
@@ -87,6 +91,7 @@ enum {
        RES_MAX_USAGE,
        RES_LIMIT,
        RES_FAILCNT,
+       RES_SOFT_LIMIT,
 };
 
 /*
@@ -109,7 +114,8 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent);
 int __must_check res_counter_charge_locked(struct res_counter *counter,
                unsigned long val);
 int __must_check res_counter_charge(struct res_counter *counter,
-               unsigned long val, struct res_counter **limit_fail_at);
+               unsigned long val, struct res_counter **limit_fail_at,
+               struct res_counter **soft_limit_at);
 
 /*
  * uncharge - tell that some portion of the resource is released
@@ -122,7 +128,8 @@ int __must_check res_counter_charge(struct res_counter *counter,
  */
 
 void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val);
-void res_counter_uncharge(struct res_counter *counter, unsigned long val);
+void res_counter_uncharge(struct res_counter *counter, unsigned long val,
+                               bool *was_soft_limit_excess);
 
 static inline bool res_counter_limit_check_locked(struct res_counter *cnt)
 {
@@ -132,6 +139,36 @@ static inline bool res_counter_limit_check_locked(struct res_counter *cnt)
        return false;
 }
 
+static inline bool res_counter_soft_limit_check_locked(struct res_counter *cnt)
+{
+       if (cnt->usage < cnt->soft_limit)
+               return true;
+
+       return false;
+}
+
+/**
+ * res_counter_soft_limit_excess - get the difference between usage and the soft limit
+ * @cnt: The counter
+ *
+ * Returns 0 if usage is less than or equal to the soft limit;
+ * otherwise, returns the difference between usage and the soft limit.
+ */
+static inline unsigned long long
+res_counter_soft_limit_excess(struct res_counter *cnt)
+{
+       unsigned long long excess;
+       unsigned long flags;
+
+       spin_lock_irqsave(&cnt->lock, flags);
+       if (cnt->usage <= cnt->soft_limit)
+               excess = 0;
+       else
+               excess = cnt->usage - cnt->soft_limit;
+       spin_unlock_irqrestore(&cnt->lock, flags);
+       return excess;
+}
+
 /*
 * Helper function to detect if the cgroup is within its limit or
  * not. It's currently called from cgroup_rss_prepare()
@@ -147,6 +184,17 @@ static inline bool res_counter_check_under_limit(struct res_counter *cnt)
        return ret;
 }
 
+static inline bool res_counter_check_under_soft_limit(struct res_counter *cnt)
+{
+       bool ret;
+       unsigned long flags;
+
+       spin_lock_irqsave(&cnt->lock, flags);
+       ret = res_counter_soft_limit_check_locked(cnt);
+       spin_unlock_irqrestore(&cnt->lock, flags);
+       return ret;
+}
+
 static inline void res_counter_reset_max(struct res_counter *cnt)
 {
        unsigned long flags;
@@ -180,4 +228,16 @@ static inline int res_counter_set_limit(struct res_counter *cnt,
        return ret;
 }
 
+static inline int
+res_counter_set_soft_limit(struct res_counter *cnt,
+                               unsigned long long soft_limit)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&cnt->lock, flags);
+       cnt->soft_limit = soft_limit;
+       spin_unlock_irqrestore(&cnt->lock, flags);
+       return 0;
+}
+
 #endif
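
Unlike the hard limit, the soft limit does not make charges fail;
res_counter_soft_limit_excess() only reports how far usage is past it. A
sketch of how a controller might drive reclaim from it (example_reclaim() is
a placeholder, not a real API):

    static void example_check_soft_limit(struct res_counter *cnt)
    {
            unsigned long long excess = res_counter_soft_limit_excess(cnt);

            /* e.g. after res_counter_set_soft_limit(cnt, 512ULL << 20) */
            if (excess)
                    example_reclaim(excess);    /* placeholder reclaim hook */
    }
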
index 477841d..cb0ba70 100644 (file)
@@ -81,7 +81,19 @@ static inline void page_dup_rmap(struct page *page)
  */
 int page_referenced(struct page *, int is_locked,
                        struct mem_cgroup *cnt, unsigned long *vm_flags);
-int try_to_unmap(struct page *, int ignore_refs);
+enum ttu_flags {
+       TTU_UNMAP = 0,                  /* unmap mode */
+       TTU_MIGRATION = 1,              /* migration mode */
+       TTU_MUNLOCK = 2,                /* munlock mode */
+       TTU_ACTION_MASK = 0xff,
+
+       TTU_IGNORE_MLOCK = (1 << 8),    /* ignore mlock */
+       TTU_IGNORE_ACCESS = (1 << 9),   /* don't age */
+       TTU_IGNORE_HWPOISON = (1 << 10),/* corrupted page is recoverable */
+};
+#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
+
+int try_to_unmap(struct page *, enum ttu_flags flags);
 
 /*
  * Called from mm/filemap_xip.c to unmap empty zero page
@@ -108,6 +120,13 @@ int page_mkclean(struct page *);
  */
 int try_to_munlock(struct page *);
 
+/*
+ * Called by memory-failure.c to kill processes.
+ */
+struct anon_vma *page_lock_anon_vma(struct page *page);
+void page_unlock_anon_vma(struct anon_vma *anon_vma);
+int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
+
 #else  /* !CONFIG_MMU */
 
 #define anon_vma_init()                do {} while (0)
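
try_to_unmap() callers now combine exactly one action mode with optional
TTU_IGNORE_* modifier bits; TTU_ACTION() recovers the mode. A sketch of the
composition (call sites paraphrased):

    /* Sketch: one action mode plus optional modifiers. */
    enum ttu_flags flags = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
    int ret = try_to_unmap(page, flags);

    /* TTU_ACTION() masks off the modifier bits, leaving TTU_UNMAP here. */
    BUG_ON(TTU_ACTION(flags) != TTU_UNMAP);
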
index 848d1f2..75e6e60 100644 (file)
@@ -309,7 +309,7 @@ extern void softlockup_tick(void);
 extern void touch_softlockup_watchdog(void);
 extern void touch_all_softlockup_watchdogs(void);
 extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
-                                   struct file *filp, void __user *buffer,
+                                   void __user *buffer,
                                    size_t *lenp, loff_t *ppos);
 extern unsigned int  softlockup_panic;
 extern int softlockup_thresh;
@@ -331,7 +331,7 @@ extern unsigned long sysctl_hung_task_check_count;
 extern unsigned long sysctl_hung_task_timeout_secs;
 extern unsigned long sysctl_hung_task_warnings;
 extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
-                                        struct file *filp, void __user *buffer,
+                                        void __user *buffer,
                                         size_t *lenp, loff_t *ppos);
 #endif
 
@@ -1271,7 +1271,6 @@ struct task_struct {
        struct mm_struct *mm, *active_mm;
 
 /* task state */
-       struct linux_binfmt *binfmt;
        int exit_state;
        int exit_code, exit_signal;
        int pdeath_signal;  /*  The signal sent when the parent dies  */
@@ -1735,6 +1734,7 @@ extern cputime_t task_gtime(struct task_struct *p);
 #define PF_EXITPIDONE  0x00000008      /* pi exit done on shut down */
 #define PF_VCPU                0x00000010      /* I'm a virtual CPU */
 #define PF_FORKNOEXEC  0x00000040      /* forked but didn't exec */
+#define PF_MCE_PROCESS  0x00000080      /* process policy on mce errors */
 #define PF_SUPERPRIV   0x00000100      /* used super-user privileges */
 #define PF_DUMPCORE    0x00000200      /* dumped core */
 #define PF_SIGNALED    0x00000400      /* killed by a signal */
@@ -1754,6 +1754,7 @@ extern cputime_t task_gtime(struct task_struct *p);
 #define PF_SPREAD_PAGE 0x01000000      /* Spread page cache over cpuset */
 #define PF_SPREAD_SLAB 0x02000000      /* Spread some slab caches over cpuset */
 #define PF_THREAD_BOUND        0x04000000      /* Thread bound to specific cpu */
+#define PF_MCE_EARLY    0x08000000      /* Early kill for mce process policy */
 #define PF_MEMPOLICY   0x10000000      /* Non-default NUMA mempolicy */
 #define PF_MUTEX_TESTER        0x20000000      /* Thread belongs to the rt mutex tester */
 #define PF_FREEZER_SKIP        0x40000000      /* Freezer should not count it as freezeable */
@@ -1906,7 +1907,7 @@ extern unsigned int sysctl_sched_time_avg;
 extern unsigned int sysctl_timer_migration;
 
 int sched_nr_latency_handler(struct ctl_table *table, int write,
-               struct file *file, void __user *buffer, size_t *length,
+               void __user *buffer, size_t *length,
                loff_t *ppos);
 #endif
 #ifdef CONFIG_SCHED_DEBUG
@@ -1924,7 +1925,7 @@ extern unsigned int sysctl_sched_rt_period;
 extern int sysctl_sched_rt_runtime;
 
 int sched_rt_handler(struct ctl_table *table, int write,
-               struct file *filp, void __user *buffer, size_t *lenp,
+               void __user *buffer, size_t *lenp,
                loff_t *ppos);
 
 extern unsigned int sysctl_sched_compat_yield;
@@ -2059,6 +2060,7 @@ extern int kill_pgrp(struct pid *pid, int sig, int priv);
 extern int kill_pid(struct pid *pid, int sig, int priv);
 extern int kill_proc_info(int, struct siginfo *, pid_t);
 extern int do_notify_parent(struct task_struct *, int);
+extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
 extern void force_sig(int, struct task_struct *);
 extern void force_sig_specific(int, struct task_struct *);
 extern int send_sig(int, struct task_struct *, int);
@@ -2336,7 +2338,10 @@ static inline int signal_pending(struct task_struct *p)
        return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
 }
 
-extern int __fatal_signal_pending(struct task_struct *p);
+static inline int __fatal_signal_pending(struct task_struct *p)
+{
+       return unlikely(sigismember(&p->pending.signal, SIGKILL));
+}
 
 static inline int fatal_signal_pending(struct task_struct *p)
 {
index d050b66..239e40d 100644 (file)
@@ -133,7 +133,7 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
                return PAGE_ALIGN(mmap_min_addr);
        return hint;
 }
-extern int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp,
+extern int mmap_min_addr_handler(struct ctl_table *table, int write,
                                 void __user *buffer, size_t *lenp, loff_t *ppos);
 
 #ifdef CONFIG_SECURITY
index 0c6a86b..8366d8f 100644 (file)
@@ -35,6 +35,44 @@ struct seq_operations {
 
 #define SEQ_SKIP 1
 
+/**
+ * seq_get_buf - get buffer to write arbitrary data to
+ * @m: the seq_file handle
+ * @bufp: the beginning of the buffer is stored here
+ *
+ * Return the number of bytes available in the buffer, or zero if
+ * there's no space.
+ */
+static inline size_t seq_get_buf(struct seq_file *m, char **bufp)
+{
+       BUG_ON(m->count > m->size);
+       if (m->count < m->size)
+               *bufp = m->buf + m->count;
+       else
+               *bufp = NULL;
+
+       return m->size - m->count;
+}
+
+/**
+ * seq_commit - commit data to the buffer
+ * @m: the seq_file handle
+ * @num: the number of bytes to commit
+ *
+ * Commit @num bytes of data written to a buffer previously acquired
+ * by seq_buf_get.  To signal an error condition, or that the data
+ * didn't fit in the available space, pass a negative @num value.
+ */
+static inline void seq_commit(struct seq_file *m, int num)
+{
+       if (num < 0) {
+               m->count = m->size;
+       } else {
+               BUG_ON(m->count + num > m->size);
+               m->count += num;
+       }
+}
+
 char *mangle_path(char *s, char *p, char *esc);
 int seq_open(struct file *, const struct seq_operations *);
 ssize_t seq_read(struct file *, char __user *, size_t, loff_t *);
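
seq_get_buf() and seq_commit() bracket a direct write into the seq_file
buffer: grab the free space, format into it, then commit the byte count (or a
negative value so seq_file enlarges the buffer and retries). A sketch of a
show() method using the pair:

    static int example_show(struct seq_file *m, void *v)
    {
            char *buf;
            size_t size = seq_get_buf(m, &buf);
            int len = -1;

            if (size)
                    len = snprintf(buf, size, "example: %d\n", 42);
            /* A negative commit tells seq_file the data didn't fit. */
            seq_commit(m, (len >= 0 && len < (int)size) ? len : -1);
            return 0;
    }
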
index c755283..ab9272c 100644 (file)
@@ -233,6 +233,8 @@ static inline int valid_signal(unsigned long sig)
 }
 
 extern int next_signal(struct sigpending *pending, sigset_t *mask);
+extern int do_send_sig_info(int sig, struct siginfo *info,
+                               struct task_struct *p, bool group);
 extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p);
 extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
 extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig,
index 6c990e6..4ec9001 100644 (file)
@@ -34,15 +34,37 @@ static inline int current_is_kswapd(void)
  * the type/offset into the pte as 5/27 as well.
  */
 #define MAX_SWAPFILES_SHIFT    5
-#ifndef CONFIG_MIGRATION
-#define MAX_SWAPFILES          (1 << MAX_SWAPFILES_SHIFT)
+
+/*
+ * Use some of the swap file numbers for other purposes. This
+ * is a convenient way to hook into the VM to trigger special
+ * actions on faults.
+ */
+
+/*
+ * NUMA node memory migration support
+ */
+#ifdef CONFIG_MIGRATION
+#define SWP_MIGRATION_NUM 2
+#define SWP_MIGRATION_READ     (MAX_SWAPFILES + SWP_HWPOISON_NUM)
+#define SWP_MIGRATION_WRITE    (MAX_SWAPFILES + SWP_HWPOISON_NUM + 1)
 #else
-/* Use last two entries for page migration swap entries */
-#define MAX_SWAPFILES          ((1 << MAX_SWAPFILES_SHIFT)-2)
-#define SWP_MIGRATION_READ     MAX_SWAPFILES
-#define SWP_MIGRATION_WRITE    (MAX_SWAPFILES + 1)
+#define SWP_MIGRATION_NUM 0
 #endif
 
+/*
+ * Handling of hardware poisoned pages with memory corruption.
+ */
+#ifdef CONFIG_MEMORY_FAILURE
+#define SWP_HWPOISON_NUM 1
+#define SWP_HWPOISON           MAX_SWAPFILES
+#else
+#define SWP_HWPOISON_NUM 0
+#endif
+
+#define MAX_SWAPFILES \
+       ((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
+
 /*
  * Magic header for a swap area. The first part of the union is
  * what the swap magic looks like for the old (limited to 128MB)
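
Worked example of the arithmetic above, assuming MAX_SWAPFILES_SHIFT = 5 with
both CONFIG_MIGRATION and CONFIG_MEMORY_FAILURE enabled:

    /*
     * MAX_SWAPFILES       = 32 - 2 - 1 = 29  (swap types 0..28 are real)
     * SWP_HWPOISON        = 29
     * SWP_MIGRATION_READ  = 29 + 1 = 30
     * SWP_MIGRATION_WRITE = 29 + 2 = 31
     */
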
@@ -217,6 +239,11 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
                                                  gfp_t gfp_mask, bool noswap,
                                                  unsigned int swappiness);
+extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
+                                               gfp_t gfp_mask, bool noswap,
+                                               unsigned int swappiness,
+                                               struct zone *zone,
+                                               int nid);
 extern int __isolate_lru_page(struct page *page, int mode, int file);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
@@ -240,7 +267,7 @@ extern int page_evictable(struct page *page, struct vm_area_struct *vma);
 extern void scan_mapping_unevictable_pages(struct address_space *);
 
 extern unsigned long scan_unevictable_pages;
-extern int scan_unevictable_handler(struct ctl_table *, int, struct file *,
+extern int scan_unevictable_handler(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
 extern int scan_unevictable_register_node(struct node *node);
 extern void scan_unevictable_unregister_node(struct node *node);
index 6ec39ab..cd42e30 100644 (file)
@@ -131,3 +131,41 @@ static inline int is_write_migration_entry(swp_entry_t entry)
 
 #endif
 
+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Support for hardware poisoned pages
+ */
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+       BUG_ON(!PageLocked(page));
+       return swp_entry(SWP_HWPOISON, page_to_pfn(page));
+}
+
+static inline int is_hwpoison_entry(swp_entry_t entry)
+{
+       return swp_type(entry) == SWP_HWPOISON;
+}
+#else
+
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+       return swp_entry(0, 0);
+}
+
+static inline int is_hwpoison_entry(swp_entry_t swp)
+{
+       return 0;
+}
+#endif
+
+#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION)
+static inline int non_swap_entry(swp_entry_t entry)
+{
+       return swp_type(entry) >= MAX_SWAPFILES;
+}
+#else
+static inline int non_swap_entry(swp_entry_t entry)
+{
+       return 0;
+}
+#endif
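
Poisoned pages are remembered in page tables as special swap entries so a
later fault can detect them. A sketch of the fault-side check, paraphrasing
how do_swap_page() is expected to use these helpers in this series
(VM_FAULT_HWPOISON is the new fault code added alongside):

    /* Sketch of the fault-path check (inside a fault handler). */
    swp_entry_t entry = pte_to_swp_entry(orig_pte);

    if (unlikely(non_swap_entry(entry))) {
            if (is_migration_entry(entry))
                    migration_entry_wait(mm, pmd, address);
            else if (is_hwpoison_entry(entry))
                    ret = VM_FAULT_HWPOISON;
            else
                    ret = VM_FAULT_SIGBUS;
            goto out;
    }
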
index e76d3b2..1e4743e 100644 (file)
@@ -29,7 +29,6 @@
 #include <linux/types.h>
 #include <linux/compiler.h>
 
-struct file;
 struct completion;
 
 #define CTL_MAXNAME 10         /* how many path components do we allow in a
@@ -977,25 +976,25 @@ typedef int ctl_handler (struct ctl_table *table,
                         void __user *oldval, size_t __user *oldlenp,
                         void __user *newval, size_t newlen);
 
-typedef int proc_handler (struct ctl_table *ctl, int write, struct file * filp,
+typedef int proc_handler (struct ctl_table *ctl, int write,
                          void __user *buffer, size_t *lenp, loff_t *ppos);
 
-extern int proc_dostring(struct ctl_table *, int, struct file *,
+extern int proc_dostring(struct ctl_table *, int,
                         void __user *, size_t *, loff_t *);
-extern int proc_dointvec(struct ctl_table *, int, struct file *,
+extern int proc_dointvec(struct ctl_table *, int,
                         void __user *, size_t *, loff_t *);
-extern int proc_dointvec_minmax(struct ctl_table *, int, struct file *,
+extern int proc_dointvec_minmax(struct ctl_table *, int,
                                void __user *, size_t *, loff_t *);
-extern int proc_dointvec_jiffies(struct ctl_table *, int, struct file *,
+extern int proc_dointvec_jiffies(struct ctl_table *, int,
                                 void __user *, size_t *, loff_t *);
-extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, struct file *,
+extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
-extern int proc_dointvec_ms_jiffies(struct ctl_table *, int, struct file *,
+extern int proc_dointvec_ms_jiffies(struct ctl_table *, int,
                                    void __user *, size_t *, loff_t *);
-extern int proc_doulongvec_minmax(struct ctl_table *, int, struct file *,
+extern int proc_doulongvec_minmax(struct ctl_table *, int,
                                  void __user *, size_t *, loff_t *);
 extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int,
-                                     struct file *, void __user *, size_t *, loff_t *);
+                                     void __user *, size_t *, loff_t *);
 
 extern int do_sysctl (int __user *name, int nlen,
                      void __user *oldval, size_t __user *oldlenp,
index 56787c0..fe04e5e 100644 (file)
@@ -155,6 +155,34 @@ extern void timekeeping_leap_insert(int leapsecond);
 struct tms;
 extern void do_sys_times(struct tms *);
 
+/*
+ * Similar to the struct tm in userspace <time.h>, but it needs to be here so
+ * that the kernel source is self-contained.
+ */
+struct tm {
+       /*
+        * the number of seconds after the minute, normally in the range
+        * 0 to 59, but can be up to 60 to allow for leap seconds
+        */
+       int tm_sec;
+       /* the number of minutes after the hour, in the range 0 to 59 */
+       int tm_min;
+       /* the number of hours past midnight, in the range 0 to 23 */
+       int tm_hour;
+       /* the day of the month, in the range 1 to 31 */
+       int tm_mday;
+       /* the number of months since January, in the range 0 to 11 */
+       int tm_mon;
+       /* the number of years since 1900 */
+       long tm_year;
+       /* the number of days since Sunday, in the range 0 to 6 */
+       int tm_wday;
+       /* the number of days since January 1, in the range 0 to 365 */
+       int tm_yday;
+};
+
+void time_to_tm(time_t totalsecs, int offset, struct tm *result);
+
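
time_to_tm() converts seconds since the Epoch, plus a fixed offset from UTC,
into the broken-down form. A sketch (remember tm_year counts from 1900 and
tm_mon from 0, per the field comments above):

    struct tm tm;

    time_to_tm(get_seconds(), 0 /* UTC */, &tm);
    printk(KERN_INFO "date: %04ld-%02d-%02d\n",
           tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday);
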
 /**
  * timespec_to_ns - Convert timespec to nanoseconds
  * @ts:                pointer to the timespec variable to be converted
index 17ba82e..1eb44a9 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Tracing hooks
  *
- * Copyright (C) 2008 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2008-2009 Red Hat, Inc.  All rights reserved.
  *
  * This copyrighted material is made available to anyone wishing to use,
  * modify, copy, or redistribute it subject to the terms and conditions
@@ -463,22 +463,38 @@ static inline int tracehook_get_signal(struct task_struct *task,
 
 /**
  * tracehook_notify_jctl - report about job control stop/continue
- * @notify:            nonzero if this is the last thread in the group to stop
+ * @notify:            zero, %CLD_STOPPED or %CLD_CONTINUED
  * @why:               %CLD_STOPPED or %CLD_CONTINUED
  *
  * This is called when we might call do_notify_parent_cldstop().
- * It's called when about to stop for job control; we are already in
- * %TASK_STOPPED state, about to call schedule().  It's also called when
- * a delayed %CLD_STOPPED or %CLD_CONTINUED report is ready to be made.
  *
- * Return nonzero to generate a %SIGCHLD with @why, which is
- * normal if @notify is nonzero.
+ * @notify is zero if we would not ordinarily send a %SIGCHLD,
+ * or is the %CLD_STOPPED or %CLD_CONTINUED .si_code for %SIGCHLD.
  *
- * Called with no locks held.
+ * @why is %CLD_STOPPED when about to stop for job control;
+ * we are already in %TASK_STOPPED state, about to call schedule().
+ * It might also be that we have just exited (check %PF_EXITING),
+ * but need to report that a group-wide stop is complete.
+ *
+ * @why is %CLD_CONTINUED when waking up after job control stop and
+ * ready to make a delayed @notify report.
+ *
+ * Return the %CLD_* value for %SIGCHLD, or zero to generate no signal.
+ *
+ * Called with the siglock held.
  */
 static inline int tracehook_notify_jctl(int notify, int why)
 {
-       return notify || (current->ptrace & PT_PTRACED);
+       return notify ?: (current->ptrace & PT_PTRACED) ? why : 0;
+}
+
+/**
+ * tracehook_finish_jctl - report about return from job control stop
+ *
+ * This is called by do_signal_stop() after wakeup.
+ */
+static inline void tracehook_finish_jctl(void)
+{
 }
 
 #define DEATH_REAP                     -1
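
The `notify ?: ...` expression above folds the old boolean result into the
returned si_code. Written long-hand, it is equivalent to:

    static inline int tracehook_notify_jctl(int notify, int why)
    {
            if (notify)                          /* caller already chose a code */
                    return notify;
            if (current->ptrace & PT_PTRACED)    /* silent stop, but traced */
                    return why;
            return 0;                            /* no signal */
    }
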
index 63a3f7a..660a9de 100644 (file)
@@ -4,7 +4,7 @@
 /*
  * Kernel Tracepoint API.
  *
- * See Documentation/tracepoint.txt.
+ * See Documentation/trace/tracepoints.txt.
  *
  * (C) Copyright 2008 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
  *
index 46dd12c..9356b24 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef _LINUX_UNALIGNED_BE_BYTESHIFT_H
 #define _LINUX_UNALIGNED_BE_BYTESHIFT_H
 
-#include <linux/kernel.h>
+#include <linux/types.h>
 
 static inline u16 __get_unaligned_be16(const u8 *p)
 {
index 59777e9..be376fb 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef _LINUX_UNALIGNED_LE_BYTESHIFT_H
 #define _LINUX_UNALIGNED_LE_BYTESHIFT_H
 
-#include <linux/kernel.h>
+#include <linux/types.h>
 
 static inline u16 __get_unaligned_le16(const u8 *p)
 {
index 923f904..2dfaa29 100644 (file)
@@ -1,5 +1,6 @@
 /*
- * vgaarb.c
+ * The VGA arbiter manages VGA space routing and VGA resource decode to
+ * allow multiple VGA devices to be used in a system in a safe way.
  *
  * (C) Copyright 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>
  * (C) Copyright 2007 Paulo R. Zanoni <przanoni@gmail.com>
index 75cf586..66ebddc 100644 (file)
@@ -110,21 +110,20 @@ extern int laptop_mode;
 extern unsigned long determine_dirtyable_memory(void);
 
 extern int dirty_background_ratio_handler(struct ctl_table *table, int write,
-               struct file *filp, void __user *buffer, size_t *lenp,
+               void __user *buffer, size_t *lenp,
                loff_t *ppos);
 extern int dirty_background_bytes_handler(struct ctl_table *table, int write,
-               struct file *filp, void __user *buffer, size_t *lenp,
+               void __user *buffer, size_t *lenp,
                loff_t *ppos);
 extern int dirty_ratio_handler(struct ctl_table *table, int write,
-               struct file *filp, void __user *buffer, size_t *lenp,
+               void __user *buffer, size_t *lenp,
                loff_t *ppos);
 extern int dirty_bytes_handler(struct ctl_table *table, int write,
-               struct file *filp, void __user *buffer, size_t *lenp,
+               void __user *buffer, size_t *lenp,
                loff_t *ppos);
 
 struct ctl_table;
-struct file;
-int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *,
+int dirty_writeback_centisecs_handler(struct ctl_table *, int,
                                      void __user *, size_t *, loff_t *);
 
 void get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
index 72c3692..5b26a0b 100644 (file)
@@ -399,7 +399,7 @@ extern void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport,
  * fed into the routing cache should use these handlers.
  */
 int ipv4_doint_and_flush(ctl_table *ctl, int write,
-                        struct file* filp, void __user *buffer,
+                        void __user *buffer,
                         size_t *lenp, loff_t *ppos);
 int ipv4_doint_and_flush_strategy(ctl_table *table,
                                  void __user *oldval, size_t __user *oldlenp,
index 1459ed3..f76f22d 100644 (file)
@@ -55,7 +55,6 @@ enum {
 #include <net/neighbour.h>
 
 struct ctl_table;
-struct file;
 struct inet6_dev;
 struct net_device;
 struct net_proto_family;
@@ -139,7 +138,6 @@ extern int                  igmp6_event_report(struct sk_buff *skb);
 #ifdef CONFIG_SYSCTL
 extern int                     ndisc_ifinfo_sysctl_change(struct ctl_table *ctl,
                                                           int write,
-                                                          struct file * filp,
                                                           void __user *buffer,
                                                           size_t *lenp,
                                                           loff_t *ppos);
index 40eab73..7d37047 100644 (file)
@@ -27,18 +27,18 @@ static void *get_ipc(ctl_table *table)
 }
 
 #ifdef CONFIG_PROC_SYSCTL
-static int proc_ipc_dointvec(ctl_table *table, int write, struct file *filp,
+static int proc_ipc_dointvec(ctl_table *table, int write,
        void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct ctl_table ipc_table;
        memcpy(&ipc_table, table, sizeof(ipc_table));
        ipc_table.data = get_ipc(table);
 
-       return proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos);
+       return proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
 }
 
 static int proc_ipc_callback_dointvec(ctl_table *table, int write,
-       struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct ctl_table ipc_table;
        size_t lenp_bef = *lenp;
@@ -47,7 +47,7 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write,
        memcpy(&ipc_table, table, sizeof(ipc_table));
        ipc_table.data = get_ipc(table);
 
-       rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos);
+       rc = proc_dointvec(&ipc_table, write, buffer, lenp, ppos);
 
        if (write && !rc && lenp_bef == *lenp)
                /*
@@ -61,13 +61,13 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write,
 }
 
 static int proc_ipc_doulongvec_minmax(ctl_table *table, int write,
-       struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct ctl_table ipc_table;
        memcpy(&ipc_table, table, sizeof(ipc_table));
        ipc_table.data = get_ipc(table);
 
-       return proc_doulongvec_minmax(&ipc_table, write, filp, buffer,
+       return proc_doulongvec_minmax(&ipc_table, write, buffer,
                                        lenp, ppos);
 }
 
@@ -95,7 +95,7 @@ static void ipc_auto_callback(int val)
 }
 
 static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
-       struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct ctl_table ipc_table;
        size_t lenp_bef = *lenp;
@@ -106,7 +106,7 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write,
        ipc_table.data = get_ipc(table);
        oldval = *((int *)(ipc_table.data));
 
-       rc = proc_dointvec_minmax(&ipc_table, write, filp, buffer, lenp, ppos);
+       rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
 
        if (write && !rc && lenp_bef == *lenp) {
                int newval = *((int *)(ipc_table.data));
index 24ae46d..8a05871 100644 (file)
@@ -31,24 +31,24 @@ static void *get_mq(ctl_table *table)
        return which;
 }
 
-static int proc_mq_dointvec(ctl_table *table, int write, struct file *filp,
+static int proc_mq_dointvec(ctl_table *table, int write,
        void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct ctl_table mq_table;
        memcpy(&mq_table, table, sizeof(mq_table));
        mq_table.data = get_mq(table);
 
-       return proc_dointvec(&mq_table, write, filp, buffer, lenp, ppos);
+       return proc_dointvec(&mq_table, write, buffer, lenp, ppos);
 }
 
 static int proc_mq_dointvec_minmax(ctl_table *table, int write,
-       struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct ctl_table mq_table;
        memcpy(&mq_table, table, sizeof(mq_table));
        mq_table.data = get_mq(table);
 
-       return proc_dointvec_minmax(&mq_table, write, filp, buffer,
+       return proc_dointvec_minmax(&mq_table, write, buffer,
                                        lenp, ppos);
 }
 #else
index 187c89b..b8d4cd8 100644 (file)
@@ -58,7 +58,6 @@ obj-$(CONFIG_KEXEC) += kexec.o
 obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
 obj-$(CONFIG_COMPAT) += compat.o
 obj-$(CONFIG_CGROUPS) += cgroup.o
-obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
 obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
 obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
index defc2e6..5feed23 100644 (file)
@@ -855,18 +855,24 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                break;
        }
        case AUDIT_SIGNAL_INFO:
-               err = security_secid_to_secctx(audit_sig_sid, &ctx, &len);
-               if (err)
-                       return err;
+               len = 0;
+               if (audit_sig_sid) {
+                       err = security_secid_to_secctx(audit_sig_sid, &ctx, &len);
+                       if (err)
+                               return err;
+               }
                sig_data = kmalloc(sizeof(*sig_data) + len, GFP_KERNEL);
                if (!sig_data) {
-                       security_release_secctx(ctx, len);
+                       if (audit_sig_sid)
+                               security_release_secctx(ctx, len);
                        return -ENOMEM;
                }
                sig_data->uid = audit_sig_uid;
                sig_data->pid = audit_sig_pid;
-               memcpy(sig_data->ctx, ctx, len);
-               security_release_secctx(ctx, len);
+               if (audit_sig_sid) {
+                       memcpy(sig_data->ctx, ctx, len);
+                       security_release_secctx(ctx, len);
+               }
                audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_SIGNAL_INFO,
                                0, 0, sig_data, sizeof(*sig_data) + len);
                kfree(sig_data);
index 0e96dbc..cc7e879 100644 (file)
@@ -45,8 +45,8 @@
 
 struct audit_watch {
        atomic_t                count;  /* reference count */
-       char                    *path;  /* insertion path */
        dev_t                   dev;    /* associated superblock device */
+       char                    *path;  /* insertion path */
        unsigned long           ino;    /* associated inode number */
        struct audit_parent     *parent; /* associated parent */
        struct list_head        wlist;  /* entry in parent->watches list */
index 68d3c6a..267e484 100644 (file)
@@ -168,12 +168,12 @@ struct audit_context {
        int                 in_syscall; /* 1 if task is in a syscall */
        enum audit_state    state, current_state;
        unsigned int        serial;     /* serial number for record */
-       struct timespec     ctime;      /* time of syscall entry */
        int                 major;      /* syscall number */
+       struct timespec     ctime;      /* time of syscall entry */
        unsigned long       argv[4];    /* syscall arguments */
-       int                 return_valid; /* return code is valid */
        long                return_code;/* syscall return code */
        u64                 prio;
+       int                 return_valid; /* return code is valid */
        int                 name_count;
        struct audit_names  names[AUDIT_NAMES];
        char *              filterkey;  /* key for rule that triggered record */
@@ -198,8 +198,8 @@ struct audit_context {
        char                target_comm[TASK_COMM_LEN];
 
        struct audit_tree_refs *trees, *first_trees;
-       int tree_count;
        struct list_head killed_trees;
+       int tree_count;
 
        int type;
        union {
index cd83d99..7ccba4b 100644 (file)
@@ -23,6 +23,7 @@
  */
 
 #include <linux/cgroup.h>
+#include <linux/ctype.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/kernel.h>
@@ -48,6 +49,8 @@
 #include <linux/namei.h>
 #include <linux/smp_lock.h>
 #include <linux/pid_namespace.h>
+#include <linux/idr.h>
+#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
 
 #include <asm/atomic.h>
 
@@ -60,6 +63,8 @@ static struct cgroup_subsys *subsys[] = {
 #include <linux/cgroup_subsys.h>
 };
 
+#define MAX_CGROUP_ROOT_NAMELEN 64
+
 /*
  * A cgroupfs_root represents the root of a cgroup hierarchy,
  * and may be associated with a superblock to form an active
@@ -74,6 +79,9 @@ struct cgroupfs_root {
         */
        unsigned long subsys_bits;
 
+       /* Unique id for this hierarchy. */
+       int hierarchy_id;
+
        /* The bitmask of subsystems currently attached to this hierarchy */
        unsigned long actual_subsys_bits;
 
@@ -94,6 +102,9 @@ struct cgroupfs_root {
 
        /* The path to use for release notifications. */
        char release_agent_path[PATH_MAX];
+
+       /* The name for this hierarchy - may be empty */
+       char name[MAX_CGROUP_ROOT_NAMELEN];
 };
 
 /*
@@ -141,6 +152,10 @@ struct css_id {
 static LIST_HEAD(roots);
 static int root_count;
 
+static DEFINE_IDA(hierarchy_ida);
+static int next_hierarchy_id;
+static DEFINE_SPINLOCK(hierarchy_id_lock);
+
 /* dummytop is a shorthand for the dummy hierarchy's top cgroup */
 #define dummytop (&rootnode.top_cgroup)
 
@@ -201,6 +216,7 @@ struct cg_cgroup_link {
         * cgroup, anchored on cgroup->css_sets
         */
        struct list_head cgrp_link_list;
+       struct cgroup *cgrp;
        /*
         * List running through cg_cgroup_links pointing at a
         * single css_set object, anchored on css_set->cg_links
@@ -227,8 +243,11 @@ static int cgroup_subsys_init_idr(struct cgroup_subsys *ss);
 static DEFINE_RWLOCK(css_set_lock);
 static int css_set_count;
 
-/* hash table for cgroup groups. This improves the performance to
- * find an existing css_set */
+/*
+ * hash table for cgroup groups. This improves the performance to find
+ * an existing css_set. This hash doesn't (currently) take into
+ * account cgroups in empty hierarchies.
+ */
 #define CSS_SET_HASH_BITS      7
 #define CSS_SET_TABLE_SIZE     (1 << CSS_SET_HASH_BITS)
 static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
@@ -248,48 +267,22 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
        return &css_set_table[index];
 }
 
+static void free_css_set_rcu(struct rcu_head *obj)
+{
+       struct css_set *cg = container_of(obj, struct css_set, rcu_head);
+       kfree(cg);
+}
+
 /* We don't maintain the lists running through each css_set to its
  * task until after the first call to cgroup_iter_start(). This
  * reduces the fork()/exit() overhead for people who have cgroups
  * compiled into their kernel but not actually in use */
 static int use_task_css_set_links __read_mostly;
 
-/* When we create or destroy a css_set, the operation simply
- * takes/releases a reference count on all the cgroups referenced
- * by subsystems in this css_set. This can end up multiple-counting
- * some cgroups, but that's OK - the ref-count is just a
- * busy/not-busy indicator; ensuring that we only count each cgroup
- * once would require taking a global lock to ensure that no
- * subsystems moved between hierarchies while we were doing so.
- *
- * Possible TODO: decide at boot time based on the number of
- * registered subsystems and the number of CPUs or NUMA nodes whether
- * it's better for performance to ref-count every subsystem, or to
- * take a global lock and only add one ref count to each hierarchy.
- */
-
-/*
- * unlink a css_set from the list and free it
- */
-static void unlink_css_set(struct css_set *cg)
+static void __put_css_set(struct css_set *cg, int taskexit)
 {
        struct cg_cgroup_link *link;
        struct cg_cgroup_link *saved_link;
-
-       hlist_del(&cg->hlist);
-       css_set_count--;
-
-       list_for_each_entry_safe(link, saved_link, &cg->cg_links,
-                                cg_link_list) {
-               list_del(&link->cg_link_list);
-               list_del(&link->cgrp_link_list);
-               kfree(link);
-       }
-}
-
-static void __put_css_set(struct css_set *cg, int taskexit)
-{
-       int i;
        /*
         * Ensure that the refcount doesn't hit zero while any readers
         * can see it. Similar to atomic_dec_and_lock(), but for an
@@ -302,21 +295,28 @@ static void __put_css_set(struct css_set *cg, int taskexit)
                write_unlock(&css_set_lock);
                return;
        }
-       unlink_css_set(cg);
-       write_unlock(&css_set_lock);
 
-       rcu_read_lock();
-       for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-               struct cgroup *cgrp = rcu_dereference(cg->subsys[i]->cgroup);
+       /* This css_set is dead. unlink it and release cgroup refcounts */
+       hlist_del(&cg->hlist);
+       css_set_count--;
+
+       list_for_each_entry_safe(link, saved_link, &cg->cg_links,
+                                cg_link_list) {
+               struct cgroup *cgrp = link->cgrp;
+               list_del(&link->cg_link_list);
+               list_del(&link->cgrp_link_list);
                if (atomic_dec_and_test(&cgrp->count) &&
                    notify_on_release(cgrp)) {
                        if (taskexit)
                                set_bit(CGRP_RELEASABLE, &cgrp->flags);
                        check_for_release(cgrp);
                }
+
+               kfree(link);
        }
-       rcu_read_unlock();
-       kfree(cg);
+
+       write_unlock(&css_set_lock);
+       call_rcu(&cg->rcu_head, free_css_set_rcu);
 }
 
 /*
@@ -337,6 +337,78 @@ static inline void put_css_set_taskexit(struct css_set *cg)
        __put_css_set(cg, 1);
 }
 
+/*
+ * compare_css_sets - helper function for find_existing_css_set().
+ * @cg: candidate css_set being tested
+ * @old_cg: existing css_set for a task
+ * @new_cgrp: cgroup that's being entered by the task
+ * @template: desired set of css pointers in css_set (pre-calculated)
+ *
+ * Returns true if "cg" matches "old_cg" except for the hierarchy
+ * which "new_cgrp" belongs to, for which it should match "new_cgrp".
+ */
+static bool compare_css_sets(struct css_set *cg,
+                            struct css_set *old_cg,
+                            struct cgroup *new_cgrp,
+                            struct cgroup_subsys_state *template[])
+{
+       struct list_head *l1, *l2;
+
+       if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
+               /* Not all subsystems matched */
+               return false;
+       }
+
+       /*
+        * Compare cgroup pointers in order to distinguish between
+        * different cgroups in hierarchies with no subsystems. We
+        * could get by with just this check alone (and skip the
+        * memcmp above) but on most setups the memcmp check will
+        * avoid the need for this more expensive check on almost all
+        * candidates.
+        */
+
+       l1 = &cg->cg_links;
+       l2 = &old_cg->cg_links;
+       while (1) {
+               struct cg_cgroup_link *cgl1, *cgl2;
+               struct cgroup *cg1, *cg2;
+
+               l1 = l1->next;
+               l2 = l2->next;
+               /* See if we reached the end - both lists are equal length. */
+               if (l1 == &cg->cg_links) {
+                       BUG_ON(l2 != &old_cg->cg_links);
+                       break;
+               } else {
+                       BUG_ON(l2 == &old_cg->cg_links);
+               }
+               /* Locate the cgroups associated with these links. */
+               cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
+               cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
+               cg1 = cgl1->cgrp;
+               cg2 = cgl2->cgrp;
+               /* Hierarchies should be linked in the same order. */
+               BUG_ON(cg1->root != cg2->root);
+
+               /*
+                * If this hierarchy is the hierarchy of the cgroup
+                * that's changing, then we need to check that this
+                * css_set points to the new cgroup; if it's any other
+                * hierarchy, then this css_set should point to the
+                * same cgroup as the old css_set.
+                */
+               if (cg1->root == new_cgrp->root) {
+                       if (cg1 != new_cgrp)
+                               return false;
+               } else {
+                       if (cg1 != cg2)
+                               return false;
+               }
+       }
+       return true;
+}
+
 /*
  * find_existing_css_set() is a helper for
  * find_css_set(), and checks to see whether an existing
@@ -378,10 +450,11 @@ static struct css_set *find_existing_css_set(
 
        hhead = css_set_hash(template);
        hlist_for_each_entry(cg, node, hhead, hlist) {
-               if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) {
-                       /* All subsystems matched */
-                       return cg;
-               }
+               if (!compare_css_sets(cg, oldcg, cgrp, template))
+                       continue;
+
+               /* This css_set matches what we need */
+               return cg;
        }
 
        /* No existing cgroup group matched */
@@ -435,8 +508,14 @@ static void link_css_set(struct list_head *tmp_cg_links,
        link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
                                cgrp_link_list);
        link->cg = cg;
+       link->cgrp = cgrp;
+       atomic_inc(&cgrp->count);
        list_move(&link->cgrp_link_list, &cgrp->css_sets);
-       list_add(&link->cg_link_list, &cg->cg_links);
+       /*
+        * Always add links to the tail of the list so that the list
+        * is sorted by order of hierarchy creation
+        */
+       list_add_tail(&link->cg_link_list, &cg->cg_links);
 }
 
 /*
@@ -451,11 +530,11 @@ static struct css_set *find_css_set(
 {
        struct css_set *res;
        struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
-       int i;
 
        struct list_head tmp_cg_links;
 
        struct hlist_head *hhead;
+       struct cg_cgroup_link *link;
 
        /* First see if we already have a cgroup group that matches
         * the desired set */
@@ -489,20 +568,12 @@ static struct css_set *find_css_set(
 
        write_lock(&css_set_lock);
        /* Add reference counts and links from the new css_set. */
-       for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-               struct cgroup *cgrp = res->subsys[i]->cgroup;
-               struct cgroup_subsys *ss = subsys[i];
-               atomic_inc(&cgrp->count);
-               /*
-                * We want to add a link once per cgroup, so we
-                * only do it for the first subsystem in each
-                * hierarchy
-                */
-               if (ss->root->subsys_list.next == &ss->sibling)
-                       link_css_set(&tmp_cg_links, res, cgrp);
+       list_for_each_entry(link, &oldcg->cg_links, cg_link_list) {
+               struct cgroup *c = link->cgrp;
+               if (c->root == cgrp->root)
+                       c = cgrp;
+               link_css_set(&tmp_cg_links, res, c);
        }
-       if (list_empty(&rootnode.subsys_list))
-               link_css_set(&tmp_cg_links, res, dummytop);
 
        BUG_ON(!list_empty(&tmp_cg_links));
 
@@ -517,6 +588,41 @@ static struct css_set *find_css_set(
        return res;
 }
 
+/*
+ * Return the cgroup for "task" from the given hierarchy. Must be
+ * called with cgroup_mutex held.
+ */
+static struct cgroup *task_cgroup_from_root(struct task_struct *task,
+                                           struct cgroupfs_root *root)
+{
+       struct css_set *css;
+       struct cgroup *res = NULL;
+
+       BUG_ON(!mutex_is_locked(&cgroup_mutex));
+       read_lock(&css_set_lock);
+       /*
+        * No need to lock the task - since we hold cgroup_mutex the
+        * task can't change groups, so the only thing that can happen
+        * is that it exits and its css is set back to init_css_set.
+        */
+       css = task->cgroups;
+       if (css == &init_css_set) {
+               res = &root->top_cgroup;
+       } else {
+               struct cg_cgroup_link *link;
+               list_for_each_entry(link, &css->cg_links, cg_link_list) {
+                       struct cgroup *c = link->cgrp;
+                       if (c->root == root) {
+                               res = c;
+                               break;
+                       }
+               }
+       }
+       read_unlock(&css_set_lock);
+       BUG_ON(!res);
+       return res;
+}
+
 /*
  * There is one global cgroup mutex. We also require taking
  * task_lock() when dereferencing a task's cgroup subsys pointers.
@@ -677,6 +783,12 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
                 */
                deactivate_super(cgrp->root->sb);
 
+               /*
+                * if we're getting rid of the cgroup, refcount should ensure
+                * that there are no pidlists left.
+                */
+               BUG_ON(!list_empty(&cgrp->pidlists));
+
                call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
        }
        iput(inode);
@@ -841,6 +953,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
                seq_puts(seq, ",noprefix");
        if (strlen(root->release_agent_path))
                seq_printf(seq, ",release_agent=%s", root->release_agent_path);
+       if (strlen(root->name))
+               seq_printf(seq, ",name=%s", root->name);
        mutex_unlock(&cgroup_mutex);
        return 0;
 }
@@ -849,6 +963,12 @@ struct cgroup_sb_opts {
        unsigned long subsys_bits;
        unsigned long flags;
        char *release_agent;
+       char *name;
+       /* User explicitly requested empty subsystem */
+       bool none;
+
+       struct cgroupfs_root *new_root;
+
 };
 
 /* Convert a hierarchy specifier into a bitmask of subsystems and
@@ -863,9 +983,7 @@ static int parse_cgroupfs_options(char *data,
        mask = ~(1UL << cpuset_subsys_id);
 #endif
 
-       opts->subsys_bits = 0;
-       opts->flags = 0;
-       opts->release_agent = NULL;
+       memset(opts, 0, sizeof(*opts));
 
        while ((token = strsep(&o, ",")) != NULL) {
                if (!*token)
@@ -879,17 +997,42 @@ static int parse_cgroupfs_options(char *data,
                                if (!ss->disabled)
                                        opts->subsys_bits |= 1ul << i;
                        }
+               } else if (!strcmp(token, "none")) {
+                       /* Explicitly have no subsystems */
+                       opts->none = true;
                } else if (!strcmp(token, "noprefix")) {
                        set_bit(ROOT_NOPREFIX, &opts->flags);
                } else if (!strncmp(token, "release_agent=", 14)) {
                        /* Specifying two release agents is forbidden */
                        if (opts->release_agent)
                                return -EINVAL;
-                       opts->release_agent = kzalloc(PATH_MAX, GFP_KERNEL);
+                       opts->release_agent =
+                               kstrndup(token + 14, PATH_MAX, GFP_KERNEL);
                        if (!opts->release_agent)
                                return -ENOMEM;
-                       strncpy(opts->release_agent, token + 14, PATH_MAX - 1);
-                       opts->release_agent[PATH_MAX - 1] = 0;
+               } else if (!strncmp(token, "name=", 5)) {
+                       int i;
+                       const char *name = token + 5;
+                       /* Can't specify an empty name */
+                       if (!strlen(name))
+                               return -EINVAL;
+                       /* Must match [\w.-]+ */
+                       for (i = 0; i < strlen(name); i++) {
+                               char c = name[i];
+                               if (isalnum(c))
+                                       continue;
+                               if ((c == '.') || (c == '-') || (c == '_'))
+                                       continue;
+                               return -EINVAL;
+                       }
+                       /* Specifying two names is forbidden */
+                       if (opts->name)
+                               return -EINVAL;
+                       opts->name = kstrndup(name,
+                                             MAX_CGROUP_ROOT_NAMELEN,
+                                             GFP_KERNEL);
+                       if (!opts->name)
+                               return -ENOMEM;
                } else {
                        struct cgroup_subsys *ss;
                        int i;
@@ -906,6 +1049,8 @@ static int parse_cgroupfs_options(char *data,
                }
        }
 
+       /* Consistency checks */
+
        /*
         * Option noprefix was introduced just for backward compatibility
         * with the old cpuset, so we allow noprefix only if mounting just
@@ -915,8 +1060,16 @@ static int parse_cgroupfs_options(char *data,
            (opts->subsys_bits & mask))
                return -EINVAL;
 
-       /* We can't have an empty hierarchy */
-       if (!opts->subsys_bits)
+
+       /* Can't specify "none" and some subsystems */
+       if (opts->subsys_bits && opts->none)
+               return -EINVAL;
+
+       /*
+        * We either have to specify by name or by subsystems. (So all
+        * empty hierarchies must have a name).
+        */
+       if (!opts->subsys_bits && !opts->name)
                return -EINVAL;
 
        return 0;
@@ -944,6 +1097,12 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
                goto out_unlock;
        }
 
+       /* Don't allow name to change at remount */
+       if (opts.name && strcmp(opts.name, root->name)) {
+               ret = -EINVAL;
+               goto out_unlock;
+       }
+
        ret = rebind_subsystems(root, opts.subsys_bits);
        if (ret)
                goto out_unlock;
@@ -955,6 +1114,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
                strcpy(root->release_agent_path, opts.release_agent);
  out_unlock:
        kfree(opts.release_agent);
+       kfree(opts.name);
        mutex_unlock(&cgroup_mutex);
        mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
        unlock_kernel();
@@ -974,9 +1134,10 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
        INIT_LIST_HEAD(&cgrp->children);
        INIT_LIST_HEAD(&cgrp->css_sets);
        INIT_LIST_HEAD(&cgrp->release_list);
-       INIT_LIST_HEAD(&cgrp->pids_list);
-       init_rwsem(&cgrp->pids_mutex);
+       INIT_LIST_HEAD(&cgrp->pidlists);
+       mutex_init(&cgrp->pidlist_mutex);
 }
+
 static void init_cgroup_root(struct cgroupfs_root *root)
 {
        struct cgroup *cgrp = &root->top_cgroup;
@@ -988,33 +1149,106 @@ static void init_cgroup_root(struct cgroupfs_root *root)
        init_cgroup_housekeeping(cgrp);
 }
 
+static bool init_root_id(struct cgroupfs_root *root)
+{
+       int ret = 0;
+
+       do {
+               if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL))
+                       return false;
+               spin_lock(&hierarchy_id_lock);
+               /* Try to allocate the next unused ID */
+               ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
+                                       &root->hierarchy_id);
+               if (ret == -ENOSPC)
+                       /* Try again starting from 0 */
+                       ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id);
+               if (!ret) {
+                       next_hierarchy_id = root->hierarchy_id + 1;
+               } else if (ret != -EAGAIN) {
+                       /* Can only get here if the 31-bit IDR is full ... */
+                       BUG_ON(ret);
+               }
+               spin_unlock(&hierarchy_id_lock);
+       } while (ret);
+       return true;
+}
+
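
init_root_id() above follows the classic two-step IDA idiom of this era: preload outside the lock, allocate under it, retry on -EAGAIN. A minimal standalone sketch of the same pattern, assuming a caller-owned ida and spinlock (all names here are hypothetical):

	#include <linux/idr.h>
	#include <linux/spinlock.h>
	#include <linux/gfp.h>
	#include <linux/errno.h>

	static DEFINE_IDA(example_ida);
	static DEFINE_SPINLOCK(example_lock);

	static int example_alloc_id(int *id)
	{
		int ret;

		do {
			/* may sleep; preload a node before taking the spinlock */
			if (!ida_pre_get(&example_ida, GFP_KERNEL))
				return -ENOMEM;
			spin_lock(&example_lock);
			ret = ida_get_new(&example_ida, id);
			spin_unlock(&example_lock);
			/* -EAGAIN: a racer consumed the preloaded node; retry */
		} while (ret == -EAGAIN);

		return ret;
	}
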
 static int cgroup_test_super(struct super_block *sb, void *data)
 {
-       struct cgroupfs_root *new = data;
+       struct cgroup_sb_opts *opts = data;
        struct cgroupfs_root *root = sb->s_fs_info;
 
-       /* First check subsystems */
-       if (new->subsys_bits != root->subsys_bits)
-           return 0;
+       /* If we asked for a name then it must match */
+       if (opts->name && strcmp(opts->name, root->name))
+               return 0;
 
-       /* Next check flags */
-       if (new->flags != root->flags)
+       /*
+        * If we asked for subsystems (or explicitly for no
+        * subsystems) then they must match
+        */
+       if ((opts->subsys_bits || opts->none)
+           && (opts->subsys_bits != root->subsys_bits))
                return 0;
 
        return 1;
 }
 
+static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
+{
+       struct cgroupfs_root *root;
+
+       if (!opts->subsys_bits && !opts->none)
+               return NULL;
+
+       root = kzalloc(sizeof(*root), GFP_KERNEL);
+       if (!root)
+               return ERR_PTR(-ENOMEM);
+
+       if (!init_root_id(root)) {
+               kfree(root);
+               return ERR_PTR(-ENOMEM);
+       }
+       init_cgroup_root(root);
+
+       root->subsys_bits = opts->subsys_bits;
+       root->flags = opts->flags;
+       if (opts->release_agent)
+               strcpy(root->release_agent_path, opts->release_agent);
+       if (opts->name)
+               strcpy(root->name, opts->name);
+       return root;
+}
+
+static void cgroup_drop_root(struct cgroupfs_root *root)
+{
+       if (!root)
+               return;
+
+       BUG_ON(!root->hierarchy_id);
+       spin_lock(&hierarchy_id_lock);
+       ida_remove(&hierarchy_ida, root->hierarchy_id);
+       spin_unlock(&hierarchy_id_lock);
+       kfree(root);
+}
+
 static int cgroup_set_super(struct super_block *sb, void *data)
 {
        int ret;
-       struct cgroupfs_root *root = data;
+       struct cgroup_sb_opts *opts = data;
+
+       /* If we don't have a new root, we can't set up a new sb */
+       if (!opts->new_root)
+               return -EINVAL;
+
+       BUG_ON(!opts->subsys_bits && !opts->none);
 
        ret = set_anon_super(sb, NULL);
        if (ret)
                return ret;
 
-       sb->s_fs_info = root;
-       root->sb = sb;
+       sb->s_fs_info = opts->new_root;
+       opts->new_root->sb = sb;
 
        sb->s_blocksize = PAGE_CACHE_SIZE;
        sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
@@ -1051,48 +1285,43 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
                         void *data, struct vfsmount *mnt)
 {
        struct cgroup_sb_opts opts;
+       struct cgroupfs_root *root;
        int ret = 0;
        struct super_block *sb;
-       struct cgroupfs_root *root;
-       struct list_head tmp_cg_links;
+       struct cgroupfs_root *new_root;
 
        /* First find the desired set of subsystems */
        ret = parse_cgroupfs_options(data, &opts);
-       if (ret) {
-               kfree(opts.release_agent);
-               return ret;
-       }
-
-       root = kzalloc(sizeof(*root), GFP_KERNEL);
-       if (!root) {
-               kfree(opts.release_agent);
-               return -ENOMEM;
-       }
+       if (ret)
+               goto out_err;
 
-       init_cgroup_root(root);
-       root->subsys_bits = opts.subsys_bits;
-       root->flags = opts.flags;
-       if (opts.release_agent) {
-               strcpy(root->release_agent_path, opts.release_agent);
-               kfree(opts.release_agent);
+       /*
+        * Allocate a new cgroup root. We may not need it if we're
+        * reusing an existing hierarchy.
+        */
+       new_root = cgroup_root_from_opts(&opts);
+       if (IS_ERR(new_root)) {
+               ret = PTR_ERR(new_root);
+               goto out_err;
        }
+       opts.new_root = new_root;
 
-       sb = sget(fs_type, cgroup_test_super, cgroup_set_super, root);
-
+       /* Locate an existing or new sb for this hierarchy */
+       sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts);
        if (IS_ERR(sb)) {
-               kfree(root);
-               return PTR_ERR(sb);
+               ret = PTR_ERR(sb);
+               cgroup_drop_root(opts.new_root);
+               goto out_err;
        }
 
-       if (sb->s_fs_info != root) {
-               /* Reusing an existing superblock */
-               BUG_ON(sb->s_root == NULL);
-               kfree(root);
-               root = NULL;
-       } else {
-               /* New superblock */
+       root = sb->s_fs_info;
+       BUG_ON(!root);
+       if (root == opts.new_root) {
+               /* We used the new root structure, so this is a new hierarchy */
+               struct list_head tmp_cg_links;
                struct cgroup *root_cgrp = &root->top_cgroup;
                struct inode *inode;
+               struct cgroupfs_root *existing_root;
                int i;
 
                BUG_ON(sb->s_root != NULL);
@@ -1105,6 +1334,18 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
                mutex_lock(&inode->i_mutex);
                mutex_lock(&cgroup_mutex);
 
+               if (strlen(root->name)) {
+                       /* Check for name clashes with existing mounts */
+                       for_each_active_root(existing_root) {
+                               if (!strcmp(existing_root->name, root->name)) {
+                                       ret = -EBUSY;
+                                       mutex_unlock(&cgroup_mutex);
+                                       mutex_unlock(&inode->i_mutex);
+                                       goto drop_new_super;
+                               }
+                       }
+               }
+
                /*
                 * We're accessing css_set_count without locking
                 * css_set_lock here, but that's OK - it can only be
@@ -1123,7 +1364,8 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
                if (ret == -EBUSY) {
                        mutex_unlock(&cgroup_mutex);
                        mutex_unlock(&inode->i_mutex);
-                       goto free_cg_links;
+                       free_cg_links(&tmp_cg_links);
+                       goto drop_new_super;
                }
 
                /* EBUSY should be the only error here */
@@ -1155,17 +1397,27 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
                BUG_ON(root->number_of_cgroups != 1);
 
                cgroup_populate_dir(root_cgrp);
-               mutex_unlock(&inode->i_mutex);
                mutex_unlock(&cgroup_mutex);
+               mutex_unlock(&inode->i_mutex);
+       } else {
+               /*
+                * We re-used an existing hierarchy - the new root (if
+                * any) is not needed
+                */
+               cgroup_drop_root(opts.new_root);
        }
 
        simple_set_mnt(mnt, sb);
+       kfree(opts.release_agent);
+       kfree(opts.name);
        return 0;
 
- free_cg_links:
-       free_cg_links(&tmp_cg_links);
  drop_new_super:
        deactivate_locked_super(sb);
+ out_err:
+       kfree(opts.release_agent);
+       kfree(opts.name);
+
        return ret;
 }
 
@@ -1211,7 +1463,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
        mutex_unlock(&cgroup_mutex);
 
        kill_litter_super(sb);
-       kfree(root);
+       cgroup_drop_root(root);
 }
 
 static struct file_system_type cgroup_fs_type = {
@@ -1276,27 +1528,6 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
        return 0;
 }
 
-/*
- * Return the first subsystem attached to a cgroup's hierarchy, and
- * its subsystem id.
- */
-
-static void get_first_subsys(const struct cgroup *cgrp,
-                       struct cgroup_subsys_state **css, int *subsys_id)
-{
-       const struct cgroupfs_root *root = cgrp->root;
-       const struct cgroup_subsys *test_ss;
-       BUG_ON(list_empty(&root->subsys_list));
-       test_ss = list_entry(root->subsys_list.next,
-                            struct cgroup_subsys, sibling);
-       if (css) {
-               *css = cgrp->subsys[test_ss->subsys_id];
-               BUG_ON(!*css);
-       }
-       if (subsys_id)
-               *subsys_id = test_ss->subsys_id;
-}
-
 /**
  * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
  * @cgrp: the cgroup the task is attaching to
@@ -1313,18 +1544,15 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
        struct css_set *cg;
        struct css_set *newcg;
        struct cgroupfs_root *root = cgrp->root;
-       int subsys_id;
-
-       get_first_subsys(cgrp, NULL, &subsys_id);
 
        /* Nothing to do if the task is already in that cgroup */
-       oldcgrp = task_cgroup(tsk, subsys_id);
+       oldcgrp = task_cgroup_from_root(tsk, root);
        if (cgrp == oldcgrp)
                return 0;
 
        for_each_subsys(root, ss) {
                if (ss->can_attach) {
-                       retval = ss->can_attach(ss, cgrp, tsk);
+                       retval = ss->can_attach(ss, cgrp, tsk, false);
                        if (retval)
                                return retval;
                }
@@ -1362,7 +1590,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 
        for_each_subsys(root, ss) {
                if (ss->attach)
-                       ss->attach(ss, cgrp, oldcgrp, tsk);
+                       ss->attach(ss, cgrp, oldcgrp, tsk, false);
        }
        set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
        synchronize_rcu();
@@ -1423,15 +1651,6 @@ static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
        return ret;
 }
 
-/* The various types of files and directories in a cgroup file system */
-enum cgroup_filetype {
-       FILE_ROOT,
-       FILE_DIR,
-       FILE_TASKLIST,
-       FILE_NOTIFY_ON_RELEASE,
-       FILE_RELEASE_AGENT,
-};
-
 /**
  * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
  * @cgrp: the cgroup to be checked for liveness
@@ -1876,7 +2095,7 @@ int cgroup_task_count(const struct cgroup *cgrp)
  * the start of a css_set
  */
 static void cgroup_advance_iter(struct cgroup *cgrp,
-                                         struct cgroup_iter *it)
+                               struct cgroup_iter *it)
 {
        struct list_head *l = it->cg_link;
        struct cg_cgroup_link *link;
@@ -2129,7 +2348,7 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
 }
 
 /*
- * Stuff for reading the 'tasks' file.
+ * Stuff for reading the 'tasks'/'procs' files.
  *
  * Reading this file can return large amounts of data if a cgroup has
  * *lots* of attached tasks. So it may need several calls to read(),
@@ -2139,27 +2358,196 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
  */
 
 /*
- * Load into 'pidarray' up to 'npids' of the tasks using cgroup
- * 'cgrp'.  Return actual number of pids loaded.  No need to
- * task_lock(p) when reading out p->cgroup, since we're in an RCU
- * read section, so the css_set can't go away, and is
- * immutable after creation.
+ * The following three functions "fix" the issue where there are more pids
+ * than kmalloc will give memory for; in such cases, we use vmalloc/vfree.
+ * TODO: replace with a kernel-wide solution to this problem
+ */
+#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
+static void *pidlist_allocate(int count)
+{
+       if (PIDLIST_TOO_LARGE(count))
+               return vmalloc(count * sizeof(pid_t));
+       else
+               return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
+}
+static void pidlist_free(void *p)
+{
+       if (is_vmalloc_addr(p))
+               vfree(p);
+       else
+               kfree(p);
+}
+static void *pidlist_resize(void *p, int newcount)
+{
+       void *newlist;
+       /* note: if new alloc fails, old p will still be valid either way */
+       if (is_vmalloc_addr(p)) {
+               newlist = vmalloc(newcount * sizeof(pid_t));
+               if (!newlist)
+                       return NULL;
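+               /* callers only ever shrink the list, so newcount entries suffice */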
+               memcpy(newlist, p, newcount * sizeof(pid_t));
+               vfree(p);
+       } else {
+               newlist = krealloc(p, newcount * sizeof(pid_t), GFP_KERNEL);
+       }
+       return newlist;
+}
+
+/*
+ * pidlist_uniq - given a sorted (kmalloc()ed or vmalloc()ed) list, strip
+ * out all duplicate entries. If the stripped list is sufficiently smaller
+ * and there's enough memory to allocate a new buffer, the unneeded memory
+ * is released. Returns the number of unique elements.
+ */
+/* is the size difference enough that we should re-allocate the array? */
+#define PIDLIST_REALLOC_DIFFERENCE(old, new) ((old) - PAGE_SIZE >= (new))
+static int pidlist_uniq(pid_t **p, int length)
+{
+       int src, dest = 1;
+       pid_t *list = *p;
+       pid_t *newlist;
+
+       /*
+        * we presume the 0th element is unique, so src starts at 1. trivial
+        * edge cases first; no work needs to be done for either
+        */
+       if (length == 0 || length == 1)
+               return length;
+       /* src and dest walk down the list; dest counts unique elements */
+       for (src = 1; src < length; src++) {
+               /* find next unique element */
+               while (list[src] == list[src-1]) {
+                       src++;
+                       if (src == length)
+                               goto after;
+               }
+               /* dest always points to where the next unique element goes */
+               list[dest] = list[src];
+               dest++;
+       }
+after:
+       /*
+        * if the length difference is large enough, we want to allocate a
+        * smaller buffer to save memory. if this fails due to out of memory,
+        * we'll just stay with what we've got.
+        */
+       if (PIDLIST_REALLOC_DIFFERENCE(length, dest)) {
+               newlist = pidlist_resize(list, dest);
+               if (newlist)
+                       *p = newlist;
+       }
+       return dest;
+}
+
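
A hedged userspace rendering of the same compaction pass, runnable standalone (the optional resize step is omitted):

	#include <stdio.h>

	int main(void)
	{
		int list[] = { 3, 3, 5, 7, 7 };
		int length = 5, src, dest = 1;

		for (src = 1; src < length; src++) {
			while (list[src] == list[src - 1]) {
				src++;
				if (src == length)
					goto after;
			}
			/* dest always points at the slot for the next unique pid */
			list[dest++] = list[src];
		}
	after:
		for (src = 0; src < dest; src++)
			printf("%d\n", list[src]);	/* prints 3, 5, 7 */
		return 0;
	}
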
+static int cmppid(const void *a, const void *b)
+{
+       return *(pid_t *)a - *(pid_t *)b;
+}
+
+/*
+ * find the appropriate pidlist for our purpose (given procs vs tasks)
+ * returns with the lock on that pidlist already held, and takes care
+ * of the use count, or returns NULL with no locks held if we're out of
+ * memory.
  */
-static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp)
+static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
+                                                 enum cgroup_filetype type)
 {
-       int n = 0, pid;
+       struct cgroup_pidlist *l;
+       /* don't need task_nsproxy() if we're looking at ourself */
+       struct pid_namespace *ns = get_pid_ns(current->nsproxy->pid_ns);
+       /*
+        * We can't drop the pidlist_mutex before taking the l->mutex in case
+        * the last ref-holder is trying to remove l from the list at the same
+        * time. Holding the pidlist_mutex precludes somebody taking whichever
+        * list we find out from under us - compare cgroup_release_pid_array().
+        */
+       mutex_lock(&cgrp->pidlist_mutex);
+       list_for_each_entry(l, &cgrp->pidlists, links) {
+               if (l->key.type == type && l->key.ns == ns) {
+                       /* found a matching list - drop the extra refcount */
+                       put_pid_ns(ns);
+                       /* make sure l doesn't vanish out from under us */
+                       down_write(&l->mutex);
+                       mutex_unlock(&cgrp->pidlist_mutex);
+                       l->use_count++;
+                       return l;
+               }
+       }
+       /* entry not found; create a new one */
+       l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
+       if (!l) {
+               mutex_unlock(&cgrp->pidlist_mutex);
+               put_pid_ns(ns);
+               return l;
+       }
+       init_rwsem(&l->mutex);
+       down_write(&l->mutex);
+       l->key.type = type;
+       l->key.ns = ns;
+       l->use_count = 0; /* don't increment here */
+       l->list = NULL;
+       l->owner = cgrp;
+       list_add(&l->links, &cgrp->pidlists);
+       mutex_unlock(&cgrp->pidlist_mutex);
+       return l;
+}
+
+/*
+ * Load a cgroup's pidarray with either procs' tgids or tasks' pids
+ */
+static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
+                             struct cgroup_pidlist **lp)
+{
+       pid_t *array;
+       int length;
+       int pid, n = 0; /* used for populating the array */
        struct cgroup_iter it;
        struct task_struct *tsk;
+       struct cgroup_pidlist *l;
+
+       /*
+        * If cgroup gets more users after we read count, we won't have
+        * enough space - tough.  This race is indistinguishable to the
+        * caller from the case that the additional cgroup users didn't
+        * show up until sometime later on.
+        */
+       length = cgroup_task_count(cgrp);
+       array = pidlist_allocate(length);
+       if (!array)
+               return -ENOMEM;
+       /* now, populate the array */
        cgroup_iter_start(cgrp, &it);
        while ((tsk = cgroup_iter_next(cgrp, &it))) {
-               if (unlikely(n == npids))
+               if (unlikely(n == length))
                        break;
-               pid = task_pid_vnr(tsk);
-               if (pid > 0)
-                       pidarray[n++] = pid;
+               /* get tgid or pid for procs or tasks file respectively */
+               if (type == CGROUP_FILE_PROCS)
+                       pid = task_tgid_vnr(tsk);
+               else
+                       pid = task_pid_vnr(tsk);
+               if (pid > 0) /* make sure to only use valid results */
+                       array[n++] = pid;
        }
        cgroup_iter_end(cgrp, &it);
-       return n;
+       length = n;
+       /* now sort & (if procs) strip out duplicates */
+       sort(array, length, sizeof(pid_t), cmppid, NULL);
+       if (type == CGROUP_FILE_PROCS)
+               length = pidlist_uniq(&array, length);
+       l = cgroup_pidlist_find(cgrp, type);
+       if (!l) {
+               pidlist_free(array);
+               return -ENOMEM;
+       }
+       /* store array, freeing old if necessary - lock already held */
+       pidlist_free(l->list);
+       l->list = array;
+       l->length = length;
+       l->use_count++;
+       up_write(&l->mutex);
+       *lp = l;
+       return 0;
 }
 
 /**
@@ -2216,37 +2604,14 @@ err:
        return ret;
 }
 
-/*
- * Cache pids for all threads in the same pid namespace that are
- * opening the same "tasks" file.
- */
-struct cgroup_pids {
-       /* The node in cgrp->pids_list */
-       struct list_head list;
-       /* The cgroup those pids belong to */
-       struct cgroup *cgrp;
-       /* The namepsace those pids belong to */
-       struct pid_namespace *ns;
-       /* Array of process ids in the cgroup */
-       pid_t *tasks_pids;
-       /* How many files are using the this tasks_pids array */
-       int use_count;
-       /* Length of the current tasks_pids array */
-       int length;
-};
-
-static int cmppid(const void *a, const void *b)
-{
-       return *(pid_t *)a - *(pid_t *)b;
-}
 
 /*
- * seq_file methods for the "tasks" file. The seq_file position is the
+ * seq_file methods for the tasks/procs files. The seq_file position is the
  * next pid to display; the seq_file iterator is a pointer to the pid
- * in the cgroup->tasks_pids array.
+ * in the cgroup->l->list array.
  */
 
-static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
+static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
 {
        /*
         * Initially we receive a position value that corresponds to
@@ -2254,48 +2619,45 @@ static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
         * after a seek to the start). Use a binary-search to find the
         * next pid to display, if any
         */
-       struct cgroup_pids *cp = s->private;
-       struct cgroup *cgrp = cp->cgrp;
+       struct cgroup_pidlist *l = s->private;
        int index = 0, pid = *pos;
        int *iter;
 
-       down_read(&cgrp->pids_mutex);
+       down_read(&l->mutex);
        if (pid) {
-               int end = cp->length;
+               int end = l->length;
 
                while (index < end) {
                        int mid = (index + end) / 2;
-                       if (cp->tasks_pids[mid] == pid) {
+                       if (l->list[mid] == pid) {
                                index = mid;
                                break;
-                       } else if (cp->tasks_pids[mid] <= pid)
+                       } else if (l->list[mid] <= pid)
                                index = mid + 1;
                        else
                                end = mid;
                }
        }
        /* If we're off the end of the array, we're done */
-       if (index >= cp->length)
+       if (index >= l->length)
                return NULL;
        /* Update the abstract position to be the actual pid that we found */
-       iter = cp->tasks_pids + index;
+       iter = l->list + index;
        *pos = *iter;
        return iter;
 }
 
-static void cgroup_tasks_stop(struct seq_file *s, void *v)
+static void cgroup_pidlist_stop(struct seq_file *s, void *v)
 {
-       struct cgroup_pids *cp = s->private;
-       struct cgroup *cgrp = cp->cgrp;
-       up_read(&cgrp->pids_mutex);
+       struct cgroup_pidlist *l = s->private;
+       up_read(&l->mutex);
 }
 
-static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
+static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
 {
-       struct cgroup_pids *cp = s->private;
-       int *p = v;
-       int *end = cp->tasks_pids + cp->length;
-
+       struct cgroup_pidlist *l = s->private;
+       pid_t *p = v;
+       pid_t *end = l->list + l->length;
        /*
         * Advance to the next pid in the array. If this goes off the
         * end, we're done
@@ -2309,124 +2671,107 @@ static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
        }
 }
 
-static int cgroup_tasks_show(struct seq_file *s, void *v)
+static int cgroup_pidlist_show(struct seq_file *s, void *v)
 {
        return seq_printf(s, "%d\n", *(int *)v);
 }
 
-static const struct seq_operations cgroup_tasks_seq_operations = {
-       .start = cgroup_tasks_start,
-       .stop = cgroup_tasks_stop,
-       .next = cgroup_tasks_next,
-       .show = cgroup_tasks_show,
+/*
+ * seq_operations functions for iterating on pidlists through seq_file -
+ * independent of whether it's tasks or procs
+ */
+static const struct seq_operations cgroup_pidlist_seq_operations = {
+       .start = cgroup_pidlist_start,
+       .stop = cgroup_pidlist_stop,
+       .next = cgroup_pidlist_next,
+       .show = cgroup_pidlist_show,
 };
 
-static void release_cgroup_pid_array(struct cgroup_pids *cp)
+static void cgroup_release_pid_array(struct cgroup_pidlist *l)
 {
-       struct cgroup *cgrp = cp->cgrp;
-
-       down_write(&cgrp->pids_mutex);
-       BUG_ON(!cp->use_count);
-       if (!--cp->use_count) {
-               list_del(&cp->list);
-               put_pid_ns(cp->ns);
-               kfree(cp->tasks_pids);
-               kfree(cp);
+       /*
+        * the case where we're the last user of this particular pidlist will
+        * have us remove it from the cgroup's list, which entails taking the
+        * mutex. since in cgroup_pidlist_find() the pidlist's mutex is taken
+        * under cgroup->pidlist_mutex, we have to take pidlist_mutex first.
+        */
+       mutex_lock(&l->owner->pidlist_mutex);
+       down_write(&l->mutex);
+       BUG_ON(!l->use_count);
+       if (!--l->use_count) {
+               /* we're the last user if refcount is 0; remove and free */
+               list_del(&l->links);
+               mutex_unlock(&l->owner->pidlist_mutex);
+               pidlist_free(l->list);
+               put_pid_ns(l->key.ns);
+               up_write(&l->mutex);
+               kfree(l);
+               return;
        }
-       up_write(&cgrp->pids_mutex);
+       mutex_unlock(&l->owner->pidlist_mutex);
+       up_write(&l->mutex);
 }
 
-static int cgroup_tasks_release(struct inode *inode, struct file *file)
+static int cgroup_pidlist_release(struct inode *inode, struct file *file)
 {
-       struct seq_file *seq;
-       struct cgroup_pids *cp;
-
+       struct cgroup_pidlist *l;
        if (!(file->f_mode & FMODE_READ))
                return 0;
-
-       seq = file->private_data;
-       cp = seq->private;
-
-       release_cgroup_pid_array(cp);
+       /*
+        * the seq_file will only be initialized if the file was opened for
+        * reading; hence we check if it's not null only in that case.
+        */
+       l = ((struct seq_file *)file->private_data)->private;
+       cgroup_release_pid_array(l);
        return seq_release(inode, file);
 }
 
-static struct file_operations cgroup_tasks_operations = {
+static const struct file_operations cgroup_pidlist_operations = {
        .read = seq_read,
        .llseek = seq_lseek,
        .write = cgroup_file_write,
-       .release = cgroup_tasks_release,
+       .release = cgroup_pidlist_release,
 };
 
 /*
- * Handle an open on 'tasks' file.  Prepare an array containing the
- * process id's of tasks currently attached to the cgroup being opened.
+ * The following functions handle opens on a file that displays a pidlist
+ * (tasks or procs). Prepare an array of the process/thread IDs of whoever's
+ * in the cgroup.
  */
-
-static int cgroup_tasks_open(struct inode *unused, struct file *file)
+/* helper function for the two below it */
+static int cgroup_pidlist_open(struct file *file, enum cgroup_filetype type)
 {
        struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
-       struct pid_namespace *ns = current->nsproxy->pid_ns;
-       struct cgroup_pids *cp;
-       pid_t *pidarray;
-       int npids;
+       struct cgroup_pidlist *l;
        int retval;
 
        /* Nothing to do for write-only files */
        if (!(file->f_mode & FMODE_READ))
                return 0;
 
-       /*
-        * If cgroup gets more users after we read count, we won't have
-        * enough space - tough.  This race is indistinguishable to the
-        * caller from the case that the additional cgroup users didn't
-        * show up until sometime later on.
-        */
-       npids = cgroup_task_count(cgrp);
-       pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
-       if (!pidarray)
-               return -ENOMEM;
-       npids = pid_array_load(pidarray, npids, cgrp);
-       sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
-
-       /*
-        * Store the array in the cgroup, freeing the old
-        * array if necessary
-        */
-       down_write(&cgrp->pids_mutex);
-
-       list_for_each_entry(cp, &cgrp->pids_list, list) {
-               if (ns == cp->ns)
-                       goto found;
-       }
-
-       cp = kzalloc(sizeof(*cp), GFP_KERNEL);
-       if (!cp) {
-               up_write(&cgrp->pids_mutex);
-               kfree(pidarray);
-               return -ENOMEM;
-       }
-       cp->cgrp = cgrp;
-       cp->ns = ns;
-       get_pid_ns(ns);
-       list_add(&cp->list, &cgrp->pids_list);
-found:
-       kfree(cp->tasks_pids);
-       cp->tasks_pids = pidarray;
-       cp->length = npids;
-       cp->use_count++;
-       up_write(&cgrp->pids_mutex);
-
-       file->f_op = &cgroup_tasks_operations;
+       /* have the array populated */
+       retval = pidlist_array_load(cgrp, type, &l);
+       if (retval)
+               return retval;
+       /* configure file information */
+       file->f_op = &cgroup_pidlist_operations;
 
-       retval = seq_open(file, &cgroup_tasks_seq_operations);
+       retval = seq_open(file, &cgroup_pidlist_seq_operations);
        if (retval) {
-               release_cgroup_pid_array(cp);
+               cgroup_release_pid_array(l);
                return retval;
        }
-       ((struct seq_file *)file->private_data)->private = cp;
+       ((struct seq_file *)file->private_data)->private = l;
        return 0;
 }
+static int cgroup_tasks_open(struct inode *unused, struct file *file)
+{
+       return cgroup_pidlist_open(file, CGROUP_FILE_TASKS);
+}
+static int cgroup_procs_open(struct inode *unused, struct file *file)
+{
+       return cgroup_pidlist_open(file, CGROUP_FILE_PROCS);
+}
 
 static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
                                            struct cftype *cft)
@@ -2449,21 +2794,27 @@ static int cgroup_write_notify_on_release(struct cgroup *cgrp,
 /*
  * for the common functions, 'private' gives the type of file
  */
+/* for hysterical raisins, we can't put this on the older files */
+#define CGROUP_FILE_GENERIC_PREFIX "cgroup."
 static struct cftype files[] = {
        {
                .name = "tasks",
                .open = cgroup_tasks_open,
                .write_u64 = cgroup_tasks_write,
-               .release = cgroup_tasks_release,
-               .private = FILE_TASKLIST,
+               .release = cgroup_pidlist_release,
                .mode = S_IRUGO | S_IWUSR,
        },
-
+       {
+               .name = CGROUP_FILE_GENERIC_PREFIX "procs",
+               .open = cgroup_procs_open,
+               /* .write_u64 = cgroup_procs_write, TODO */
+               .release = cgroup_pidlist_release,
+               .mode = S_IRUGO,
+       },
        {
                .name = "notify_on_release",
                .read_u64 = cgroup_read_notify_on_release,
                .write_u64 = cgroup_write_notify_on_release,
-               .private = FILE_NOTIFY_ON_RELEASE,
        },
 };
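
With the cftype table above in place, the new read-only "cgroup.procs" file lists each thread-group ID once, sorted; a hedged userspace sketch (the mount path and group name are hypothetical):

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/mnt/cgroup/mygrp/cgroup.procs", "r");
		int tgid;

		if (!f)
			return 1;
		/* one tgid per line: sorted, duplicates already stripped */
		while (fscanf(f, "%d", &tgid) == 1)
			printf("tgid %d\n", tgid);
		fclose(f);
		return 0;
	}
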
 
@@ -2472,7 +2823,6 @@ static struct cftype cft_release_agent = {
        .read_seq_string = cgroup_release_agent_show,
        .write_string = cgroup_release_agent_write,
        .max_write_len = PATH_MAX,
-       .private = FILE_RELEASE_AGENT,
 };
 
 static int cgroup_populate_dir(struct cgroup *cgrp)
@@ -2879,6 +3229,7 @@ int __init cgroup_init_early(void)
        init_task.cgroups = &init_css_set;
 
        init_css_set_link.cg = &init_css_set;
+       init_css_set_link.cgrp = dummytop;
        list_add(&init_css_set_link.cgrp_link_list,
                 &rootnode.top_cgroup.css_sets);
        list_add(&init_css_set_link.cg_link_list,
@@ -2933,7 +3284,7 @@ int __init cgroup_init(void)
        /* Add init_css_set to the hash table */
        hhead = css_set_hash(init_css_set.subsys);
        hlist_add_head(&init_css_set.hlist, hhead);
-
+       BUG_ON(!init_root_id(&rootnode));
        err = register_filesystem(&cgroup_fs_type);
        if (err < 0)
                goto out;
@@ -2986,15 +3337,16 @@ static int proc_cgroup_show(struct seq_file *m, void *v)
        for_each_active_root(root) {
                struct cgroup_subsys *ss;
                struct cgroup *cgrp;
-               int subsys_id;
                int count = 0;
 
-               seq_printf(m, "%lu:", root->subsys_bits);
+               seq_printf(m, "%d:", root->hierarchy_id);
                for_each_subsys(root, ss)
                        seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
+               if (strlen(root->name))
+                       seq_printf(m, "%sname=%s", count ? "," : "",
+                                  root->name);
                seq_putc(m, ':');
-               get_first_subsys(&root->top_cgroup, NULL, &subsys_id);
-               cgrp = task_cgroup(tsk, subsys_id);
+               cgrp = task_cgroup_from_root(tsk, root);
                retval = cgroup_path(cgrp, buf, PAGE_SIZE);
                if (retval < 0)
                        goto out_unlock;
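
For illustration, each /proc/<pid>/cgroup line is now keyed by the hierarchy ID instead of the subsystem bitmask, with the name appended when one was given; hypothetical output for a task in a named subsystem-free hierarchy plus a cpuset hierarchy:

	2:name=mygrp:/
	1:cpuset:/child
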
@@ -3033,8 +3385,8 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v)
        mutex_lock(&cgroup_mutex);
        for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
                struct cgroup_subsys *ss = subsys[i];
-               seq_printf(m, "%s\t%lu\t%d\t%d\n",
-                          ss->name, ss->root->subsys_bits,
+               seq_printf(m, "%s\t%d\t%d\t%d\n",
+                          ss->name, ss->root->hierarchy_id,
                           ss->root->number_of_cgroups, !ss->disabled);
        }
        mutex_unlock(&cgroup_mutex);
@@ -3320,13 +3672,11 @@ int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task)
 {
        int ret;
        struct cgroup *target;
-       int subsys_id;
 
        if (cgrp == dummytop)
                return 1;
 
-       get_first_subsys(cgrp, NULL, &subsys_id);
-       target = task_cgroup(task, subsys_id);
+       target = task_cgroup_from_root(task, cgrp->root);
        while (cgrp != target && cgrp != cgrp->top_cgroup)
                cgrp = cgrp->parent;
        ret = (cgrp == target);
@@ -3693,3 +4043,154 @@ css_get_next(struct cgroup_subsys *ss, int id,
        return ret;
 }
 
+#ifdef CONFIG_CGROUP_DEBUG
+static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
+                                                  struct cgroup *cont)
+{
+       struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
+
+       if (!css)
+               return ERR_PTR(-ENOMEM);
+
+       return css;
+}
+
+static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+       kfree(cont->subsys[debug_subsys_id]);
+}
+
+static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
+{
+       return atomic_read(&cont->count);
+}
+
+static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft)
+{
+       return cgroup_task_count(cont);
+}
+
+static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
+{
+       return (u64)(unsigned long)current->cgroups;
+}
+
+static u64 current_css_set_refcount_read(struct cgroup *cont,
+                                          struct cftype *cft)
+{
+       u64 count;
+
+       rcu_read_lock();
+       count = atomic_read(&current->cgroups->refcount);
+       rcu_read_unlock();
+       return count;
+}
+
+static int current_css_set_cg_links_read(struct cgroup *cont,
+                                        struct cftype *cft,
+                                        struct seq_file *seq)
+{
+       struct cg_cgroup_link *link;
+       struct css_set *cg;
+
+       read_lock(&css_set_lock);
+       rcu_read_lock();
+       cg = rcu_dereference(current->cgroups);
+       list_for_each_entry(link, &cg->cg_links, cg_link_list) {
+               struct cgroup *c = link->cgrp;
+               const char *name;
+
+               if (c->dentry)
+                       name = c->dentry->d_name.name;
+               else
+                       name = "?";
+               seq_printf(seq, "Root %d group %s\n",
+                          c->root->hierarchy_id, name);
+       }
+       rcu_read_unlock();
+       read_unlock(&css_set_lock);
+       return 0;
+}
+
+#define MAX_TASKS_SHOWN_PER_CSS 25
+static int cgroup_css_links_read(struct cgroup *cont,
+                                struct cftype *cft,
+                                struct seq_file *seq)
+{
+       struct cg_cgroup_link *link;
+
+       read_lock(&css_set_lock);
+       list_for_each_entry(link, &cont->css_sets, cgrp_link_list) {
+               struct css_set *cg = link->cg;
+               struct task_struct *task;
+               int count = 0;
+               seq_printf(seq, "css_set %p\n", cg);
+               list_for_each_entry(task, &cg->tasks, cg_list) {
+                       if (count++ > MAX_TASKS_SHOWN_PER_CSS) {
+                               seq_puts(seq, "  ...\n");
+                               break;
+                       } else {
+                               seq_printf(seq, "  task %d\n",
+                                          task_pid_vnr(task));
+                       }
+               }
+       }
+       read_unlock(&css_set_lock);
+       return 0;
+}
+
+static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
+{
+       return test_bit(CGRP_RELEASABLE, &cgrp->flags);
+}
+
+static struct cftype debug_files[] =  {
+       {
+               .name = "cgroup_refcount",
+               .read_u64 = cgroup_refcount_read,
+       },
+       {
+               .name = "taskcount",
+               .read_u64 = debug_taskcount_read,
+       },
+
+       {
+               .name = "current_css_set",
+               .read_u64 = current_css_set_read,
+       },
+
+       {
+               .name = "current_css_set_refcount",
+               .read_u64 = current_css_set_refcount_read,
+       },
+
+       {
+               .name = "current_css_set_cg_links",
+               .read_seq_string = current_css_set_cg_links_read,
+       },
+
+       {
+               .name = "cgroup_css_links",
+               .read_seq_string = cgroup_css_links_read,
+       },
+
+       {
+               .name = "releasable",
+               .read_u64 = releasable_read,
+       },
+};
+
+static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+       return cgroup_add_files(cont, ss, debug_files,
+                               ARRAY_SIZE(debug_files));
+}
+
+struct cgroup_subsys debug_subsys = {
+       .name = "debug",
+       .create = debug_create,
+       .destroy = debug_destroy,
+       .populate = debug_populate,
+       .subsys_id = debug_subsys_id,
+};
+#endif /* CONFIG_CGROUP_DEBUG */
diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c
deleted file mode 100644
index 0c92d79..0000000
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * kernel/cgroup_debug.c - Example cgroup subsystem that
- * exposes debug info
- *
- * Copyright (C) Google Inc, 2007
- *
- * Developed by Paul Menage (menage@google.com)
- *
- */
-
-#include <linux/cgroup.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/rcupdate.h>
-
-#include <asm/atomic.h>
-
-static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
-                                                  struct cgroup *cont)
-{
-       struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
-
-       if (!css)
-               return ERR_PTR(-ENOMEM);
-
-       return css;
-}
-
-static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
-{
-       kfree(cont->subsys[debug_subsys_id]);
-}
-
-static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
-{
-       return atomic_read(&cont->count);
-}
-
-static u64 taskcount_read(struct cgroup *cont, struct cftype *cft)
-{
-       u64 count;
-
-       count = cgroup_task_count(cont);
-       return count;
-}
-
-static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft)
-{
-       return (u64)(long)current->cgroups;
-}
-
-static u64 current_css_set_refcount_read(struct cgroup *cont,
-                                          struct cftype *cft)
-{
-       u64 count;
-
-       rcu_read_lock();
-       count = atomic_read(&current->cgroups->refcount);
-       rcu_read_unlock();
-       return count;
-}
-
-static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
-{
-       return test_bit(CGRP_RELEASABLE, &cgrp->flags);
-}
-
-static struct cftype files[] =  {
-       {
-               .name = "cgroup_refcount",
-               .read_u64 = cgroup_refcount_read,
-       },
-       {
-               .name = "taskcount",
-               .read_u64 = taskcount_read,
-       },
-
-       {
-               .name = "current_css_set",
-               .read_u64 = current_css_set_read,
-       },
-
-       {
-               .name = "current_css_set_refcount",
-               .read_u64 = current_css_set_refcount_read,
-       },
-
-       {
-               .name = "releasable",
-               .read_u64 = releasable_read,
-       },
-};
-
-static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
-{
-       return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
-}
-
-struct cgroup_subsys debug_subsys = {
-       .name = "debug",
-       .create = debug_create,
-       .destroy = debug_destroy,
-       .populate = debug_populate,
-       .subsys_id = debug_subsys_id,
-};
index fb249e2..59e9ef6 100644
@@ -159,7 +159,7 @@ static bool is_task_frozen_enough(struct task_struct *task)
  */
 static int freezer_can_attach(struct cgroup_subsys *ss,
                              struct cgroup *new_cgroup,
-                             struct task_struct *task)
+                             struct task_struct *task, bool threadgroup)
 {
        struct freezer *freezer;
 
@@ -177,6 +177,19 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
        if (freezer->state == CGROUP_FROZEN)
                return -EBUSY;
 
+       if (threadgroup) {
+               struct task_struct *c;
+
+               rcu_read_lock();
+               list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
+                       if (is_task_frozen_enough(c)) {
+                               rcu_read_unlock();
+                               return -EBUSY;
+                       }
+               }
+               rcu_read_unlock();
+       }
+
        return 0;
 }
 
index 7e75a41..b5cb469 100644
@@ -1324,9 +1324,10 @@ static int fmeter_getrate(struct fmeter *fmp)
 static cpumask_var_t cpus_attach;
 
 /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
-static int cpuset_can_attach(struct cgroup_subsys *ss,
-                            struct cgroup *cont, struct task_struct *tsk)
+static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
+                            struct task_struct *tsk, bool threadgroup)
 {
+       int ret;
        struct cpuset *cs = cgroup_cs(cont);
 
        if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
@@ -1343,18 +1344,51 @@ static int cpuset_can_attach(struct cgroup_subsys *ss,
        if (tsk->flags & PF_THREAD_BOUND)
                return -EINVAL;
 
-       return security_task_setscheduler(tsk, 0, NULL);
+       ret = security_task_setscheduler(tsk, 0, NULL);
+       if (ret)
+               return ret;
+       if (threadgroup) {
+               struct task_struct *c;
+
+               rcu_read_lock();
+               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+                       ret = security_task_setscheduler(c, 0, NULL);
+                       if (ret) {
+                               rcu_read_unlock();
+                               return ret;
+                       }
+               }
+               rcu_read_unlock();
+       }
+       return 0;
+}
+
+static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
+                              struct cpuset *cs)
+{
+       int err;
+       /*
+        * can_attach beforehand should guarantee that this doesn't fail.
+        * TODO: have a better way to handle failure here
+        */
+       err = set_cpus_allowed_ptr(tsk, cpus_attach);
+       WARN_ON_ONCE(err);
+
+       task_lock(tsk);
+       cpuset_change_task_nodemask(tsk, to);
+       task_unlock(tsk);
+       cpuset_update_task_spread_flag(cs, tsk);
+
 }
 
-static void cpuset_attach(struct cgroup_subsys *ss,
-                         struct cgroup *cont, struct cgroup *oldcont,
-                         struct task_struct *tsk)
+static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
+                         struct cgroup *oldcont, struct task_struct *tsk,
+                         bool threadgroup)
 {
        nodemask_t from, to;
        struct mm_struct *mm;
        struct cpuset *cs = cgroup_cs(cont);
        struct cpuset *oldcs = cgroup_cs(oldcont);
-       int err;
 
        if (cs == &top_cpuset) {
                cpumask_copy(cpus_attach, cpu_possible_mask);
@@ -1363,15 +1397,19 @@ static void cpuset_attach(struct cgroup_subsys *ss,
                guarantee_online_cpus(cs, cpus_attach);
                guarantee_online_mems(cs, &to);
        }
-       err = set_cpus_allowed_ptr(tsk, cpus_attach);
-       if (err)
-               return;
 
-       task_lock(tsk);
-       cpuset_change_task_nodemask(tsk, &to);
-       task_unlock(tsk);
-       cpuset_update_task_spread_flag(cs, tsk);
+       /* do per-task migration stuff possibly for each in the threadgroup */
+       cpuset_attach_task(tsk, &to, cs);
+       if (threadgroup) {
+               struct task_struct *c;
+               rcu_read_lock();
+               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+                       cpuset_attach_task(c, &to, cs);
+               }
+               rcu_read_unlock();
+       }
 
+       /* change mm; only needs to be done once even if threadgroup */
        from = oldcs->mems_allowed;
        to = cs->mems_allowed;
        mm = get_task_mm(tsk);
index 60d6fdc..5859f59 100644
@@ -976,8 +976,6 @@ NORET_TYPE void do_exit(long code)
                disassociate_ctty(1);
 
        module_put(task_thread_info(tsk)->exec_domain->module);
-       if (tsk->binfmt)
-               module_put(tsk->binfmt->module);
 
        proc_exit_connector(tsk);
 
@@ -1097,28 +1095,28 @@ struct wait_opts {
        int __user              *wo_stat;
        struct rusage __user    *wo_rusage;
 
+       wait_queue_t            child_wait;
        int                     notask_error;
 };
 
-static struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
+static inline
+struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
 {
-       struct pid *pid = NULL;
-       if (type == PIDTYPE_PID)
-               pid = task->pids[type].pid;
-       else if (type < PIDTYPE_MAX)
-               pid = task->group_leader->pids[type].pid;
-       return pid;
+       if (type != PIDTYPE_PID)
+               task = task->group_leader;
+       return task->pids[type].pid;
 }
 
-static int eligible_child(struct wait_opts *wo, struct task_struct *p)
+static int eligible_pid(struct wait_opts *wo, struct task_struct *p)
 {
-       int err;
-
-       if (wo->wo_type < PIDTYPE_MAX) {
-               if (task_pid_type(p, wo->wo_type) != wo->wo_pid)
-                       return 0;
-       }
+       return  wo->wo_type == PIDTYPE_MAX ||
+               task_pid_type(p, wo->wo_type) == wo->wo_pid;
+}
 
+static int eligible_child(struct wait_opts *wo, struct task_struct *p)
+{
+       if (!eligible_pid(wo, p))
+               return 0;
        /* Wait for all children (clone and not) if __WALL is set;
         * otherwise, wait for clone children *only* if __WCLONE is
         * set; otherwise, wait for non-clone children *only*.  (Note:
@@ -1128,10 +1126,6 @@ static int eligible_child(struct wait_opts *wo, struct task_struct *p)
            && !(wo->wo_flags & __WALL))
                return 0;
 
-       err = security_task_wait(p);
-       if (err)
-               return err;
-
        return 1;
 }
 
@@ -1144,18 +1138,20 @@ static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,
 
        put_task_struct(p);
        infop = wo->wo_info;
-       if (!retval)
-               retval = put_user(SIGCHLD, &infop->si_signo);
-       if (!retval)
-               retval = put_user(0, &infop->si_errno);
-       if (!retval)
-               retval = put_user((short)why, &infop->si_code);
-       if (!retval)
-               retval = put_user(pid, &infop->si_pid);
-       if (!retval)
-               retval = put_user(uid, &infop->si_uid);
-       if (!retval)
-               retval = put_user(status, &infop->si_status);
+       if (infop) {
+               if (!retval)
+                       retval = put_user(SIGCHLD, &infop->si_signo);
+               if (!retval)
+                       retval = put_user(0, &infop->si_errno);
+               if (!retval)
+                       retval = put_user((short)why, &infop->si_code);
+               if (!retval)
+                       retval = put_user(pid, &infop->si_pid);
+               if (!retval)
+                       retval = put_user(uid, &infop->si_uid);
+               if (!retval)
+                       retval = put_user(status, &infop->si_status);
+       }
        if (!retval)
                retval = pid;
        return retval;
@@ -1485,13 +1481,14 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
  * then ->notask_error is 0 if @p is an eligible child,
  * or another error from security_task_wait(), or still -ECHILD.
  */
-static int wait_consider_task(struct wait_opts *wo, struct task_struct *parent,
-                               int ptrace, struct task_struct *p)
+static int wait_consider_task(struct wait_opts *wo, int ptrace,
+                               struct task_struct *p)
 {
        int ret = eligible_child(wo, p);
        if (!ret)
                return ret;
 
+       ret = security_task_wait(p);
        if (unlikely(ret < 0)) {
                /*
                 * If we have not yet seen any eligible child,
@@ -1553,7 +1550,7 @@ static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
                 * Do not consider detached threads.
                 */
                if (!task_detached(p)) {
-                       int ret = wait_consider_task(wo, tsk, 0, p);
+                       int ret = wait_consider_task(wo, 0, p);
                        if (ret)
                                return ret;
                }
@@ -1567,7 +1564,7 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
        struct task_struct *p;
 
        list_for_each_entry(p, &tsk->ptraced, ptrace_entry) {
-               int ret = wait_consider_task(wo, tsk, 1, p);
+               int ret = wait_consider_task(wo, 1, p);
                if (ret)
                        return ret;
        }
@@ -1575,15 +1572,38 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk)
        return 0;
 }
 
+static int child_wait_callback(wait_queue_t *wait, unsigned mode,
+                               int sync, void *key)
+{
+       struct wait_opts *wo = container_of(wait, struct wait_opts,
+                                               child_wait);
+       struct task_struct *p = key;
+
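+       /* p is the child that __wake_up_parent() passed as the wakeup key */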
+       if (!eligible_pid(wo, p))
+               return 0;
+
+       if ((wo->wo_flags & __WNOTHREAD) && wait->private != p->parent)
+               return 0;
+
+       return default_wake_function(wait, mode, sync, key);
+}
+
+void __wake_up_parent(struct task_struct *p, struct task_struct *parent)
+{
+       __wake_up_sync_key(&parent->signal->wait_chldexit,
+                               TASK_INTERRUPTIBLE, 1, p);
+}
+
 static long do_wait(struct wait_opts *wo)
 {
-       DECLARE_WAITQUEUE(wait, current);
        struct task_struct *tsk;
        int retval;
 
        trace_sched_process_wait(wo->wo_pid);
 
-       add_wait_queue(&current->signal->wait_chldexit,&wait);
+       init_waitqueue_func_entry(&wo->child_wait, child_wait_callback);
+       wo->child_wait.private = current;
+       add_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
 repeat:
        /*
         * If there is nothing that can match our criteria, just get out.
@@ -1624,32 +1644,7 @@ notask:
        }
 end:
        __set_current_state(TASK_RUNNING);
-       remove_wait_queue(&current->signal->wait_chldexit,&wait);
-       if (wo->wo_info) {
-               struct siginfo __user *infop = wo->wo_info;
-
-               if (retval > 0)
-                       retval = 0;
-               else {
-                       /*
-                        * For a WNOHANG return, clear out all the fields
-                        * we would set so the user can easily tell the
-                        * difference.
-                        */
-                       if (!retval)
-                               retval = put_user(0, &infop->si_signo);
-                       if (!retval)
-                               retval = put_user(0, &infop->si_errno);
-                       if (!retval)
-                               retval = put_user(0, &infop->si_code);
-                       if (!retval)
-                               retval = put_user(0, &infop->si_pid);
-                       if (!retval)
-                               retval = put_user(0, &infop->si_uid);
-                       if (!retval)
-                               retval = put_user(0, &infop->si_status);
-               }
-       }
+       remove_wait_queue(&current->signal->wait_chldexit, &wo->child_wait);
        return retval;
 }
 
@@ -1694,6 +1689,29 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
        wo.wo_stat      = NULL;
        wo.wo_rusage    = ru;
        ret = do_wait(&wo);
+
+       if (ret > 0) {
+               ret = 0;
+       } else if (infop) {
+               /*
+                * For a WNOHANG return, clear out all the fields
+                * we would set so the user can easily tell the
+                * difference.
+                */
+               if (!ret)
+                       ret = put_user(0, &infop->si_signo);
+               if (!ret)
+                       ret = put_user(0, &infop->si_errno);
+               if (!ret)
+                       ret = put_user(0, &infop->si_code);
+               if (!ret)
+                       ret = put_user(0, &infop->si_pid);
+               if (!ret)
+                       ret = put_user(0, &infop->si_uid);
+               if (!ret)
+                       ret = put_user(0, &infop->si_status);
+       }
+
        put_pid(pid);
 
        /* avoid REGPARM breakage on x86: */
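
Moving the WNOHANG zeroing from do_wait() into sys_waitid() keeps the contract at the syscall boundary; a hedged userspace sketch of a caller relying on the cleared si_pid:

	#include <sys/wait.h>
	#include <stdio.h>

	int main(void)
	{
		siginfo_t info;

		/* on a WNOHANG return with nothing to report, si_pid reads 0 */
		if (waitid(P_ALL, 0, &info, WEXITED | WNOHANG) == 0) {
			if (info.si_pid == 0)
				printf("no child has changed state yet\n");
			else
				printf("child %d changed state\n", (int)info.si_pid);
		}
		return 0;
	}
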
index 51ad0b0..266c6af 100644
@@ -434,6 +434,14 @@ __setup("coredump_filter=", coredump_filter_setup);
 
 #include <linux/init_task.h>
 
+static void mm_init_aio(struct mm_struct *mm)
+{
+#ifdef CONFIG_AIO
+       spin_lock_init(&mm->ioctx_lock);
+       INIT_HLIST_HEAD(&mm->ioctx_list);
+#endif
+}
+
 static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 {
        atomic_set(&mm->mm_users, 1);
@@ -447,10 +455,9 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
        set_mm_counter(mm, file_rss, 0);
        set_mm_counter(mm, anon_rss, 0);
        spin_lock_init(&mm->page_table_lock);
-       spin_lock_init(&mm->ioctx_lock);
-       INIT_HLIST_HEAD(&mm->ioctx_list);
        mm->free_area_cache = TASK_UNMAPPED_BASE;
        mm->cached_hole_size = ~0UL;
+       mm_init_aio(mm);
        mm_init_owner(mm, p);
 
        if (likely(!mm_alloc_pgd(mm))) {
@@ -511,6 +518,8 @@ void mmput(struct mm_struct *mm)
                        spin_unlock(&mmlist_lock);
                }
                put_swap_token(mm);
+               if (mm->binfmt)
+                       module_put(mm->binfmt->module);
                mmdrop(mm);
        }
 }
@@ -636,9 +645,14 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
        mm->hiwater_rss = get_mm_rss(mm);
        mm->hiwater_vm = mm->total_vm;
 
+       if (mm->binfmt && !try_module_get(mm->binfmt->module))
+               goto free_pt;
+
        return mm;
 
 free_pt:
+       /* don't put binfmt in mmput, we haven't got module yet */
+       mm->binfmt = NULL;
        mmput(mm);
 
 fail_nomem:
@@ -979,6 +993,16 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
                return ERR_PTR(-EINVAL);
 
+       /*
+        * Siblings of global init remain as zombies on exit since they are
+        * not reaped by their parent (swapper). To solve this and to avoid
+        * multi-rooted process trees, prevent global and container-inits
+        * from creating siblings.
+        */
+       if ((clone_flags & CLONE_PARENT) &&
+                               current->signal->flags & SIGNAL_UNKILLABLE)
+               return ERR_PTR(-EINVAL);
+
        retval = security_task_create(clone_flags);
        if (retval)
                goto fork_out;
@@ -1020,9 +1044,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        if (!try_module_get(task_thread_info(p)->exec_domain->module))
                goto bad_fork_cleanup_count;
 
-       if (p->binfmt && !try_module_get(p->binfmt->module))
-               goto bad_fork_cleanup_put_domain;
-
        p->did_exec = 0;
        delayacct_tsk_init(p);  /* Must remain after dup_task_struct() */
        copy_flags(clone_flags, p);
@@ -1310,9 +1331,6 @@ bad_fork_cleanup_cgroup:
 #endif
        cgroup_exit(p, cgroup_callbacks_done);
        delayacct_tsk_free(p);
-       if (p->binfmt)
-               module_put(p->binfmt->module);
-bad_fork_cleanup_put_domain:
        module_put(task_thread_info(p)->exec_domain->module);
 bad_fork_cleanup_count:
        atomic_dec(&p->cred->user->processes);
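
The copy_process() check added above means an init task (global or
container init, both flagged SIGNAL_UNKILLABLE) can no longer create a
sibling of itself with CLONE_PARENT, which would otherwise leave
unreapable zombies and a multi-rooted process tree. A hedged userspace
sketch; from an ordinary process this clone() succeeds, but issued by an
init task it now fails with EINVAL:

    #define _GNU_SOURCE
    #include <errno.h>
    #include <sched.h>
    #include <signal.h>
    #include <stdio.h>

    static int child_fn(void *arg)
    {
            return 0;
    }

    int main(void)
    {
            static char stack[16 * 1024];

            /* Illustrative only: the EINVAL path fires when the caller's
             * signal struct carries SIGNAL_UNKILLABLE (an init task). */
            if (clone(child_fn, stack + sizeof(stack),
                      CLONE_PARENT | SIGCHLD, NULL) == -1)
                    fprintf(stderr, "clone: %d\n", errno);
            return 0;
    }
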
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index 654efd0..70a298d 100644 (file)
@@ -34,7 +34,7 @@ config GCOV_KERNEL
 config GCOV_PROFILE_ALL
        bool "Profile entire Kernel"
        depends on GCOV_KERNEL
-       depends on S390 || X86 || (PPC && EXPERIMENTAL)
+       depends on S390 || X86 || (PPC && EXPERIMENTAL) || MICROBLAZE
        default n
        ---help---
        This option activates profiling for the entire kernel.
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 022a492..d4e8417 100644 (file)
@@ -171,12 +171,12 @@ static unsigned long timeout_jiffies(unsigned long timeout)
  * Process updating of timeout sysctl
  */
 int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
-                                 struct file *filp, void __user *buffer,
+                                 void __user *buffer,
                                  size_t *lenp, loff_t *ppos)
 {
        int ret;
 
-       ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+       ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
 
        if (ret || !write)
                goto out;
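
The signature change here is one instance of a tree-wide cleanup in this
merge: proc handlers lose the unused struct file * argument, as do the
proc_do* helpers they call. A minimal sketch of the new handler shape
(my_value and my_sysctl_handler are hypothetical names):

    #include <linux/kernel.h>
    #include <linux/sysctl.h>

    static int my_value;        /* hypothetical tunable */

    /* A handler in the new five-argument form. */
    static int my_sysctl_handler(struct ctl_table *table, int write,
                                 void __user *buffer, size_t *lenp,
                                 loff_t *ppos)
    {
            int ret = proc_dointvec(table, write, buffer, lenp, ppos);

            if (!ret && write)
                    printk(KERN_INFO "my_value is now %d\n", my_value);
            return ret;
    }
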
diff --git a/kernel/module.c b/kernel/module.c
index e6bc4b2..5a29397 100644 (file)
@@ -1797,6 +1797,17 @@ static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs,
        }
 }
 
+static void free_modinfo(struct module *mod)
+{
+       struct module_attribute *attr;
+       int i;
+
+       for (i = 0; (attr = modinfo_attrs[i]); i++) {
+               if (attr->free)
+                       attr->free(mod);
+       }
+}
+
 #ifdef CONFIG_KALLSYMS
 
 /* lookup symbol in given range of kernel_symbols */
@@ -1862,13 +1873,93 @@ static char elf_type(const Elf_Sym *sym,
        return '?';
 }
 
+static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs,
+                           unsigned int shnum)
+{
+       const Elf_Shdr *sec;
+
+       if (src->st_shndx == SHN_UNDEF
+           || src->st_shndx >= shnum
+           || !src->st_name)
+               return false;
+
+       sec = sechdrs + src->st_shndx;
+       if (!(sec->sh_flags & SHF_ALLOC)
+#ifndef CONFIG_KALLSYMS_ALL
+           || !(sec->sh_flags & SHF_EXECINSTR)
+#endif
+           || (sec->sh_entsize & INIT_OFFSET_MASK))
+               return false;
+
+       return true;
+}
+
+static unsigned long layout_symtab(struct module *mod,
+                                  Elf_Shdr *sechdrs,
+                                  unsigned int symindex,
+                                  unsigned int strindex,
+                                  const Elf_Ehdr *hdr,
+                                  const char *secstrings,
+                                  unsigned long *pstroffs,
+                                  unsigned long *strmap)
+{
+       unsigned long symoffs;
+       Elf_Shdr *symsect = sechdrs + symindex;
+       Elf_Shdr *strsect = sechdrs + strindex;
+       const Elf_Sym *src;
+       const char *strtab;
+       unsigned int i, nsrc, ndst;
+
+       /* Put symbol section at end of init part of module. */
+       symsect->sh_flags |= SHF_ALLOC;
+       symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect,
+                                        symindex) | INIT_OFFSET_MASK;
+       DEBUGP("\t%s\n", secstrings + symsect->sh_name);
+
+       src = (void *)hdr + symsect->sh_offset;
+       nsrc = symsect->sh_size / sizeof(*src);
+       strtab = (void *)hdr + strsect->sh_offset;
+       for (ndst = i = 1; i < nsrc; ++i, ++src)
+               if (is_core_symbol(src, sechdrs, hdr->e_shnum)) {
+                       unsigned int j = src->st_name;
+
+                       while (!__test_and_set_bit(j, strmap) && strtab[j])
+                               ++j;
+                       ++ndst;
+               }
+
+       /* Append room for core symbols at end of core part. */
+       symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
+       mod->core_size = symoffs + ndst * sizeof(Elf_Sym);
+
+       /* Put string table section at end of init part of module. */
+       strsect->sh_flags |= SHF_ALLOC;
+       strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect,
+                                        strindex) | INIT_OFFSET_MASK;
+       DEBUGP("\t%s\n", secstrings + strsect->sh_name);
+
+       /* Append room for core symbols' strings at end of core part. */
+       *pstroffs = mod->core_size;
+       __set_bit(0, strmap);
+       mod->core_size += bitmap_weight(strmap, strsect->sh_size);
+
+       return symoffs;
+}
+
 static void add_kallsyms(struct module *mod,
                         Elf_Shdr *sechdrs,
+                        unsigned int shnum,
                         unsigned int symindex,
                         unsigned int strindex,
-                        const char *secstrings)
+                        unsigned long symoffs,
+                        unsigned long stroffs,
+                        const char *secstrings,
+                        unsigned long *strmap)
 {
-       unsigned int i;
+       unsigned int i, ndst;
+       const Elf_Sym *src;
+       Elf_Sym *dst;
+       char *s;
 
        mod->symtab = (void *)sechdrs[symindex].sh_addr;
        mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym);
@@ -1878,13 +1969,44 @@ static void add_kallsyms(struct module *mod,
        for (i = 0; i < mod->num_symtab; i++)
                mod->symtab[i].st_info
                        = elf_type(&mod->symtab[i], sechdrs, secstrings, mod);
+
+       mod->core_symtab = dst = mod->module_core + symoffs;
+       src = mod->symtab;
+       *dst = *src;
+       for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) {
+               if (!is_core_symbol(src, sechdrs, shnum))
+                       continue;
+               dst[ndst] = *src;
+               dst[ndst].st_name = bitmap_weight(strmap, dst[ndst].st_name);
+               ++ndst;
+       }
+       mod->core_num_syms = ndst;
+
+       mod->core_strtab = s = mod->module_core + stroffs;
+       for (*s = 0, i = 1; i < sechdrs[strindex].sh_size; ++i)
+               if (test_bit(i, strmap))
+                       *++s = mod->strtab[i];
 }
 #else
+static inline unsigned long layout_symtab(struct module *mod,
+                                         Elf_Shdr *sechdrs,
+                                         unsigned int symindex,
+                                         unsigned int strindex,
+                                         const Elf_Ehdr *hdr,
+                                         const char *secstrings,
+                                         unsigned long *pstroffs,
+                                         unsigned long *strmap)
+{
+       return 0;
+}
 static inline void add_kallsyms(struct module *mod,
                                Elf_Shdr *sechdrs,
+                               unsigned int shnum,
                                unsigned int symindex,
                                unsigned int strindex,
-                               const char *secstrings)
+                               unsigned long symoffs,
+                               unsigned long stroffs,
+                               const char *secstrings,
+                               const unsigned long *strmap)
 {
 }
 #endif /* CONFIG_KALLSYMS */
@@ -1959,6 +2081,9 @@ static noinline struct module *load_module(void __user *umod,
        struct module *mod;
        long err = 0;
        void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
+#ifdef CONFIG_KALLSYMS
+       unsigned long symoffs, stroffs, *strmap;
+#endif
        mm_segment_t old_fs;
 
        DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
@@ -2040,11 +2165,6 @@ static noinline struct module *load_module(void __user *umod,
        /* Don't keep modinfo and version sections. */
        sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
        sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
-#ifdef CONFIG_KALLSYMS
-       /* Keep symbol and string tables for decoding later. */
-       sechdrs[symindex].sh_flags |= SHF_ALLOC;
-       sechdrs[strindex].sh_flags |= SHF_ALLOC;
-#endif
 
        /* Check module struct version now, before we try to use module. */
        if (!check_modstruct_version(sechdrs, versindex, mod)) {
@@ -2080,6 +2200,13 @@ static noinline struct module *load_module(void __user *umod,
                goto free_hdr;
        }
 
+       strmap = kzalloc(BITS_TO_LONGS(sechdrs[strindex].sh_size)
+                        * sizeof(long), GFP_KERNEL);
+       if (!strmap) {
+               err = -ENOMEM;
+               goto free_mod;
+       }
+
        if (find_module(mod->name)) {
                err = -EEXIST;
                goto free_mod;
@@ -2109,6 +2236,8 @@ static noinline struct module *load_module(void __user *umod,
           this is done generically; there doesn't appear to be any
           special cases for the architectures. */
        layout_sections(mod, hdr, sechdrs, secstrings);
+       symoffs = layout_symtab(mod, sechdrs, symindex, strindex, hdr,
+                               secstrings, &stroffs, strmap);
 
        /* Do the allocs. */
        ptr = module_alloc_update_bounds(mod->core_size);
@@ -2313,7 +2442,10 @@ static noinline struct module *load_module(void __user *umod,
        percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr,
                       sechdrs[pcpuindex].sh_size);
 
-       add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
+       add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex,
+                    symoffs, stroffs, secstrings, strmap);
+       kfree(strmap);
+       strmap = NULL;
 
        if (!mod->taints) {
                struct _ddebug *debug;
@@ -2385,13 +2517,14 @@ static noinline struct module *load_module(void __user *umod,
        synchronize_sched();
        module_arch_cleanup(mod);
  cleanup:
+       free_modinfo(mod);
        kobject_del(&mod->mkobj.kobj);
        kobject_put(&mod->mkobj.kobj);
  free_unload:
        module_unload_free(mod);
 #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
- free_init:
        percpu_modfree(mod->refptr);
+ free_init:
 #endif
        module_free(mod, mod->module_init);
  free_core:
@@ -2402,6 +2535,7 @@ static noinline struct module *load_module(void __user *umod,
                percpu_modfree(percpu);
  free_mod:
        kfree(args);
+       kfree(strmap);
  free_hdr:
        vfree(hdr);
        return ERR_PTR(err);
@@ -2491,6 +2625,11 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
        /* Drop initial reference. */
        module_put(mod);
        trim_init_extable(mod);
+#ifdef CONFIG_KALLSYMS
+       mod->num_symtab = mod->core_num_syms;
+       mod->symtab = mod->core_symtab;
+       mod->strtab = mod->core_strtab;
+#endif
        module_free(mod, mod->module_init);
        mod->module_init = NULL;
        mod->init_size = 0;
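
The strmap bitmap above does double duty: layout_symtab() sets a bit for
every byte of every kept name (plus the leading NUL), so that
bitmap_weight(strmap, old_offset) equals the number of surviving bytes
before old_offset, which is exactly the name's new offset in the
compacted core string table. A userspace analog of that mapping
(hypothetical names; a plain byte array stands in for the bitmap):

    #include <stdio.h>

    /* new_offset() mirrors bitmap_weight(strmap, old_off): count the
     * surviving bytes that precede old_off in the old string table. */
    static unsigned int new_offset(const char *keep, unsigned int old_off)
    {
            unsigned int w = 0, i;

            for (i = 0; i < old_off; i++)
                    w += keep[i];
            return w;
    }

    int main(void)
    {
            const char strtab[] = "\0init_fn\0core_fn";
            char keep[sizeof(strtab)] = { 1 };      /* byte 0 always kept */
            unsigned int i;

            /* Keep only "core_fn" (old offset 9) and its trailing NUL. */
            for (i = 9; strtab[i]; i++)
                    keep[i] = 1;
            keep[i] = 1;

            printf("core_fn: old offset 9, new offset %u\n",
                   new_offset(keep, 9));
            return 0;
    }
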
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c
index 5aa854f..2a5dfec 100644 (file)
@@ -42,8 +42,8 @@ int ns_cgroup_clone(struct task_struct *task, struct pid *pid)
  *       (hence either you are in the same cgroup as task, or in an
  *        ancestor cgroup thereof)
  */
-static int ns_can_attach(struct cgroup_subsys *ss,
-               struct cgroup *new_cgroup, struct task_struct *task)
+static int ns_can_attach(struct cgroup_subsys *ss, struct cgroup *new_cgroup,
+                        struct task_struct *task, bool threadgroup)
 {
        if (current != task) {
                if (!capable(CAP_SYS_ADMIN))
@@ -56,6 +56,18 @@ static int ns_can_attach(struct cgroup_subsys *ss,
        if (!cgroup_is_descendant(new_cgroup, task))
                return -EPERM;
 
+       if (threadgroup) {
+               struct task_struct *c;
+               rcu_read_lock();
+               list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
+                       if (!cgroup_is_descendant(new_cgroup, c)) {
+                               rcu_read_unlock();
+                               return -EPERM;
+                       }
+               }
+               rcu_read_unlock();
+       }
+
        return 0;
 }
 
diff --git a/kernel/params.c b/kernel/params.c
index 7f6912c..9da58ea 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/slab.h>
+#include <linux/ctype.h>
 
 #if 0
 #define DEBUGP printk
@@ -87,7 +88,7 @@ static char *next_arg(char *args, char **param, char **val)
        }
 
        for (i = 0; args[i]; i++) {
-               if (args[i] == ' ' && !in_quote)
+               if (isspace(args[i]) && !in_quote)
                        break;
                if (equals == 0) {
                        if (args[i] == '=')
@@ -121,7 +122,7 @@ static char *next_arg(char *args, char **param, char **val)
                next = args + i;
 
        /* Chew up trailing spaces. */
-       while (*next == ' ')
+       while (isspace(*next))
                next++;
        return next;
 }
@@ -138,7 +139,7 @@ int parse_args(const char *name,
        DEBUGP("Parsing ARGS: %s\n", args);
 
        /* Chew leading spaces */
-       while (*args == ' ')
+       while (isspace(*args))
                args++;
 
        while (*args) {
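
With isspace(), kernel parameters may now be separated by tabs or
newlines as well as spaces. A userspace sketch of the new separation rule
(word_end and skip_ws are hypothetical helpers; next_arg()'s quote
handling is omitted):

    #include <ctype.h>
    #include <stdio.h>

    static char *word_end(char *s)
    {
            while (*s && !isspace((unsigned char)*s))
                    s++;
            return s;
    }

    static char *skip_ws(char *s)
    {
            while (isspace((unsigned char)*s))
                    s++;
            return s;
    }

    int main(void)
    {
            char args[] = "root=/dev/sda1\tquiet\nloglevel=3";
            char *p = skip_ws(args);

            while (*p) {
                    char *end = word_end(p);

                    printf("%.*s\n", (int)(end - p), p);
                    p = skip_ws(end);
            }
            return 0;
    }
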
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 821722a..86b3796 100644 (file)
@@ -118,7 +118,7 @@ struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old
 {
        if (!(flags & CLONE_NEWPID))
                return get_pid_ns(old_ns);
-       if (flags & CLONE_THREAD)
+       if (flags & (CLONE_THREAD|CLONE_PARENT))
                return ERR_PTR(-EINVAL);
        return create_pid_namespace(old_ns);
 }
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 307c285..23bd09c 100644 (file)
@@ -266,9 +266,10 @@ static int ignoring_children(struct sighand_struct *sigh)
  * or self-reaping.  Do notification now if it would have happened earlier.
  * If it should reap itself, return true.
  *
- * If it's our own child, there is no notification to do.
- * But if our normal children self-reap, then this child
- * was prevented by ptrace and we must reap it now.
+ * If it's our own child, there is no notification to do. But if our normal
+ * children self-reap, then this child was prevented by ptrace and we must
+ * reap it now; in that case we must also wake up sub-threads sleeping in
+ * do_wait().
  */
 static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
 {
@@ -278,8 +279,10 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
                if (!task_detached(p) && thread_group_empty(p)) {
                        if (!same_thread_group(p->real_parent, tracer))
                                do_notify_parent(p, p->exit_signal);
-                       else if (ignoring_children(tracer->sighand))
+                       else if (ignoring_children(tracer->sighand)) {
+                               __wake_up_parent(p, tracer);
                                p->exit_signal = -1;
+                       }
                }
                if (task_detached(p)) {
                        /* Mark it as in the process of being reaped. */
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index e1338f0..88faec2 100644 (file)
@@ -19,6 +19,7 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent)
 {
        spin_lock_init(&counter->lock);
        counter->limit = RESOURCE_MAX;
+       counter->soft_limit = RESOURCE_MAX;
        counter->parent = parent;
 }
 
@@ -36,17 +37,27 @@ int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
 }
 
 int res_counter_charge(struct res_counter *counter, unsigned long val,
-                       struct res_counter **limit_fail_at)
+                       struct res_counter **limit_fail_at,
+                       struct res_counter **soft_limit_fail_at)
 {
        int ret;
        unsigned long flags;
        struct res_counter *c, *u;
 
        *limit_fail_at = NULL;
+       if (soft_limit_fail_at)
+               *soft_limit_fail_at = NULL;
        local_irq_save(flags);
        for (c = counter; c != NULL; c = c->parent) {
                spin_lock(&c->lock);
                ret = res_counter_charge_locked(c, val);
+               /*
+                * With soft limits, we return the highest ancestor
+                * that exceeds its soft limit
+                */
+               if (soft_limit_fail_at &&
+                       !res_counter_soft_limit_check_locked(c))
+                       *soft_limit_fail_at = c;
                spin_unlock(&c->lock);
                if (ret < 0) {
                        *limit_fail_at = c;
@@ -74,7 +85,8 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
        counter->usage -= val;
 }
 
-void res_counter_uncharge(struct res_counter *counter, unsigned long val)
+void res_counter_uncharge(struct res_counter *counter, unsigned long val,
+                               bool *was_soft_limit_excess)
 {
        unsigned long flags;
        struct res_counter *c;
@@ -82,6 +94,9 @@ void res_counter_uncharge(struct res_counter *counter, unsigned long val)
        local_irq_save(flags);
        for (c = counter; c != NULL; c = c->parent) {
                spin_lock(&c->lock);
+               if (was_soft_limit_excess)
+                       *was_soft_limit_excess =
+                               !res_counter_soft_limit_check_locked(c);
                res_counter_uncharge_locked(c, val);
                spin_unlock(&c->lock);
        }
@@ -101,6 +116,8 @@ res_counter_member(struct res_counter *counter, int member)
                return &counter->limit;
        case RES_FAILCNT:
                return &counter->failcnt;
+       case RES_SOFT_LIMIT:
+               return &counter->soft_limit;
        };
 
        BUG();
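
A hedged sketch of a caller of the widened interface: a hard-limit
failure still comes back through limit_fail_at, while a successful charge
can additionally report the topmost ancestor over its soft limit (the
memory controller code in this merge uses that to drive soft-limit
reclaim). charge_example is a hypothetical wrapper:

    #include <linux/res_counter.h>

    static int charge_example(struct res_counter *cnt, unsigned long val)
    {
            struct res_counter *fail_at, *soft_fail_at;
            int ret;

            ret = res_counter_charge(cnt, val, &fail_at, &soft_fail_at);
            if (ret)
                    return ret;     /* fail_at hit its hard limit */

            if (soft_fail_at) {
                    /* Charge succeeded, but soft_fail_at (the highest
                     * such ancestor) is past its soft limit; a real
                     * caller would schedule reclaim against it. */
            }
            return 0;
    }
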
diff --git a/kernel/sched.c b/kernel/sched.c
index 2f76e06..ee61f45 100644 (file)
@@ -10312,7 +10312,7 @@ static int sched_rt_global_constraints(void)
 #endif /* CONFIG_RT_GROUP_SCHED */
 
 int sched_rt_handler(struct ctl_table *table, int write,
-               struct file *filp, void __user *buffer, size_t *lenp,
+               void __user *buffer, size_t *lenp,
                loff_t *ppos)
 {
        int ret;
@@ -10323,7 +10323,7 @@ int sched_rt_handler(struct ctl_table *table, int write,
        old_period = sysctl_sched_rt_period;
        old_runtime = sysctl_sched_rt_runtime;
 
-       ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+       ret = proc_dointvec(table, write, buffer, lenp, ppos);
 
        if (!ret && write) {
                ret = sched_rt_global_constraints();
@@ -10377,8 +10377,7 @@ cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
 }
 
 static int
-cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                     struct task_struct *tsk)
+cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
 #ifdef CONFIG_RT_GROUP_SCHED
        if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk))
@@ -10388,15 +10387,45 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
        if (tsk->sched_class != &fair_sched_class)
                return -EINVAL;
 #endif
+       return 0;
+}
 
+static int
+cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+                     struct task_struct *tsk, bool threadgroup)
+{
+       int retval = cpu_cgroup_can_attach_task(cgrp, tsk);
+       if (retval)
+               return retval;
+       if (threadgroup) {
+               struct task_struct *c;
+               rcu_read_lock();
+               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+                       retval = cpu_cgroup_can_attach_task(cgrp, c);
+                       if (retval) {
+                               rcu_read_unlock();
+                               return retval;
+                       }
+               }
+               rcu_read_unlock();
+       }
        return 0;
 }
 
 static void
 cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                       struct cgroup *old_cont, struct task_struct *tsk)
+                 struct cgroup *old_cont, struct task_struct *tsk,
+                 bool threadgroup)
 {
        sched_move_task(tsk);
+       if (threadgroup) {
+               struct task_struct *c;
+               rcu_read_lock();
+               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
+                       sched_move_task(c);
+               }
+               rcu_read_unlock();
+       }
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index ecc637a..4e777b4 100644 (file)
@@ -384,10 +384,10 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
 
 #ifdef CONFIG_SCHED_DEBUG
 int sched_nr_latency_handler(struct ctl_table *table, int write,
-               struct file *filp, void __user *buffer, size_t *lenp,
+               void __user *buffer, size_t *lenp,
                loff_t *ppos)
 {
-       int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+       int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 
        if (ret || !write)
                return ret;
diff --git a/kernel/signal.c b/kernel/signal.c
index 64c5dee..6705320 100644 (file)
@@ -705,7 +705,7 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns)
 
                if (why) {
                        /*
-                        * The first thread which returns from finish_stop()
+                        * The first thread which returns from do_signal_stop()
                         * will take ->siglock, notice SIGNAL_CLD_MASK, and
                         * notify its parent. See get_signal_to_deliver().
                         */
@@ -971,6 +971,20 @@ specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
        return send_signal(sig, info, t, 0);
 }
 
+int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
+                       bool group)
+{
+       unsigned long flags;
+       int ret = -ESRCH;
+
+       if (lock_task_sighand(p, &flags)) {
+               ret = send_signal(sig, info, p, group);
+               unlock_task_sighand(p, &flags);
+       }
+
+       return ret;
+}
+
 /*
  * Force a signal that the process can't ignore: if necessary
  * we unblock the signal and change any SIG_IGN to SIG_DFL.
@@ -1036,12 +1050,6 @@ void zap_other_threads(struct task_struct *p)
        }
 }
 
-int __fatal_signal_pending(struct task_struct *tsk)
-{
-       return sigismember(&tsk->pending.signal, SIGKILL);
-}
-EXPORT_SYMBOL(__fatal_signal_pending);
-
 struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags)
 {
        struct sighand_struct *sighand;
@@ -1068,18 +1076,10 @@ struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long
  */
 int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 {
-       unsigned long flags;
-       int ret;
+       int ret = check_kill_permission(sig, info, p);
 
-       ret = check_kill_permission(sig, info, p);
-
-       if (!ret && sig) {
-               ret = -ESRCH;
-               if (lock_task_sighand(p, &flags)) {
-                       ret = __group_send_sig_info(sig, info, p);
-                       unlock_task_sighand(p, &flags);
-               }
-       }
+       if (!ret && sig)
+               ret = do_send_sig_info(sig, info, p, true);
 
        return ret;
 }
@@ -1224,15 +1224,9 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
  * These are for backward compatibility with the rest of the kernel source.
  */
 
-/*
- * The caller must ensure the task can't exit.
- */
 int
 send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 {
-       int ret;
-       unsigned long flags;
-
        /*
         * Make sure legacy kernel users don't send in bad values
         * (normal paths check this in check_kill_permission).
@@ -1240,10 +1234,7 @@ send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
        if (!valid_signal(sig))
                return -EINVAL;
 
-       spin_lock_irqsave(&p->sighand->siglock, flags);
-       ret = specific_send_sig_info(sig, info, p);
-       spin_unlock_irqrestore(&p->sighand->siglock, flags);
-       return ret;
+       return do_send_sig_info(sig, info, p, false);
 }
 
 #define __si_special(priv) \
@@ -1382,15 +1373,6 @@ ret:
        return ret;
 }
 
-/*
- * Wake up any threads in the parent blocked in wait* syscalls.
- */
-static inline void __wake_up_parent(struct task_struct *p,
-                                   struct task_struct *parent)
-{
-       wake_up_interruptible_sync(&parent->signal->wait_chldexit);
-}
-
 /*
  * Let a parent know about the death of a child.
  * For a stopped/continued status change, use do_notify_parent_cldstop instead.
@@ -1673,29 +1655,6 @@ void ptrace_notify(int exit_code)
        spin_unlock_irq(&current->sighand->siglock);
 }
 
-static void
-finish_stop(int stop_count)
-{
-       /*
-        * If there are no other threads in the group, or if there is
-        * a group stop in progress and we are the last to stop,
-        * report to the parent.  When ptraced, every thread reports itself.
-        */
-       if (tracehook_notify_jctl(stop_count == 0, CLD_STOPPED)) {
-               read_lock(&tasklist_lock);
-               do_notify_parent_cldstop(current, CLD_STOPPED);
-               read_unlock(&tasklist_lock);
-       }
-
-       do {
-               schedule();
-       } while (try_to_freeze());
-       /*
-        * Now we don't run again until continued.
-        */
-       current->exit_code = 0;
-}
-
 /*
  * This performs the stopping for SIGSTOP and other stop signals.
  * We have to stop all threads in the thread group.
@@ -1705,15 +1664,9 @@ finish_stop(int stop_count)
 static int do_signal_stop(int signr)
 {
        struct signal_struct *sig = current->signal;
-       int stop_count;
+       int notify;
 
-       if (sig->group_stop_count > 0) {
-               /*
-                * There is a group stop in progress.  We don't need to
-                * start another one.
-                */
-               stop_count = --sig->group_stop_count;
-       } else {
+       if (!sig->group_stop_count) {
                struct task_struct *t;
 
                if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) ||
@@ -1725,7 +1678,7 @@ static int do_signal_stop(int signr)
                 */
                sig->group_exit_code = signr;
 
-               stop_count = 0;
+               sig->group_stop_count = 1;
                for (t = next_thread(current); t != current; t = next_thread(t))
                        /*
                         * Setting state to TASK_STOPPED for a group
@@ -1734,19 +1687,44 @@ static int do_signal_stop(int signr)
                         */
                        if (!(t->flags & PF_EXITING) &&
                            !task_is_stopped_or_traced(t)) {
-                               stop_count++;
+                               sig->group_stop_count++;
                                signal_wake_up(t, 0);
                        }
-               sig->group_stop_count = stop_count;
        }
+       /*
+        * If there are no other threads in the group, or if there is
+        * a group stop in progress and we are the last to stop, report
+        * to the parent.  When ptraced, every thread reports itself.
+        */
+       notify = sig->group_stop_count == 1 ? CLD_STOPPED : 0;
+       notify = tracehook_notify_jctl(notify, CLD_STOPPED);
+       /*
+        * tracehook_notify_jctl() can drop and reacquire siglock, so
+        * we keep ->group_stop_count != 0 across the call. If SIGCONT
+        * or SIGKILL arrives in between, ->group_stop_count becomes 0.
+        */
+       if (sig->group_stop_count) {
+               if (!--sig->group_stop_count)
+                       sig->flags = SIGNAL_STOP_STOPPED;
+               current->exit_code = sig->group_exit_code;
+               __set_current_state(TASK_STOPPED);
+       }
+       spin_unlock_irq(&current->sighand->siglock);
 
-       if (stop_count == 0)
-               sig->flags = SIGNAL_STOP_STOPPED;
-       current->exit_code = sig->group_exit_code;
-       __set_current_state(TASK_STOPPED);
+       if (notify) {
+               read_lock(&tasklist_lock);
+               do_notify_parent_cldstop(current, notify);
+               read_unlock(&tasklist_lock);
+       }
+
+       /* Now we don't run again until woken by SIGCONT or SIGKILL */
+       do {
+               schedule();
+       } while (try_to_freeze());
+
+       tracehook_finish_jctl();
+       current->exit_code = 0;
 
-       spin_unlock_irq(&current->sighand->siglock);
-       finish_stop(stop_count);
        return 1;
 }
 
@@ -1815,14 +1793,15 @@ relock:
                int why = (signal->flags & SIGNAL_STOP_CONTINUED)
                                ? CLD_CONTINUED : CLD_STOPPED;
                signal->flags &= ~SIGNAL_CLD_MASK;
-               spin_unlock_irq(&sighand->siglock);
 
-               if (unlikely(!tracehook_notify_jctl(1, why)))
-                       goto relock;
+               why = tracehook_notify_jctl(why, CLD_CONTINUED);
+               spin_unlock_irq(&sighand->siglock);
 
-               read_lock(&tasklist_lock);
-               do_notify_parent_cldstop(current->group_leader, why);
-               read_unlock(&tasklist_lock);
+               if (why) {
+                       read_lock(&tasklist_lock);
+                       do_notify_parent_cldstop(current->group_leader, why);
+                       read_unlock(&tasklist_lock);
+               }
                goto relock;
        }
 
@@ -1987,14 +1966,14 @@ void exit_signals(struct task_struct *tsk)
        if (unlikely(tsk->signal->group_stop_count) &&
                        !--tsk->signal->group_stop_count) {
                tsk->signal->flags = SIGNAL_STOP_STOPPED;
-               group_stop = 1;
+               group_stop = tracehook_notify_jctl(CLD_STOPPED, CLD_STOPPED);
        }
 out:
        spin_unlock_irq(&tsk->sighand->siglock);
 
-       if (unlikely(group_stop) && tracehook_notify_jctl(1, CLD_STOPPED)) {
+       if (unlikely(group_stop)) {
                read_lock(&tasklist_lock);
-               do_notify_parent_cldstop(tsk, CLD_STOPPED);
+               do_notify_parent_cldstop(tsk, group_stop);
                read_unlock(&tasklist_lock);
        }
 }
@@ -2290,7 +2269,6 @@ static int
 do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
 {
        struct task_struct *p;
-       unsigned long flags;
        int error = -ESRCH;
 
        rcu_read_lock();
@@ -2300,14 +2278,16 @@ do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
                /*
                 * The null signal is a permissions and process existence
                 * probe.  No signal is actually delivered.
-                *
-                * If lock_task_sighand() fails we pretend the task dies
-                * after receiving the signal. The window is tiny, and the
-                * signal is private anyway.
                 */
-               if (!error && sig && lock_task_sighand(p, &flags)) {
-                       error = specific_send_sig_info(sig, info, p);
-                       unlock_task_sighand(p, &flags);
+               if (!error && sig) {
+                       error = do_send_sig_info(sig, info, p, false);
+                       /*
+                        * If lock_task_sighand() failed we pretend the task
+                        * dies after receiving the signal. The window is tiny,
+                        * and the signal is private anyway.
+                        */
+                       if (unlikely(error == -ESRCH))
+                               error = 0;
                }
        }
        rcu_read_unlock();
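
The "null signal" convention referenced in the comment above, seen from
userspace: signal 0 exercises the permission and existence checks without
delivering anything:

    #include <errno.h>
    #include <signal.h>
    #include <stdio.h>
    #include <sys/types.h>

    /* Probe whether a process exists (and whether we may signal it). */
    static int process_alive(pid_t pid)
    {
            if (kill(pid, 0) == 0)
                    return 1;       /* exists, signal permitted */
            if (errno == EPERM)
                    return 1;       /* exists, permission denied */
            return 0;               /* ESRCH: no such process */
    }

    int main(void)
    {
            printf("pid 1 alive: %d\n", process_alive(1));
            return 0;
    }
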
diff --git a/kernel/slow-work.c b/kernel/slow-work.c
index 09d7519..0d31135 100644 (file)
@@ -26,10 +26,10 @@ static void slow_work_cull_timeout(unsigned long);
 static void slow_work_oom_timeout(unsigned long);
 
 #ifdef CONFIG_SYSCTL
-static int slow_work_min_threads_sysctl(struct ctl_table *, int, struct file *,
+static int slow_work_min_threads_sysctl(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
 
-static int slow_work_max_threads_sysctl(struct ctl_table *, int , struct file *,
+static int slow_work_max_threads_sysctl(struct ctl_table *, int,
                                        void __user *, size_t *, loff_t *);
 #endif
 
@@ -493,10 +493,10 @@ static void slow_work_oom_timeout(unsigned long data)
  * Handle adjustment of the minimum number of threads
  */
 static int slow_work_min_threads_sysctl(struct ctl_table *table, int write,
-                                       struct file *filp, void __user *buffer,
+                                       void __user *buffer,
                                        size_t *lenp, loff_t *ppos)
 {
-       int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+       int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
        int n;
 
        if (ret == 0) {
@@ -521,10 +521,10 @@ static int slow_work_min_threads_sysctl(struct ctl_table *table, int write,
  * Handle adjustment of the maximum number of threads
  */
 static int slow_work_max_threads_sysctl(struct ctl_table *table, int write,
-                                       struct file *filp, void __user *buffer,
+                                       void __user *buffer,
                                        size_t *lenp, loff_t *ppos)
 {
-       int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+       int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
        int n;
 
        if (ret == 0) {
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 88796c3..81324d1 100644 (file)
@@ -90,11 +90,11 @@ void touch_all_softlockup_watchdogs(void)
 EXPORT_SYMBOL(touch_all_softlockup_watchdogs);
 
 int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
-                            struct file *filp, void __user *buffer,
+                            void __user *buffer,
                             size_t *lenp, loff_t *ppos)
 {
        touch_all_softlockup_watchdogs();
-       return proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+       return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 }
 
 /*
diff --git a/kernel/sys.c b/kernel/sys.c
index ebcb156..255475d 100644 (file)
@@ -1542,6 +1542,28 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
                                current->timer_slack_ns = arg2;
                        error = 0;
                        break;
+               case PR_MCE_KILL:
+                       if (arg4 | arg5)
+                               return -EINVAL;
+                       switch (arg2) {
+                       case 0:
+                               if (arg3 != 0)
+                                       return -EINVAL;
+                               current->flags &= ~PF_MCE_PROCESS;
+                               break;
+                       case 1:
+                               current->flags |= PF_MCE_PROCESS;
+                               if (arg3 != 0)
+                                       current->flags |= PF_MCE_EARLY;
+                               else
+                                       current->flags &= ~PF_MCE_EARLY;
+                               break;
+                       default:
+                               return -EINVAL;
+                       }
+                       error = 0;
+                       break;
+
                default:
                        error = -EINVAL;
                        break;
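
A hedged userspace sketch of the new option: arg2 sets or clears the
per-process policy, arg3 selects early kill, and arg4/arg5 must be zero.
PR_MCE_KILL is introduced by this series; its value (33) is assumed here
for the benefit of older headers:

    #include <stdio.h>
    #include <sys/prctl.h>

    #ifndef PR_MCE_KILL
    #define PR_MCE_KILL 33          /* assumed; added by this series */
    #endif

    int main(void)
    {
            /* Opt this process in to early kill on memory failure
             * (PF_MCE_PROCESS | PF_MCE_EARLY in the hunk above). */
            if (prctl(PR_MCE_KILL, 1, 1, 0, 0) != 0)
                    perror("prctl(PR_MCE_KILL)");
            return 0;
    }
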
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7f4f57b..0d949c5 100644 (file)
@@ -76,6 +76,7 @@ extern int max_threads;
 extern int core_uses_pid;
 extern int suid_dumpable;
 extern char core_pattern[];
+extern unsigned int core_pipe_limit;
 extern int pid_max;
 extern int min_free_kbytes;
 extern int pid_max_min, pid_max_max;
@@ -162,9 +163,9 @@ extern int max_lock_depth;
 #endif
 
 #ifdef CONFIG_PROC_SYSCTL
-static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
+static int proc_do_cad_pid(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos);
-static int proc_taint(struct ctl_table *table, int write, struct file *filp,
+static int proc_taint(struct ctl_table *table, int write,
                               void __user *buffer, size_t *lenp, loff_t *ppos);
 #endif
 
@@ -423,6 +424,14 @@ static struct ctl_table kern_table[] = {
                .proc_handler   = &proc_dostring,
                .strategy       = &sysctl_string,
        },
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "core_pipe_limit",
+               .data           = &core_pipe_limit,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
 #ifdef CONFIG_PROC_SYSCTL
        {
                .procname       = "tainted",
@@ -1389,6 +1398,31 @@ static struct ctl_table vm_table[] = {
                .mode           = 0644,
                .proc_handler   = &scan_unevictable_handler,
        },
+#ifdef CONFIG_MEMORY_FAILURE
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "memory_failure_early_kill",
+               .data           = &sysctl_memory_failure_early_kill,
+               .maxlen         = sizeof(sysctl_memory_failure_early_kill),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_minmax,
+               .strategy       = &sysctl_intvec,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "memory_failure_recovery",
+               .data           = &sysctl_memory_failure_recovery,
+               .maxlen         = sizeof(sysctl_memory_failure_recovery),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_minmax,
+               .strategy       = &sysctl_intvec,
+               .extra1         = &zero,
+               .extra2         = &one,
+       },
+#endif
+
 /*
  * NOTE: do not add new entries to this table unless you have read
  * Documentation/sysctl/ctl_unnumbered.txt
@@ -2217,7 +2251,7 @@ void sysctl_head_put(struct ctl_table_header *head)
 #ifdef CONFIG_PROC_SYSCTL
 
 static int _proc_do_string(void* data, int maxlen, int write,
-                          struct file *filp, void __user *buffer,
+                          void __user *buffer,
                           size_t *lenp, loff_t *ppos)
 {
        size_t len;
@@ -2278,7 +2312,6 @@ static int _proc_do_string(void* data, int maxlen, int write,
  * proc_dostring - read a string sysctl
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: file position
@@ -2292,10 +2325,10 @@ static int _proc_do_string(void* data, int maxlen, int write,
  *
  * Returns 0 on success.
  */
-int proc_dostring(struct ctl_table *table, int write, struct file *filp,
+int proc_dostring(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-       return _proc_do_string(table->data, table->maxlen, write, filp,
+       return _proc_do_string(table->data, table->maxlen, write,
                               buffer, lenp, ppos);
 }
 
@@ -2320,7 +2353,7 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
 }
 
 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
-                 int write, struct file *filp, void __user *buffer,
+                 int write, void __user *buffer,
                  size_t *lenp, loff_t *ppos,
                  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
                              int write, void *data),
@@ -2427,13 +2460,13 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
 #undef TMPBUFLEN
 }
 
-static int do_proc_dointvec(struct ctl_table *table, int write, struct file *filp,
+static int do_proc_dointvec(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos,
                  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
                              int write, void *data),
                  void *data)
 {
-       return __do_proc_dointvec(table->data, table, write, filp,
+       return __do_proc_dointvec(table->data, table, write,
                        buffer, lenp, ppos, conv, data);
 }
 
@@ -2441,7 +2474,6 @@ static int do_proc_dointvec(struct ctl_table *table, int write, struct file *fil
  * proc_dointvec - read a vector of integers
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: file position
@@ -2451,10 +2483,10 @@ static int do_proc_dointvec(struct ctl_table *table, int write, struct file *fil
  *
  * Returns 0 on success.
  */
-int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec(struct ctl_table *table, int write,
                     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-    return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
+    return do_proc_dointvec(table,write,buffer,lenp,ppos,
                            NULL,NULL);
 }
 
@@ -2462,7 +2494,7 @@ int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
  * Taint values can only be increased
  * This means we can safely use a temporary.
  */
-static int proc_taint(struct ctl_table *table, int write, struct file *filp,
+static int proc_taint(struct ctl_table *table, int write,
                               void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct ctl_table t;
@@ -2474,7 +2506,7 @@ static int proc_taint(struct ctl_table *table, int write, struct file *filp,
 
        t = *table;
        t.data = &tmptaint;
-       err = proc_doulongvec_minmax(&t, write, filp, buffer, lenp, ppos);
+       err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
        if (err < 0)
                return err;
 
@@ -2526,7 +2558,6 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
  * proc_dointvec_minmax - read a vector of integers with min/max values
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: file position
@@ -2539,19 +2570,18 @@ static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
  *
  * Returns 0 on success.
  */
-int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_minmax(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct do_proc_dointvec_minmax_conv_param param = {
                .min = (int *) table->extra1,
                .max = (int *) table->extra2,
        };
-       return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
+       return do_proc_dointvec(table, write, buffer, lenp, ppos,
                                do_proc_dointvec_minmax_conv, &param);
 }
 
 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
-                                    struct file *filp,
                                     void __user *buffer,
                                     size_t *lenp, loff_t *ppos,
                                     unsigned long convmul,
@@ -2656,21 +2686,19 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
 }
 
 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
-                                    struct file *filp,
                                     void __user *buffer,
                                     size_t *lenp, loff_t *ppos,
                                     unsigned long convmul,
                                     unsigned long convdiv)
 {
        return __do_proc_doulongvec_minmax(table->data, table, write,
-                       filp, buffer, lenp, ppos, convmul, convdiv);
+                       buffer, lenp, ppos, convmul, convdiv);
 }
 
 /**
  * proc_doulongvec_minmax - read a vector of long integers with min/max values
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: file position
@@ -2683,17 +2711,16 @@ static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
  *
  * Returns 0 on success.
  */
-int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp,
+int proc_doulongvec_minmax(struct ctl_table *table, int write,
                           void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-    return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
+    return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
 }
 
 /**
  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: file position
@@ -2708,11 +2735,10 @@ int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp
  * Returns 0 on success.
  */
 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
-                                     struct file *filp,
                                      void __user *buffer,
                                      size_t *lenp, loff_t *ppos)
 {
-    return do_proc_doulongvec_minmax(table, write, filp, buffer,
+    return do_proc_doulongvec_minmax(table, write, buffer,
                                     lenp, ppos, HZ, 1000l);
 }
 
@@ -2788,7 +2814,6 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
  * proc_dointvec_jiffies - read a vector of integers as seconds
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: file position
@@ -2800,10 +2825,10 @@ static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
  *
  * Returns 0 on success.
  */
-int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_jiffies(struct ctl_table *table, int write,
                          void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-    return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
+    return do_proc_dointvec(table,write,buffer,lenp,ppos,
                            do_proc_dointvec_jiffies_conv,NULL);
 }
 
@@ -2811,7 +2836,6 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: pointer to the file position
@@ -2823,10 +2847,10 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
  *
  * Returns 0 on success.
  */
-int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
                                 void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-    return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
+    return do_proc_dointvec(table,write,buffer,lenp,ppos,
                            do_proc_dointvec_userhz_jiffies_conv,NULL);
 }
 
@@ -2834,7 +2858,6 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file
  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
  * @table: the sysctl table
  * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
  * @ppos: file position
@@ -2847,14 +2870,14 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file
  *
  * Returns 0 on success.
  */
-int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
                             void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-       return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
+       return do_proc_dointvec(table, write, buffer, lenp, ppos,
                                do_proc_dointvec_ms_jiffies_conv, NULL);
 }
 
-static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
+static int proc_do_cad_pid(struct ctl_table *table, int write,
                           void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct pid *new_pid;
@@ -2863,7 +2886,7 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp
 
        tmp = pid_vnr(cad_pid);
 
-       r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
+       r = __do_proc_dointvec(&tmp, table, write, buffer,
                               lenp, ppos, NULL, NULL);
        if (r || !write)
                return r;
@@ -2878,50 +2901,49 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp
 
 #else /* CONFIG_PROC_FS */
 
-int proc_dostring(struct ctl_table *table, int write, struct file *filp,
+int proc_dostring(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        return -ENOSYS;
 }
 
-int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        return -ENOSYS;
 }
 
-int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_minmax(struct ctl_table *table, int write,
                    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        return -ENOSYS;
 }
 
-int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_jiffies(struct ctl_table *table, int write,
                    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        return -ENOSYS;
 }
 
-int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
                    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        return -ENOSYS;
 }
 
-int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp,
+int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
                             void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        return -ENOSYS;
 }
 
-int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp,
+int proc_doulongvec_minmax(struct ctl_table *table, int write,
                    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        return -ENOSYS;
 }
 
 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
-                                     struct file *filp,
                                      void __user *buffer,
                                      size_t *lenp, loff_t *ppos)
 {
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 0b0a636..ee26662 100644 (file)
@@ -1,4 +1,4 @@
-obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o
+obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o timeconv.o
 
 obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD)                += clockevents.o
 obj-$(CONFIG_GENERIC_CLOCKEVENTS)              += tick-common.o
diff --git a/kernel/time/timeconv.c b/kernel/time/timeconv.c
new file mode 100644 (file)
index 0000000..86628e7
--- /dev/null
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
+ * This file is part of the GNU C Library.
+ * Contributed by Paul Eggert (eggert@twinsun.com).
+ *
+ * The GNU C Library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * The GNU C Library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with the GNU C Library; see the file COPYING.LIB.  If not,
+ * write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * Converts the calendar time to broken-down time representation
+ * Based on code from glibc-2.6
+ *
+ * 2009-7-14:
+ *   Moved from glibc-2.6 to kernel by Zhaolei <zhaolei@cn.fujitsu.com>
+ */
+
+#include <linux/time.h>
+#include <linux/module.h>
+
+/*
+ * Nonzero if YEAR is a leap year (every 4 years,
+ * except every 100th isn't, and every 400th is).
+ */
+static int __isleap(long year)
+{
+       return year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
+}
+
+/* Floored division (rounds towards negative infinity) for long type */
+static long math_div(long a, long b)
+{
+       return a / b - (a % b < 0);
+}
+
+/* How many leap years between y1 and y2; y1 must be less than or equal to y2 */
+static long leaps_between(long y1, long y2)
+{
+       long leaps1 = math_div(y1 - 1, 4) - math_div(y1 - 1, 100)
+               + math_div(y1 - 1, 400);
+       long leaps2 = math_div(y2 - 1, 4) - math_div(y2 - 1, 100)
+               + math_div(y2 - 1, 400);
+       return leaps2 - leaps1;
+}
+
+/* How many days come before each month (0-12). */
+static const unsigned short __mon_yday[2][13] = {
+       /* Normal years. */
+       {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
+       /* Leap years. */
+       {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}
+};
+
+#define SECS_PER_HOUR  (60 * 60)
+#define SECS_PER_DAY   (SECS_PER_HOUR * 24)
+
+/**
+ * time_to_tm - converts the calendar time to local broken-down time
+ *
+ * @totalsecs: the number of seconds elapsed since 00:00:00 on January 1, 1970,
+ *             Coordinated Universal Time (UTC)
+ * @offset:    offset seconds to add to totalsecs
+ * @result:    pointer to struct tm variable to receive broken-down time
+ */
+void time_to_tm(time_t totalsecs, int offset, struct tm *result)
+{
+       long days, rem, y;
+       const unsigned short *ip;
+
+       days = totalsecs / SECS_PER_DAY;
+       rem = totalsecs % SECS_PER_DAY;
+       rem += offset;
+       while (rem < 0) {
+               rem += SECS_PER_DAY;
+               --days;
+       }
+       while (rem >= SECS_PER_DAY) {
+               rem -= SECS_PER_DAY;
+               ++days;
+       }
+
+       result->tm_hour = rem / SECS_PER_HOUR;
+       rem %= SECS_PER_HOUR;
+       result->tm_min = rem / 60;
+       result->tm_sec = rem % 60;
+
+       /* January 1, 1970 was a Thursday. */
+       result->tm_wday = (4 + days) % 7;
+       if (result->tm_wday < 0)
+               result->tm_wday += 7;
+
+       y = 1970;
+
+       while (days < 0 || days >= (__isleap(y) ? 366 : 365)) {
+               /* Guess a corrected year, assuming 365 days per year. */
+               long yg = y + math_div(days, 365);
+
+               /* Adjust DAYS and Y to match the guessed year. */
+               days -= (yg - y) * 365 + leaps_between(y, yg);
+               y = yg;
+       }
+
+       result->tm_year = y - 1900;
+
+       result->tm_yday = days;
+
+       ip = __mon_yday[__isleap(y)];
+       for (y = 11; days < ip[y]; y--)
+               continue;
+       days -= ip[y];
+
+       result->tm_mon = y;
+       result->tm_mday = days + 1;
+}
+EXPORT_SYMBOL(time_to_tm);
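
A minimal in-kernel usage sketch, assuming the time_to_tm() declaration
this series adds to <linux/time.h>; sys_tz supplies the offset in minutes
west of UTC and get_seconds() the current calendar time:

    #include <linux/kernel.h>
    #include <linux/time.h>

    /* Log the current local wall-clock time as broken-down fields. */
    static void log_local_time(void)
    {
            struct tm tm;

            time_to_tm(get_seconds(), -sys_tz.tz_minuteswest * 60, &tm);
            printk(KERN_INFO "%04ld-%02d-%02d %02d:%02d:%02d\n",
                   tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
                   tm.tm_hour, tm.tm_min, tm.tm_sec);
    }
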
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 23df777..a142579 100644 (file)
@@ -3015,7 +3015,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
 
 int
 ftrace_enable_sysctl(struct ctl_table *table, int write,
-                    struct file *file, void __user *buffer, size_t *lenp,
+                    void __user *buffer, size_t *lenp,
                     loff_t *ppos)
 {
        int ret;
@@ -3025,7 +3025,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 
        mutex_lock(&ftrace_lock);
 
-       ret  = proc_dointvec(table, write, file, buffer, lenp, ppos);
+       ret  = proc_dointvec(table, write, buffer, lenp, ppos);
 
        if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
                goto out;
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 0f6facb..8504ac7 100644 (file)
@@ -296,14 +296,14 @@ static const struct file_operations stack_trace_fops = {
 
 int
 stack_trace_sysctl(struct ctl_table *table, int write,
-                  struct file *file, void __user *buffer, size_t *lenp,
+                  void __user *buffer, size_t *lenp,
                   loff_t *ppos)
 {
        int ret;
 
        mutex_lock(&stack_sysctl_mutex);
 
-       ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
+       ret = proc_dointvec(table, write, buffer, lenp, ppos);
 
        if (ret || !write ||
            (last_stack_tracer_enabled == !!stack_tracer_enabled))
index 92359cc..69eae35 100644 (file)
@@ -42,14 +42,14 @@ static void put_uts(ctl_table *table, int write, void *which)
  *     Special case of dostring for the UTS structure. This has locks
  *     to observe. Should this be in kernel/sys.c ????
  */
-static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
+static int proc_do_uts_string(ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct ctl_table uts_table;
        int r;
        memcpy(&uts_table, table, sizeof(uts_table));
        uts_table.data = get_uts(table, write);
-       r = proc_dostring(&uts_table,write,filp,buffer,lenp, ppos);
+       r = proc_dostring(&uts_table, write, buffer, lenp, ppos);
        put_uts(table, write, uts_table.data);
        return r;
 }
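This hunk and the surrounding ones all track a single tree-wide API change: sysctl proc handlers, and the proc_do*() helpers they call, lose their unused struct file * argument. A minimal sketch of a handler in the new form (hypothetical names, assuming an int-valued ctl_table entry; not from this patch):

/* Hypothetical handler using the new, file-less signature. */
static int example_handler(struct ctl_table *table, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec(table, write, buffer, lenp, ppos);

	if (!ret && write)
		printk(KERN_INFO "example sysctl updated\n");
	return ret;
}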
index 68dfce5..fc686c7 100644 (file)
 
 #define GZIP_IOBUF_SIZE (16*1024)
 
+static int nofill(void *buffer, unsigned int len)
+{
+       return -1;
+}
+
 /* Included from initramfs et al code */
 STATIC int INIT gunzip(unsigned char *buf, int len,
                       int(*fill)(void*, unsigned int),
@@ -76,6 +81,9 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
                goto gunzip_nomem4;
        }
 
+       if (!fill)
+               fill = nofill;
+
        if (len == 0)
                len = fill(zbuf, GZIP_IOBUF_SIZE);
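The added check lets callers pass a NULL fill callback; the decompressor then substitutes nofill(), which reports end of input, so the rest of the code can call fill() unconditionally. The LZMA hunk below applies the same pattern. A minimal sketch of the idea with hypothetical names:

/* Sketch only; hypothetical decompressor, not part of the patch. */
static int nofill(void *buffer, unsigned int len)
{
	return -1;	/* nothing to refill with: treated as end of input */
}

static int decompress(int (*fill)(void *, unsigned int))
{
	if (!fill)
		fill = nofill;	/* callers with in-memory data pass NULL */
	/* ... main loop may now call fill() without NULL checks ... */
	return 0;
}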
 
index 0b954e0..ca82fde 100644 (file)
@@ -82,6 +82,11 @@ struct rc {
 #define RC_MODEL_TOTAL_BITS 11
 
 
+static int nofill(void *buffer, unsigned int len)
+{
+       return -1;
+}
+
 /* Called twice: once at startup and once in rc_normalize() */
 static void INIT rc_read(struct rc *rc)
 {
@@ -97,7 +102,10 @@ static inline void INIT rc_init(struct rc *rc,
                                       int (*fill)(void*, unsigned int),
                                       char *buffer, int buffer_size)
 {
-       rc->fill = fill;
+       if (fill)
+               rc->fill = fill;
+       else
+               rc->fill = nofill;
        rc->buffer = (uint8_t *)buffer;
        rc->buffer_size = buffer_size;
        rc->buffer_end = rc->buffer + rc->buffer_size;
index 71eb0b4..2477607 100644 (file)
@@ -245,6 +245,20 @@ config DEFAULT_MMAP_MIN_ADDR
          /proc/sys/vm/mmap_min_addr tunable.
 
 
+config MEMORY_FAILURE
+       depends on MMU
+       depends on X86_MCE
+       bool "Enable recovery from hardware memory errors"
+       help
+         Enables code to recover from some memory failures on systems
+         with MCA recovery. This allows a system to continue running
+         even when some of its memory has uncorrected errors. This requires
+         special hardware support and typically ECC memory.
+
+config HWPOISON_INJECT
+       tristate "Poison pages injector"
+       depends on MEMORY_FAILURE && DEBUG_KERNEL
+
 config NOMMU_INITIAL_TRIM_EXCESS
        int "Turn on mmap() excess space trimming before booting"
        depends on !MMU
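For reference, enabling both new options on a machine that satisfies the dependencies above comes down to a .config fragment along these lines (a sketch, not part of the patch):

CONFIG_MMU=y
CONFIG_X86_MCE=y
CONFIG_MEMORY_FAILURE=y
# the poison injector below additionally needs DEBUG_KERNEL
CONFIG_DEBUG_KERNEL=y
CONFIG_HWPOISON_INJECT=m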
index 88193d7..ebf8490 100644 (file)
@@ -5,14 +5,14 @@
 mmu-y                  := nommu.o
 mmu-$(CONFIG_MMU)      := fremap.o highmem.o madvise.o memory.o mincore.o \
                           mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
-                          vmalloc.o
+                          vmalloc.o pagewalk.o
 
 obj-y                  := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
                           maccess.o page_alloc.o page-writeback.o \
                           readahead.o swap.o truncate.o vmscan.o shmem.o \
                           prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
                           page_isolation.o mm_init.o mmu_context.o \
-                          pagewalk.o $(mmu-y)
+                          $(mmu-y)
 obj-y += init-mm.o
 
 obj-$(CONFIG_BOUNCE)   += bounce.o
@@ -41,5 +41,7 @@ obj-$(CONFIG_SMP) += allocpercpu.o
 endif
 obj-$(CONFIG_QUICKLIST) += quicklist.o
 obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
+obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
+obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
 obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
 obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
index bcc7372..6c84e59 100644 (file)
@@ -58,7 +58,7 @@
 /*
  * Lock ordering:
  *
- *  ->i_mmap_lock              (vmtruncate)
+ *  ->i_mmap_lock              (truncate_pagecache)
  *    ->private_lock           (__free_pte->__set_page_dirty_buffers)
  *      ->swap_lock            (exclusive_swap_page, others)
  *        ->mapping->tree_lock
  *
  *  ->task->proc_lock
  *    ->dcache_lock            (proc_pid_lookup)
+ *
+ *  (code doesn't rely on that order, so you could switch it around)
+ *  ->tasklist_lock             (memory_failure, collect_procs_ao)
+ *    ->i_mmap_lock
  */
 
 /*
index 815dbd4..6f048fc 100644 (file)
@@ -1537,7 +1537,7 @@ static unsigned int cpuset_mems_nr(unsigned int *array)
 
 #ifdef CONFIG_SYSCTL
 int hugetlb_sysctl_handler(struct ctl_table *table, int write,
-                          struct file *file, void __user *buffer,
+                          void __user *buffer,
                           size_t *length, loff_t *ppos)
 {
        struct hstate *h = &default_hstate;
@@ -1548,7 +1548,7 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write,
 
        table->data = &tmp;
        table->maxlen = sizeof(unsigned long);
-       proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
+       proc_doulongvec_minmax(table, write, buffer, length, ppos);
 
        if (write)
                h->max_huge_pages = set_max_huge_pages(h, tmp);
@@ -1557,10 +1557,10 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write,
 }
 
 int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
-                       struct file *file, void __user *buffer,
+                       void __user *buffer,
                        size_t *length, loff_t *ppos)
 {
-       proc_dointvec(table, write, file, buffer, length, ppos);
+       proc_dointvec(table, write, buffer, length, ppos);
        if (hugepages_treat_as_movable)
                htlb_alloc_mask = GFP_HIGHUSER_MOVABLE;
        else
@@ -1569,7 +1569,7 @@ int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
 }
 
 int hugetlb_overcommit_handler(struct ctl_table *table, int write,
-                       struct file *file, void __user *buffer,
+                       void __user *buffer,
                        size_t *length, loff_t *ppos)
 {
        struct hstate *h = &default_hstate;
@@ -1580,7 +1580,7 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
 
        table->data = &tmp;
        table->maxlen = sizeof(unsigned long);
-       proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
+       proc_doulongvec_minmax(table, write, buffer, length, ppos);
 
        if (write) {
                spin_lock(&hugetlb_lock);
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
new file mode 100644 (file)
index 0000000..e1d8513
--- /dev/null
@@ -0,0 +1,41 @@
+/* Inject a hwpoison memory failure on an arbitrary pfn */
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+
+static struct dentry *hwpoison_dir, *corrupt_pfn;
+
+static int hwpoison_inject(void *data, u64 val)
+{
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       printk(KERN_INFO "Injecting memory failure at pfn %Lx\n", val);
+       return __memory_failure(val, 18, 0);
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(hwpoison_fops, NULL, hwpoison_inject, "%lli\n");
+
+static void pfn_inject_exit(void)
+{
+       if (hwpoison_dir)
+               debugfs_remove_recursive(hwpoison_dir);
+}
+
+static int pfn_inject_init(void)
+{
+       hwpoison_dir = debugfs_create_dir("hwpoison", NULL);
+       if (hwpoison_dir == NULL)
+               return -ENOMEM;
+       corrupt_pfn = debugfs_create_file("corrupt-pfn", 0600, hwpoison_dir,
+                                         NULL, &hwpoison_fops);
+       if (corrupt_pfn == NULL) {
+               pfn_inject_exit();
+               return -ENOMEM;
+       }
+       return 0;
+}
+
+module_init(pfn_inject_init);
+module_exit(pfn_inject_exit);
+MODULE_LICENSE("GPL");
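Once loaded, the module is driven from user space by writing a pfn to the debugfs file it creates. A hedged userspace sketch (assumes debugfs is mounted at /sys/kernel/debug and the caller has CAP_SYS_ADMIN; not part of the patch):

/* Hypothetical userspace helper, not part of this patch. */
#include <stdio.h>

int inject_pfn(unsigned long long pfn)
{
	FILE *f = fopen("/sys/kernel/debug/hwpoison/corrupt-pfn", "w");

	if (!f)
		return -1;
	fprintf(f, "%llu\n", pfn);	/* parsed by the "%lli" attribute */
	return fclose(f);
}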
index 37cc373..f7edac3 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -30,6 +30,7 @@
 #include <linux/slab.h>
 #include <linux/rbtree.h>
 #include <linux/mmu_notifier.h>
+#include <linux/swap.h>
 #include <linux/ksm.h>
 
 #include <asm/tlbflush.h>
@@ -162,10 +163,10 @@ static unsigned long ksm_pages_unshared;
 static unsigned long ksm_rmap_items;
 
 /* Limit on the number of unswappable pages used */
-static unsigned long ksm_max_kernel_pages = 2000;
+static unsigned long ksm_max_kernel_pages;
 
 /* Number of pages ksmd should scan in one batch */
-static unsigned int ksm_thread_pages_to_scan = 200;
+static unsigned int ksm_thread_pages_to_scan = 100;
 
 /* Milliseconds ksmd should sleep between batches */
 static unsigned int ksm_thread_sleep_millisecs = 20;
@@ -173,7 +174,7 @@ static unsigned int ksm_thread_sleep_millisecs = 20;
 #define KSM_RUN_STOP   0
 #define KSM_RUN_MERGE  1
 #define KSM_RUN_UNMERGE        2
-static unsigned int ksm_run = KSM_RUN_MERGE;
+static unsigned int ksm_run = KSM_RUN_STOP;
 
 static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
 static DEFINE_MUTEX(ksm_thread_mutex);
@@ -183,6 +184,11 @@ static DEFINE_SPINLOCK(ksm_mmlist_lock);
                sizeof(struct __struct), __alignof__(struct __struct),\
                (__flags), NULL)
 
+static void __init ksm_init_max_kernel_pages(void)
+{
+       ksm_max_kernel_pages = nr_free_buffer_pages() / 4;
+}
+
 static int __init ksm_slab_init(void)
 {
        rmap_item_cache = KSM_KMEM_CACHE(rmap_item, 0);
@@ -1667,6 +1673,8 @@ static int __init ksm_init(void)
        struct task_struct *ksm_thread;
        int err;
 
+       ksm_init_max_kernel_pages();
+
        err = ksm_slab_init();
        if (err)
                goto out;
index d9ae206..35b1479 100644 (file)
@@ -218,6 +218,32 @@ static long madvise_remove(struct vm_area_struct *vma,
        return error;
 }
 
+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Error injection support for memory error handling.
+ */
+static int madvise_hwpoison(unsigned long start, unsigned long end)
+{
+       int ret = 0;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       for (; start < end; start += PAGE_SIZE) {
+               struct page *p;
+               int ret = get_user_pages(current, current->mm, start, 1,
+                                               0, 0, &p, NULL);
+               if (ret != 1)
+                       return ret;
+               printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n",
+                      page_to_pfn(p), start);
+               /* Ignore return value for now */
+               __memory_failure(page_to_pfn(p), 0, 1);
+               put_page(p);
+       }
+       return ret;
+}
+#endif
+
 static long
 madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
                unsigned long start, unsigned long end, int behavior)
@@ -308,6 +334,10 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
        int write;
        size_t len;
 
+#ifdef CONFIG_MEMORY_FAILURE
+       if (behavior == MADV_HWPOISON)
+               return madvise_hwpoison(start, start+len_in);
+#endif
        if (!madvise_behavior_valid(behavior))
                return error;
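The madvise() path below gives a second, per-process injection interface: MADV_HWPOISON poisons pages in the caller's own mapping. A hedged userspace sketch (MADV_HWPOISON is 100 in asm-generic/mman-common.h in this series; defined locally here in case libc headers predate it):

/* Hypothetical userspace test, not part of this patch. */
#include <sys/mman.h>
#include <string.h>
#include <stdio.h>

#ifndef MADV_HWPOISON
#define MADV_HWPOISON 100	/* from asm-generic/mman-common.h */
#endif

int main(void)
{
	size_t sz = 4096;
	char *p = mmap(NULL, sz, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	memset(p, 0, sz);			/* fault the page in */
	if (madvise(p, sz, MADV_HWPOISON))	/* needs CAP_SYS_ADMIN */
		perror("madvise");
	return 0;
}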
 
index 9b10d87..e2b98a6 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/rcupdate.h>
 #include <linux/limits.h>
 #include <linux/mutex.h>
+#include <linux/rbtree.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/spinlock.h>
@@ -43,6 +44,7 @@
 
 struct cgroup_subsys mem_cgroup_subsys __read_mostly;
 #define MEM_CGROUP_RECLAIM_RETRIES     5
+struct mem_cgroup *root_mem_cgroup __read_mostly;
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */
@@ -53,6 +55,7 @@ static int really_do_swap_account __initdata = 1; /* for remember boot option*/
 #endif
 
 static DEFINE_MUTEX(memcg_tasklist);   /* can be hold under cgroup_mutex */
+#define SOFTLIMIT_EVENTS_THRESH (1000)
 
 /*
  * Statistics for memory cgroup.
@@ -66,6 +69,8 @@ enum mem_cgroup_stat_index {
        MEM_CGROUP_STAT_MAPPED_FILE,  /* # of pages charged as file rss */
        MEM_CGROUP_STAT_PGPGIN_COUNT,   /* # of pages paged in */
        MEM_CGROUP_STAT_PGPGOUT_COUNT,  /* # of pages paged out */
+       MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */
+       MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */
 
        MEM_CGROUP_STAT_NSTATS,
 };
@@ -78,6 +83,20 @@ struct mem_cgroup_stat {
        struct mem_cgroup_stat_cpu cpustat[0];
 };
 
+static inline void
+__mem_cgroup_stat_reset_safe(struct mem_cgroup_stat_cpu *stat,
+                               enum mem_cgroup_stat_index idx)
+{
+       stat->count[idx] = 0;
+}
+
+static inline s64
+__mem_cgroup_stat_read_local(struct mem_cgroup_stat_cpu *stat,
+                               enum mem_cgroup_stat_index idx)
+{
+       return stat->count[idx];
+}
+
 /*
  * For accounting under irq disable, no need for increment preempt count.
  */
@@ -117,6 +136,12 @@ struct mem_cgroup_per_zone {
        unsigned long           count[NR_LRU_LISTS];
 
        struct zone_reclaim_stat reclaim_stat;
+       struct rb_node          tree_node;      /* RB tree node */
+       unsigned long long      usage_in_excess;/* Set to the value by which */
+                                               /* the soft limit is exceeded*/
+       bool                    on_tree;
+       struct mem_cgroup       *mem;           /* Back pointer, we cannot */
+                                               /* use container_of        */
 };
 /* Macro for accessing counter */
 #define MEM_CGROUP_ZSTAT(mz, idx)      ((mz)->count[(idx)])
@@ -129,6 +154,26 @@ struct mem_cgroup_lru_info {
        struct mem_cgroup_per_node *nodeinfo[MAX_NUMNODES];
 };
 
+/*
+ * Cgroups above their limits are maintained in a RB-Tree, independent of
+ * their hierarchy representation
+ */
+
+struct mem_cgroup_tree_per_zone {
+       struct rb_root rb_root;
+       spinlock_t lock;
+};
+
+struct mem_cgroup_tree_per_node {
+       struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES];
+};
+
+struct mem_cgroup_tree {
+       struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES];
+};
+
+static struct mem_cgroup_tree soft_limit_tree __read_mostly;
+
 /*
  * The memory controller data structure. The memory controller controls both
  * page cache and RSS per cgroup. We would eventually like to provide
@@ -186,6 +231,13 @@ struct mem_cgroup {
        struct mem_cgroup_stat stat;
 };
 
+/*
+ * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft
+ * limit reclaim to prevent infinite loops, if they ever occur.
+ */
+#define        MEM_CGROUP_MAX_RECLAIM_LOOPS            (100)
+#define        MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS (2)
+
 enum charge_type {
        MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
        MEM_CGROUP_CHARGE_TYPE_MAPPED,
@@ -200,13 +252,8 @@ enum charge_type {
 #define PCGF_CACHE     (1UL << PCG_CACHE)
 #define PCGF_USED      (1UL << PCG_USED)
 #define PCGF_LOCK      (1UL << PCG_LOCK)
-static const unsigned long
-pcg_default_flags[NR_CHARGE_TYPE] = {
-       PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* File Cache */
-       PCGF_USED | PCGF_LOCK, /* Anon */
-       PCGF_CACHE | PCGF_USED | PCGF_LOCK, /* Shmem */
-       0, /* FORCE */
-};
+/* Not used, but added here for completeness */
+#define PCGF_ACCT      (1UL << PCG_ACCT)
 
 /* for encoding cft->private value on file */
 #define _MEM                   (0)
@@ -215,15 +262,241 @@ pcg_default_flags[NR_CHARGE_TYPE] = {
 #define MEMFILE_TYPE(val)      (((val) >> 16) & 0xffff)
 #define MEMFILE_ATTR(val)      ((val) & 0xffff)
 
+/*
+ * Reclaim flags for mem_cgroup_hierarchical_reclaim
+ */
+#define MEM_CGROUP_RECLAIM_NOSWAP_BIT  0x0
+#define MEM_CGROUP_RECLAIM_NOSWAP      (1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT)
+#define MEM_CGROUP_RECLAIM_SHRINK_BIT  0x1
+#define MEM_CGROUP_RECLAIM_SHRINK      (1 << MEM_CGROUP_RECLAIM_SHRINK_BIT)
+#define MEM_CGROUP_RECLAIM_SOFT_BIT    0x2
+#define MEM_CGROUP_RECLAIM_SOFT                (1 << MEM_CGROUP_RECLAIM_SOFT_BIT)
+
 static void mem_cgroup_get(struct mem_cgroup *mem);
 static void mem_cgroup_put(struct mem_cgroup *mem);
 static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem);
 
+static struct mem_cgroup_per_zone *
+mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
+{
+       return &mem->info.nodeinfo[nid]->zoneinfo[zid];
+}
+
+static struct mem_cgroup_per_zone *
+page_cgroup_zoneinfo(struct page_cgroup *pc)
+{
+       struct mem_cgroup *mem = pc->mem_cgroup;
+       int nid = page_cgroup_nid(pc);
+       int zid = page_cgroup_zid(pc);
+
+       if (!mem)
+               return NULL;
+
+       return mem_cgroup_zoneinfo(mem, nid, zid);
+}
+
+static struct mem_cgroup_tree_per_zone *
+soft_limit_tree_node_zone(int nid, int zid)
+{
+       return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
+}
+
+static struct mem_cgroup_tree_per_zone *
+soft_limit_tree_from_page(struct page *page)
+{
+       int nid = page_to_nid(page);
+       int zid = page_zonenum(page);
+
+       return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
+}
+
+static void
+__mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
+                               struct mem_cgroup_per_zone *mz,
+                               struct mem_cgroup_tree_per_zone *mctz)
+{
+       struct rb_node **p = &mctz->rb_root.rb_node;
+       struct rb_node *parent = NULL;
+       struct mem_cgroup_per_zone *mz_node;
+
+       if (mz->on_tree)
+               return;
+
+       mz->usage_in_excess = res_counter_soft_limit_excess(&mem->res);
+       while (*p) {
+               parent = *p;
+               mz_node = rb_entry(parent, struct mem_cgroup_per_zone,
+                                       tree_node);
+               if (mz->usage_in_excess < mz_node->usage_in_excess)
+                       p = &(*p)->rb_left;
+               /*
+                * Mem cgroups that are over their soft limit by the
+                * same amount cannot be told apart, so keep them to
+                * the right
+                */
+               else if (mz->usage_in_excess >= mz_node->usage_in_excess)
+                       p = &(*p)->rb_right;
+       }
+       rb_link_node(&mz->tree_node, parent, p);
+       rb_insert_color(&mz->tree_node, &mctz->rb_root);
+       mz->on_tree = true;
+}
+
+static void
+__mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
+                               struct mem_cgroup_per_zone *mz,
+                               struct mem_cgroup_tree_per_zone *mctz)
+{
+       if (!mz->on_tree)
+               return;
+       rb_erase(&mz->tree_node, &mctz->rb_root);
+       mz->on_tree = false;
+}
+
+static void
+mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
+                               struct mem_cgroup_per_zone *mz,
+                               struct mem_cgroup_tree_per_zone *mctz)
+{
+       spin_lock(&mctz->lock);
+       __mem_cgroup_insert_exceeded(mem, mz, mctz);
+       spin_unlock(&mctz->lock);
+}
+
+static void
+mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
+                               struct mem_cgroup_per_zone *mz,
+                               struct mem_cgroup_tree_per_zone *mctz)
+{
+       spin_lock(&mctz->lock);
+       __mem_cgroup_remove_exceeded(mem, mz, mctz);
+       spin_unlock(&mctz->lock);
+}
+
+static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem)
+{
+       bool ret = false;
+       int cpu;
+       s64 val;
+       struct mem_cgroup_stat_cpu *cpustat;
+
+       cpu = get_cpu();
+       cpustat = &mem->stat.cpustat[cpu];
+       val = __mem_cgroup_stat_read_local(cpustat, MEM_CGROUP_STAT_EVENTS);
+       if (unlikely(val > SOFTLIMIT_EVENTS_THRESH)) {
+               __mem_cgroup_stat_reset_safe(cpustat, MEM_CGROUP_STAT_EVENTS);
+               ret = true;
+       }
+       put_cpu();
+       return ret;
+}
+
+static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
+{
+       unsigned long long prev_usage_in_excess, new_usage_in_excess;
+       bool updated_tree = false;
+       struct mem_cgroup_per_zone *mz;
+       struct mem_cgroup_tree_per_zone *mctz;
+
+       mz = mem_cgroup_zoneinfo(mem, page_to_nid(page), page_zonenum(page));
+       mctz = soft_limit_tree_from_page(page);
+
+       /*
+        * Updates are lazy: mem cgroups are removed lazily
+        * from the per-zone, per-node rb tree
+        */
+       prev_usage_in_excess = mz->usage_in_excess;
+
+       new_usage_in_excess = res_counter_soft_limit_excess(&mem->res);
+       if (prev_usage_in_excess) {
+               mem_cgroup_remove_exceeded(mem, mz, mctz);
+               updated_tree = true;
+       }
+       if (!new_usage_in_excess)
+               goto done;
+       mem_cgroup_insert_exceeded(mem, mz, mctz);
+
+done:
+       if (updated_tree) {
+               spin_lock(&mctz->lock);
+               mz->usage_in_excess = new_usage_in_excess;
+               spin_unlock(&mctz->lock);
+       }
+}
+
+static void mem_cgroup_remove_from_trees(struct mem_cgroup *mem)
+{
+       int node, zone;
+       struct mem_cgroup_per_zone *mz;
+       struct mem_cgroup_tree_per_zone *mctz;
+
+       for_each_node_state(node, N_POSSIBLE) {
+               for (zone = 0; zone < MAX_NR_ZONES; zone++) {
+                       mz = mem_cgroup_zoneinfo(mem, node, zone);
+                       mctz = soft_limit_tree_node_zone(node, zone);
+                       mem_cgroup_remove_exceeded(mem, mz, mctz);
+               }
+       }
+}
+
+static inline unsigned long mem_cgroup_get_excess(struct mem_cgroup *mem)
+{
+       return res_counter_soft_limit_excess(&mem->res) >> PAGE_SHIFT;
+}
+
+static struct mem_cgroup_per_zone *
+__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
+{
+       struct rb_node *rightmost = NULL;
+       struct mem_cgroup_per_zone *mz = NULL;
+
+retry:
+       rightmost = rb_last(&mctz->rb_root);
+       if (!rightmost)
+               goto done;              /* Nothing to reclaim from */
+
+       mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node);
+       /*
+        * Remove the node now but someone else can add it back;
+        * we will add it back at the end of reclaim to its correct
+        * position in the tree.
+        */
+       __mem_cgroup_remove_exceeded(mz->mem, mz, mctz);
+       if (!res_counter_soft_limit_excess(&mz->mem->res) ||
+               !css_tryget(&mz->mem->css))
+               goto retry;
+done:
+       return mz;
+}
+
+static struct mem_cgroup_per_zone *
+mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
+{
+       struct mem_cgroup_per_zone *mz;
+
+       spin_lock(&mctz->lock);
+       mz = __mem_cgroup_largest_soft_limit_node(mctz);
+       spin_unlock(&mctz->lock);
+       return mz;
+}
+
+static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
+                                        bool charge)
+{
+       int val = (charge) ? 1 : -1;
+       struct mem_cgroup_stat *stat = &mem->stat;
+       struct mem_cgroup_stat_cpu *cpustat;
+       int cpu = get_cpu();
+
+       cpustat = &stat->cpustat[cpu];
+       __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_SWAPOUT, val);
+       put_cpu();
+}
+
 static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
                                         struct page_cgroup *pc,
                                         bool charge)
 {
-       int val = (charge)? 1 : -1;
+       int val = (charge) ? 1 : -1;
        struct mem_cgroup_stat *stat = &mem->stat;
        struct mem_cgroup_stat_cpu *cpustat;
        int cpu = get_cpu();
@@ -240,28 +513,10 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
        else
                __mem_cgroup_stat_add_safe(cpustat,
                                MEM_CGROUP_STAT_PGPGOUT_COUNT, 1);
+       __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_EVENTS, 1);
        put_cpu();
 }
 
-static struct mem_cgroup_per_zone *
-mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
-{
-       return &mem->info.nodeinfo[nid]->zoneinfo[zid];
-}
-
-static struct mem_cgroup_per_zone *
-page_cgroup_zoneinfo(struct page_cgroup *pc)
-{
-       struct mem_cgroup *mem = pc->mem_cgroup;
-       int nid = page_cgroup_nid(pc);
-       int zid = page_cgroup_zid(pc);
-
-       if (!mem)
-               return NULL;
-
-       return mem_cgroup_zoneinfo(mem, nid, zid);
-}
-
 static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem,
                                        enum lru_list idx)
 {
@@ -354,6 +609,11 @@ static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data,
        return ret;
 }
 
+static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
+{
+       return (mem == root_mem_cgroup);
+}
+
 /*
  * Following LRU functions are allowed to be used without PCG_LOCK.
  * Operations are called by routine of global LRU independently from memcg.
@@ -371,22 +631,24 @@ static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data,
 void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru)
 {
        struct page_cgroup *pc;
-       struct mem_cgroup *mem;
        struct mem_cgroup_per_zone *mz;
 
        if (mem_cgroup_disabled())
                return;
        pc = lookup_page_cgroup(page);
        /* can happen while we handle swapcache. */
-       if (list_empty(&pc->lru) || !pc->mem_cgroup)
+       if (!TestClearPageCgroupAcctLRU(pc))
                return;
+       VM_BUG_ON(!pc->mem_cgroup);
        /*
         * We don't check PCG_USED bit. It's cleared when the "page" is finally
         * removed from global LRU.
         */
        mz = page_cgroup_zoneinfo(pc);
-       mem = pc->mem_cgroup;
        MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+       if (mem_cgroup_is_root(pc->mem_cgroup))
+               return;
+       VM_BUG_ON(list_empty(&pc->lru));
        list_del_init(&pc->lru);
        return;
 }
@@ -410,8 +672,8 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
         * For making pc->mem_cgroup visible, insert smp_rmb() here.
         */
        smp_rmb();
-       /* unused page is not rotated. */
-       if (!PageCgroupUsed(pc))
+       /* unused or root page is not rotated. */
+       if (!PageCgroupUsed(pc) || mem_cgroup_is_root(pc->mem_cgroup))
                return;
        mz = page_cgroup_zoneinfo(pc);
        list_move(&pc->lru, &mz->lists[lru]);
@@ -425,6 +687,7 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
        if (mem_cgroup_disabled())
                return;
        pc = lookup_page_cgroup(page);
+       VM_BUG_ON(PageCgroupAcctLRU(pc));
        /*
         * Used bit is set without atomic ops but after smp_wmb().
         * For making pc->mem_cgroup visible, insert smp_rmb() here.
@@ -435,6 +698,9 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
 
        mz = page_cgroup_zoneinfo(pc);
        MEM_CGROUP_ZSTAT(mz, lru) += 1;
+       SetPageCgroupAcctLRU(pc);
+       if (mem_cgroup_is_root(pc->mem_cgroup))
+               return;
        list_add(&pc->lru, &mz->lists[lru]);
 }
 
@@ -469,7 +735,7 @@ static void mem_cgroup_lru_add_after_commit_swapcache(struct page *page)
 
        spin_lock_irqsave(&zone->lru_lock, flags);
        /* link when the page is linked to LRU but page_cgroup isn't */
-       if (PageLRU(page) && list_empty(&pc->lru))
+       if (PageLRU(page) && !PageCgroupAcctLRU(pc))
                mem_cgroup_add_lru_list(page, page_lru(page));
        spin_unlock_irqrestore(&zone->lru_lock, flags);
 }
@@ -855,28 +1121,62 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
 * If shrink==true, this returns immediately to avoid freeing too much.
  */
 static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
-                                  gfp_t gfp_mask, bool noswap, bool shrink)
+                                               struct zone *zone,
+                                               gfp_t gfp_mask,
+                                               unsigned long reclaim_options)
 {
        struct mem_cgroup *victim;
        int ret, total = 0;
        int loop = 0;
+       bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
+       bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
+       bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
+       unsigned long excess = mem_cgroup_get_excess(root_mem);
 
        /* If memsw_is_minimum==1, swap-out is of-no-use. */
        if (root_mem->memsw_is_minimum)
                noswap = true;
 
-       while (loop < 2) {
+       while (1) {
                victim = mem_cgroup_select_victim(root_mem);
-               if (victim == root_mem)
+               if (victim == root_mem) {
                        loop++;
+                       if (loop >= 2) {
+                               /*
+                                * If we have not been able to reclaim
+                                * anything, it might be because there are
+                                * no reclaimable pages under this hierarchy
+                                */
+                               if (!check_soft || !total) {
+                                       css_put(&victim->css);
+                                       break;
+                               }
+                               /*
+                                * We want to do more targeted reclaim.
+                                * excess >> 2 is neither so large that we
+                                * reclaim too much, nor so small that we
+                                * keep coming back to reclaim from this cgroup
+                                */
+                               if (total >= (excess >> 2) ||
+                                       (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS)) {
+                                       css_put(&victim->css);
+                                       break;
+                               }
+                       }
+               }
                if (!mem_cgroup_local_usage(&victim->stat)) {
                        /* this cgroup's local usage == 0 */
                        css_put(&victim->css);
                        continue;
                }
                /* we use swappiness of local cgroup */
-               ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, noswap,
-                                                  get_swappiness(victim));
+               if (check_soft)
+                       ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
+                               noswap, get_swappiness(victim), zone,
+                               zone->zone_pgdat->node_id);
+               else
+                       ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
+                                               noswap, get_swappiness(victim));
                css_put(&victim->css);
                /*
                 * At shrinking usage, we can't check we should stop here or
@@ -886,7 +1186,10 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
                if (shrink)
                        return ret;
                total += ret;
-               if (mem_cgroup_check_under_limit(root_mem))
+               if (check_soft) {
+                       if (res_counter_check_under_soft_limit(&root_mem->res))
+                               return total;
+               } else if (mem_cgroup_check_under_limit(root_mem))
                        return 1 + total;
        }
        return total;
@@ -965,11 +1268,11 @@ done:
  */
 static int __mem_cgroup_try_charge(struct mm_struct *mm,
                        gfp_t gfp_mask, struct mem_cgroup **memcg,
-                       bool oom)
+                       bool oom, struct page *page)
 {
-       struct mem_cgroup *mem, *mem_over_limit;
+       struct mem_cgroup *mem, *mem_over_limit, *mem_over_soft_limit;
        int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
-       struct res_counter *fail_res;
+       struct res_counter *fail_res, *soft_fail_res = NULL;
 
        if (unlikely(test_thread_flag(TIF_MEMDIE))) {
                /* Don't account this! */
@@ -996,20 +1299,23 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
        VM_BUG_ON(css_is_removed(&mem->css));
 
        while (1) {
-               int ret;
-               bool noswap = false;
+               int ret = 0;
+               unsigned long flags = 0;
 
-               ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res);
+               if (mem_cgroup_is_root(mem))
+                       goto done;
+               ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res,
+                                               &soft_fail_res);
                if (likely(!ret)) {
                        if (!do_swap_account)
                                break;
                        ret = res_counter_charge(&mem->memsw, PAGE_SIZE,
-                                                       &fail_res);
+                                                       &fail_res, NULL);
                        if (likely(!ret))
                                break;
                        /* mem+swap counter fails */
-                       res_counter_uncharge(&mem->res, PAGE_SIZE);
-                       noswap = true;
+                       res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+                       flags |= MEM_CGROUP_RECLAIM_NOSWAP;
                        mem_over_limit = mem_cgroup_from_res_counter(fail_res,
                                                                        memsw);
                } else
@@ -1020,8 +1326,8 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
                if (!(gfp_mask & __GFP_WAIT))
                        goto nomem;
 
-               ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask,
-                                                       noswap, false);
+               ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
+                                               gfp_mask, flags);
                if (ret)
                        continue;
 
@@ -1046,13 +1352,24 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
                        goto nomem;
                }
        }
+       /*
+        * Insert just the ancestor; reclaim will trickle down to the
+        * correct cgroup, since the other nodes will be below their
+        * soft limit
+        */
+       if (soft_fail_res) {
+               mem_over_soft_limit =
+                       mem_cgroup_from_res_counter(soft_fail_res, res);
+               if (mem_cgroup_soft_limit_check(mem_over_soft_limit))
+                       mem_cgroup_update_tree(mem_over_soft_limit, page);
+       }
+done:
        return 0;
 nomem:
        css_put(&mem->css);
        return -ENOMEM;
 }
 
-
 /*
  * A helper function to get mem_cgroup from ID. must be called under
  * rcu_read_lock(). The caller must check css_is_removed() or some if
@@ -1119,15 +1436,38 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
        lock_page_cgroup(pc);
        if (unlikely(PageCgroupUsed(pc))) {
                unlock_page_cgroup(pc);
-               res_counter_uncharge(&mem->res, PAGE_SIZE);
-               if (do_swap_account)
-                       res_counter_uncharge(&mem->memsw, PAGE_SIZE);
+               if (!mem_cgroup_is_root(mem)) {
+                       res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+                       if (do_swap_account)
+                               res_counter_uncharge(&mem->memsw, PAGE_SIZE,
+                                                       NULL);
+               }
                css_put(&mem->css);
                return;
        }
+
        pc->mem_cgroup = mem;
+       /*
+        * We access a page_cgroup asynchronously without lock_page_cgroup().
+        * Especially when a page_cgroup is taken from a page, pc->mem_cgroup
+        * is accessed after testing USED bit. To make pc->mem_cgroup visible
+        * before USED bit, we need memory barrier here.
+        * See mem_cgroup_add_lru_list(), etc.
+        */
        smp_wmb();
-       pc->flags = pcg_default_flags[ctype];
+       switch (ctype) {
+       case MEM_CGROUP_CHARGE_TYPE_CACHE:
+       case MEM_CGROUP_CHARGE_TYPE_SHMEM:
+               SetPageCgroupCache(pc);
+               SetPageCgroupUsed(pc);
+               break;
+       case MEM_CGROUP_CHARGE_TYPE_MAPPED:
+               ClearPageCgroupCache(pc);
+               SetPageCgroupUsed(pc);
+               break;
+       default:
+               break;
+       }
 
        mem_cgroup_charge_statistics(mem, pc, true);
 
@@ -1178,7 +1518,8 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
        if (pc->mem_cgroup != from)
                goto out;
 
-       res_counter_uncharge(&from->res, PAGE_SIZE);
+       if (!mem_cgroup_is_root(from))
+               res_counter_uncharge(&from->res, PAGE_SIZE, NULL);
        mem_cgroup_charge_statistics(from, pc, false);
 
        page = pc->page;
@@ -1197,8 +1538,8 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
                                                1);
        }
 
-       if (do_swap_account)
-               res_counter_uncharge(&from->memsw, PAGE_SIZE);
+       if (do_swap_account && !mem_cgroup_is_root(from))
+               res_counter_uncharge(&from->memsw, PAGE_SIZE, NULL);
        css_put(&from->css);
 
        css_get(&to->css);
@@ -1238,7 +1579,7 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
        parent = mem_cgroup_from_cont(pcg);
 
 
-       ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false);
+       ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, page);
        if (ret || !parent)
                return ret;
 
@@ -1268,9 +1609,11 @@ uncharge:
        /* drop extra refcnt by try_charge() */
        css_put(&parent->css);
        /* uncharge if move fails */
-       res_counter_uncharge(&parent->res, PAGE_SIZE);
-       if (do_swap_account)
-               res_counter_uncharge(&parent->memsw, PAGE_SIZE);
+       if (!mem_cgroup_is_root(parent)) {
+               res_counter_uncharge(&parent->res, PAGE_SIZE, NULL);
+               if (do_swap_account)
+                       res_counter_uncharge(&parent->memsw, PAGE_SIZE, NULL);
+       }
        return ret;
 }
 
@@ -1295,7 +1638,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
        prefetchw(pc);
 
        mem = memcg;
-       ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true);
+       ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page);
        if (ret || !mem)
                return ret;
 
@@ -1414,14 +1757,14 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
        if (!mem)
                goto charge_cur_mm;
        *ptr = mem;
-       ret = __mem_cgroup_try_charge(NULL, mask, ptr, true);
+       ret = __mem_cgroup_try_charge(NULL, mask, ptr, true, page);
        /* drop extra refcnt from tryget */
        css_put(&mem->css);
        return ret;
 charge_cur_mm:
        if (unlikely(!mm))
                mm = &init_mm;
-       return __mem_cgroup_try_charge(mm, mask, ptr, true);
+       return __mem_cgroup_try_charge(mm, mask, ptr, true, page);
 }
 
 static void
@@ -1459,7 +1802,10 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
                         * This recorded memcg can be obsolete one. So, avoid
                         * calling css_tryget
                         */
-                       res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+                       if (!mem_cgroup_is_root(memcg))
+                               res_counter_uncharge(&memcg->memsw, PAGE_SIZE,
+                                                       NULL);
+                       mem_cgroup_swap_statistics(memcg, false);
                        mem_cgroup_put(memcg);
                }
                rcu_read_unlock();
@@ -1484,9 +1830,11 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
                return;
        if (!mem)
                return;
-       res_counter_uncharge(&mem->res, PAGE_SIZE);
-       if (do_swap_account)
-               res_counter_uncharge(&mem->memsw, PAGE_SIZE);
+       if (!mem_cgroup_is_root(mem)) {
+               res_counter_uncharge(&mem->res, PAGE_SIZE, NULL);
+               if (do_swap_account)
+                       res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+       }
        css_put(&mem->css);
 }
 
@@ -1500,6 +1848,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
        struct page_cgroup *pc;
        struct mem_cgroup *mem = NULL;
        struct mem_cgroup_per_zone *mz;
+       bool soft_limit_excess = false;
 
        if (mem_cgroup_disabled())
                return NULL;
@@ -1538,9 +1887,14 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
                break;
        }
 
-       res_counter_uncharge(&mem->res, PAGE_SIZE);
-       if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
-               res_counter_uncharge(&mem->memsw, PAGE_SIZE);
+       if (!mem_cgroup_is_root(mem)) {
+               res_counter_uncharge(&mem->res, PAGE_SIZE, &soft_limit_excess);
+               if (do_swap_account &&
+                               (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
+                       res_counter_uncharge(&mem->memsw, PAGE_SIZE, NULL);
+       }
+       if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
+               mem_cgroup_swap_statistics(mem, true);
        mem_cgroup_charge_statistics(mem, pc, false);
 
        ClearPageCgroupUsed(pc);
@@ -1554,6 +1908,8 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
        mz = page_cgroup_zoneinfo(pc);
        unlock_page_cgroup(pc);
 
+       if (soft_limit_excess && mem_cgroup_soft_limit_check(mem))
+               mem_cgroup_update_tree(mem, page);
        /* at swapout, this memcg will be accessed to record to swap */
        if (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
                css_put(&mem->css);
@@ -1629,7 +1985,9 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
                 * We uncharge this because swap is freed.
                 * This memcg can be obsolete one. We avoid calling css_tryget
                 */
-               res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+               if (!mem_cgroup_is_root(memcg))
+                       res_counter_uncharge(&memcg->memsw, PAGE_SIZE, NULL);
+               mem_cgroup_swap_statistics(memcg, false);
                mem_cgroup_put(memcg);
        }
        rcu_read_unlock();
@@ -1658,7 +2016,8 @@ int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
        unlock_page_cgroup(pc);
 
        if (mem) {
-               ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false);
+               ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false,
+                                               page);
                css_put(&mem->css);
        }
        *ptr = mem;
@@ -1798,8 +2157,9 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
                if (!ret)
                        break;
 
-               progress = mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL,
-                                                  false, true);
+               progress = mem_cgroup_hierarchical_reclaim(memcg, NULL,
+                                               GFP_KERNEL,
+                                               MEM_CGROUP_RECLAIM_SHRINK);
                curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
                /* Usage is reduced ? */
                if (curusage >= oldusage)
@@ -1851,7 +2211,9 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
                if (!ret)
                        break;
 
-               mem_cgroup_hierarchical_reclaim(memcg, GFP_KERNEL, true, true);
+               mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
+                                               MEM_CGROUP_RECLAIM_NOSWAP |
+                                               MEM_CGROUP_RECLAIM_SHRINK);
                curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
                /* Usage is reduced ? */
                if (curusage >= oldusage)
@@ -1862,6 +2224,97 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
        return ret;
 }
 
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+                                               gfp_t gfp_mask, int nid,
+                                               int zid)
+{
+       unsigned long nr_reclaimed = 0;
+       struct mem_cgroup_per_zone *mz, *next_mz = NULL;
+       unsigned long reclaimed;
+       int loop = 0;
+       struct mem_cgroup_tree_per_zone *mctz;
+
+       if (order > 0)
+               return 0;
+
+       mctz = soft_limit_tree_node_zone(nid, zid);
+       /*
+        * This loop can run for a while, especially if mem cgroups continuously
+        * keep exceeding their soft limit and putting the system under
+        * pressure
+        */
+       do {
+               if (next_mz)
+                       mz = next_mz;
+               else
+                       mz = mem_cgroup_largest_soft_limit_node(mctz);
+               if (!mz)
+                       break;
+
+               reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
+                                               gfp_mask,
+                                               MEM_CGROUP_RECLAIM_SOFT);
+               nr_reclaimed += reclaimed;
+               spin_lock(&mctz->lock);
+
+               /*
+                * If we failed to reclaim anything from this memory cgroup,
+                * it is time to move on to the next cgroup
+                */
+               next_mz = NULL;
+               if (!reclaimed) {
+                       do {
+                               /*
+                                * Loop until we find yet another one.
+                                *
+                                * By the time we get the soft_limit lock
+                                * again, someone might have added the
+                                * group back on the RB tree. Iterate to
+                                * make sure we get a different mem.
+                                * mem_cgroup_largest_soft_limit_node returns
+                                * NULL if no other cgroup is present on
+                                * the tree
+                                */
+                               next_mz =
+                               __mem_cgroup_largest_soft_limit_node(mctz);
+                               if (next_mz == mz) {
+                                       css_put(&next_mz->mem->css);
+                                       next_mz = NULL;
+                               } else /* next_mz == NULL or other memcg */
+                                       break;
+                       } while (1);
+               }
+               mz->usage_in_excess =
+                       res_counter_soft_limit_excess(&mz->mem->res);
+               __mem_cgroup_remove_exceeded(mz->mem, mz, mctz);
+               /*
+                * One school of thought says that we should not add
+                * back the node to the tree if reclaim returns 0.
+                * But our reclaim could return 0 simply because, due
+                * to priority, we are exposing a smaller subset of
+                * memory to reclaim from. Consider this as a longer
+                * term TODO.
+                */
+               if (mz->usage_in_excess)
+                       __mem_cgroup_insert_exceeded(mz->mem, mz, mctz);
+               spin_unlock(&mctz->lock);
+               css_put(&mz->mem->css);
+               loop++;
+               /*
+                * Could not reclaim anything and there are no more
+                * mem cgroups to try or we seem to be looping without
+                * reclaiming anything.
+                */
+               if (!nr_reclaimed &&
+                       (next_mz == NULL ||
+                       loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS))
+                       break;
+       } while (!nr_reclaimed);
+       if (next_mz)
+               css_put(&next_mz->mem->css);
+       return nr_reclaimed;
+}
+
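The routine above is meant to be driven by global reclaim; elsewhere in this series kswapd calls it per zone before regular shrinking, along these lines (a sketch of the call site, not the exact hunk):

/*
 * Sketch (not the exact hunk from this series) of the intended
 * caller: kswapd's balance_pgdat() invokes soft limit reclaim per
 * zone; the returned page count is ignored for now.
 */
static void soft_limit_reclaim_zone(struct zone *zone, int order,
				    gfp_t gfp_mask)
{
	int nid = zone->zone_pgdat->node_id;
	int zid = zone_idx(zone);

	mem_cgroup_soft_limit_reclaim(zone, order, gfp_mask, nid, zid);
}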
 /*
  * This routine traverse page_cgroup in given list and drop them all.
  * *And* this routine doesn't reclaim page itself, just removes page_cgroup.
@@ -2046,20 +2499,64 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
        return retval;
 }
 
+struct mem_cgroup_idx_data {
+       s64 val;
+       enum mem_cgroup_stat_index idx;
+};
+
+static int
+mem_cgroup_get_idx_stat(struct mem_cgroup *mem, void *data)
+{
+       struct mem_cgroup_idx_data *d = data;
+       d->val += mem_cgroup_read_stat(&mem->stat, d->idx);
+       return 0;
+}
+
+static void
+mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem,
+                               enum mem_cgroup_stat_index idx, s64 *val)
+{
+       struct mem_cgroup_idx_data d;
+       d.idx = idx;
+       d.val = 0;
+       mem_cgroup_walk_tree(mem, &d, mem_cgroup_get_idx_stat);
+       *val = d.val;
+}
+
 static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
 {
        struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
-       u64 val = 0;
+       u64 idx_val, val;
        int type, name;
 
        type = MEMFILE_TYPE(cft->private);
        name = MEMFILE_ATTR(cft->private);
        switch (type) {
        case _MEM:
-               val = res_counter_read_u64(&mem->res, name);
+               if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
+                       mem_cgroup_get_recursive_idx_stat(mem,
+                               MEM_CGROUP_STAT_CACHE, &idx_val);
+                       val = idx_val;
+                       mem_cgroup_get_recursive_idx_stat(mem,
+                               MEM_CGROUP_STAT_RSS, &idx_val);
+                       val += idx_val;
+                       val <<= PAGE_SHIFT;
+               } else
+                       val = res_counter_read_u64(&mem->res, name);
                break;
        case _MEMSWAP:
-               val = res_counter_read_u64(&mem->memsw, name);
+               if (name == RES_USAGE && mem_cgroup_is_root(mem)) {
+                       mem_cgroup_get_recursive_idx_stat(mem,
+                               MEM_CGROUP_STAT_CACHE, &idx_val);
+                       val = idx_val;
+                       mem_cgroup_get_recursive_idx_stat(mem,
+                               MEM_CGROUP_STAT_RSS, &idx_val);
+                       val += idx_val;
+                       mem_cgroup_get_recursive_idx_stat(mem,
+                               MEM_CGROUP_STAT_SWAPOUT, &idx_val);
+                       val += idx_val;
+                       val <<= PAGE_SHIFT;
+               } else
+                       val = res_counter_read_u64(&mem->memsw, name);
                break;
        default:
                BUG();
@@ -2083,6 +2580,10 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
        name = MEMFILE_ATTR(cft->private);
        switch (name) {
        case RES_LIMIT:
+               if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */
+                       ret = -EINVAL;
+                       break;
+               }
                /* This function does all necessary parse...reuse it */
                ret = res_counter_memparse_write_strategy(buffer, &val);
                if (ret)
@@ -2092,6 +2593,20 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
                else
                        ret = mem_cgroup_resize_memsw_limit(memcg, val);
                break;
+       case RES_SOFT_LIMIT:
+               ret = res_counter_memparse_write_strategy(buffer, &val);
+               if (ret)
+                       break;
+               /*
+                * For memsw, soft limits are hard to implement in terms
+                * of semantics; for now, we support soft limits only
+                * for memory control without swap
+                */
+               if (type == _MEM)
+                       ret = res_counter_set_soft_limit(&memcg->res, val);
+               else
+                       ret = -EINVAL;
+               break;
        default:
                ret = -EINVAL; /* should be BUG() ? */
                break;
@@ -2149,6 +2664,7 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
                        res_counter_reset_failcnt(&mem->memsw);
                break;
        }
+
        return 0;
 }
 
@@ -2160,6 +2676,7 @@ enum {
        MCS_MAPPED_FILE,
        MCS_PGPGIN,
        MCS_PGPGOUT,
+       MCS_SWAP,
        MCS_INACTIVE_ANON,
        MCS_ACTIVE_ANON,
        MCS_INACTIVE_FILE,
@@ -2181,6 +2698,7 @@ struct {
        {"mapped_file", "total_mapped_file"},
        {"pgpgin", "total_pgpgin"},
        {"pgpgout", "total_pgpgout"},
+       {"swap", "total_swap"},
        {"inactive_anon", "total_inactive_anon"},
        {"active_anon", "total_active_anon"},
        {"inactive_file", "total_inactive_file"},
@@ -2205,6 +2723,10 @@ static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data)
        s->stat[MCS_PGPGIN] += val;
        val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT);
        s->stat[MCS_PGPGOUT] += val;
+       if (do_swap_account) {
+               val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_SWAPOUT);
+               s->stat[MCS_SWAP] += val * PAGE_SIZE;
+       }
 
        /* per zone stat */
        val = mem_cgroup_get_local_zonestat(mem, LRU_INACTIVE_ANON);
@@ -2236,8 +2758,11 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
        memset(&mystat, 0, sizeof(mystat));
        mem_cgroup_get_local_stat(mem_cont, &mystat);
 
-       for (i = 0; i < NR_MCS_STAT; i++)
+       for (i = 0; i < NR_MCS_STAT; i++) {
+               if (i == MCS_SWAP && !do_swap_account)
+                       continue;
                cb->fill(cb, memcg_stat_strings[i].local_name, mystat.stat[i]);
+       }
 
        /* Hierarchical information */
        {
@@ -2250,9 +2775,11 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
 
        memset(&mystat, 0, sizeof(mystat));
        mem_cgroup_get_total_stat(mem_cont, &mystat);
-       for (i = 0; i < NR_MCS_STAT; i++)
+       for (i = 0; i < NR_MCS_STAT; i++) {
+               if (i == MCS_SWAP && !do_swap_account)
+                       continue;
                cb->fill(cb, memcg_stat_strings[i].total_name, mystat.stat[i]);
-
+       }
 
 #ifdef CONFIG_DEBUG_VM
        cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL));
@@ -2344,6 +2871,12 @@ static struct cftype mem_cgroup_files[] = {
                .write_string = mem_cgroup_write,
                .read_u64 = mem_cgroup_read,
        },
+       {
+               .name = "soft_limit_in_bytes",
+               .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT),
+               .write_string = mem_cgroup_write,
+               .read_u64 = mem_cgroup_read,
+       },
        {
                .name = "failcnt",
                .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT),
@@ -2438,6 +2971,9 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
                mz = &pn->zoneinfo[zone];
                for_each_lru(l)
                        INIT_LIST_HEAD(&mz->lists[l]);
+               mz->usage_in_excess = 0;
+               mz->on_tree = false;
+               mz->mem = mem;
        }
        return 0;
 }
@@ -2483,6 +3019,7 @@ static void __mem_cgroup_free(struct mem_cgroup *mem)
 {
        int node;
 
+       mem_cgroup_remove_from_trees(mem);
        free_css_id(&mem_cgroup_subsys, &mem->css);
 
        for_each_node_state(node, N_POSSIBLE)
@@ -2531,6 +3068,31 @@ static void __init enable_swap_cgroup(void)
 }
 #endif
 
+static int mem_cgroup_soft_limit_tree_init(void)
+{
+       struct mem_cgroup_tree_per_node *rtpn;
+       struct mem_cgroup_tree_per_zone *rtpz;
+       int tmp, node, zone;
+
+       for_each_node_state(node, N_POSSIBLE) {
+               tmp = node;
+               if (!node_state(node, N_NORMAL_MEMORY))
+                       tmp = -1;
+               rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp);
+               if (!rtpn)
+                       return 1;
+
+               soft_limit_tree.rb_tree_per_node[node] = rtpn;
+
+               for (zone = 0; zone < MAX_NR_ZONES; zone++) {
+                       rtpz = &rtpn->rb_tree_per_zone[zone];
+                       rtpz->rb_root = RB_ROOT;
+                       spin_lock_init(&rtpz->lock);
+               }
+       }
+       return 0;
+}
+
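
For reference, the tree this function populates is declared earlier in mm/memcontrol.c by the soft limit patches; a sketch of the data structures, with field names as merged (treat as illustrative):

        struct mem_cgroup_tree_per_zone {
                struct rb_root rb_root;        /* memcgs over their soft limit */
                spinlock_t lock;
        };

        struct mem_cgroup_tree_per_node {
                struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES];
        };

        struct mem_cgroup_tree {
                struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES];
        };

        static struct mem_cgroup_tree soft_limit_tree __read_mostly;
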
 static struct cgroup_subsys_state * __ref
 mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 {
@@ -2545,10 +3107,15 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
        for_each_node_state(node, N_POSSIBLE)
                if (alloc_mem_cgroup_per_zone_info(mem, node))
                        goto free_out;
+
        /* root ? */
        if (cont->parent == NULL) {
                enable_swap_cgroup();
                parent = NULL;
+               root_mem_cgroup = mem;
+               if (mem_cgroup_soft_limit_tree_init())
+                       goto free_out;
+
        } else {
                parent = mem_cgroup_from_cont(cont->parent);
                mem->use_hierarchy = parent->use_hierarchy;
@@ -2577,6 +3144,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
        return &mem->css;
 free_out:
        __mem_cgroup_free(mem);
+       root_mem_cgroup = NULL;
        return ERR_PTR(error);
 }
 
@@ -2612,7 +3180,8 @@ static int mem_cgroup_populate(struct cgroup_subsys *ss,
 static void mem_cgroup_move_task(struct cgroup_subsys *ss,
                                struct cgroup *cont,
                                struct cgroup *old_cont,
-                               struct task_struct *p)
+                               struct task_struct *p,
+                               bool threadgroup)
 {
        mutex_lock(&memcg_tasklist);
        /*
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
new file mode 100644 (file)
index 0000000..729d4b1
--- /dev/null
@@ -0,0 +1,832 @@
+/*
+ * Copyright (C) 2008, 2009 Intel Corporation
+ * Authors: Andi Kleen, Fengguang Wu
+ *
+ * This software may be redistributed and/or modified under the terms of
+ * the GNU General Public License ("GPL") version 2 only as published by the
+ * Free Software Foundation.
+ *
+ * High level machine check handler. Handles pages reported by the
+ * hardware as being corrupted, usually due to a 2-bit ECC memory or cache
+ * failure.
+ *
+ * Handles page cache pages in various states. The tricky part
+ * here is that we can access any page asynchronously to other VM
+ * users, because memory failures could happen anytime and anywhere,
+ * possibly violating some of their assumptions. This is why this code
+ * has to be extremely careful. Generally it tries to use normal locking
+ * rules, as in get the standard locks, even if that means the
+ * error handling takes potentially a long time.
+ *
+ * The operation to map back from RMAP chains to processes has to walk
+ * the complete process list and has non-linear complexity in the number
+ * of mappings. In short it can be quite slow. But since memory corruptions
+ * are rare we hope to get away with this.
+ */
+
+/*
+ * Notebook:
+ * - hugetlb needs more code
+ * - kcore/oldmem/vmcore/mem/kmem check for hwpoison pages
+ * - pass bad pages to kdump next kernel
+ */
+#define DEBUG 1                /* remove me in 2.6.34 */
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/page-flags.h>
+#include <linux/sched.h>
+#include <linux/rmap.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <linux/backing-dev.h>
+#include "internal.h"
+
+int sysctl_memory_failure_early_kill __read_mostly = 0;
+
+int sysctl_memory_failure_recovery __read_mostly = 1;
+
+atomic_long_t mce_bad_pages __read_mostly = ATOMIC_LONG_INIT(0);
+
+/*
+ * Send all the processes that have the page mapped an ``action optional''
+ * signal.
+ */
+static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno,
+                       unsigned long pfn)
+{
+       struct siginfo si;
+       int ret;
+
+       printk(KERN_ERR
+               "MCE %#lx: Killing %s:%d early due to hardware memory corruption\n",
+               pfn, t->comm, t->pid);
+       si.si_signo = SIGBUS;
+       si.si_errno = 0;
+       si.si_code = BUS_MCEERR_AO;
+       si.si_addr = (void *)addr;
+#ifdef __ARCH_SI_TRAPNO
+       si.si_trapno = trapno;
+#endif
+       si.si_addr_lsb = PAGE_SHIFT;
+       /*
+        * Don't use force here; it's convenient if the signal
+        * can be temporarily blocked.
+        * This could cause a loop when the user sets SIGBUS
+        * to SIG_IGN, but hopefully no one will do that.
+        */
+       ret = send_sig_info(SIGBUS, &si, t);  /* synchronous? */
+       if (ret < 0)
+               printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n",
+                      t->comm, t->pid, ret);
+       return ret;
+}
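
On the receiving side, a process that wants to survive an action-optional SIGBUS installs an SA_SIGINFO handler and inspects si_code; a minimal userspace sketch (handler and names are illustrative, BUS_MCEERR_AO needs reasonably current kernel headers, and fprintf()/exit() are not async-signal-safe, which is acceptable only in a demo):

        #include <signal.h>
        #include <stdio.h>
        #include <stdlib.h>

        static void sigbus_handler(int sig, siginfo_t *si, void *ctx)
        {
                if (si->si_code == BUS_MCEERR_AO)
                        fprintf(stderr, "page at %p is poisoned\n", si->si_addr);
                /* Real code would drop/reload the affected data instead. */
                exit(EXIT_FAILURE);
        }

        int main(void)
        {
                struct sigaction sa = { .sa_flags = SA_SIGINFO };

                sa.sa_sigaction = sigbus_handler;
                sigaction(SIGBUS, &sa, NULL);
                /* ... run the actual workload here ... */
                return 0;
        }
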
+
+/*
+ * Kill all processes that have a poisoned page mapped and then isolate
+ * the page.
+ *
+ * General strategy:
+ * Find all processes having the page mapped and kill them.
+ * But we keep a page reference around so that the page is not
+ * actually freed yet.
+ * Then stash the page away.
+ *
+ * There's no convenient way to get back to mapped processes
+ * from the VMAs. So do a brute-force search over all
+ * running processes.
+ *
+ * Remember that machine checks are not common (or rather
+ * if they are common you have other problems), so this shouldn't
+ * be a performance issue.
+ *
+ * Also, some races are possible between the error detection
+ * and actually handling it.
+ */
+
+struct to_kill {
+       struct list_head nd;
+       struct task_struct *tsk;
+       unsigned long addr;
+       unsigned addr_valid:1;
+};
+
+/*
+ * Failure handling: if we can't find or can't kill a process there's
+ * not much we can do. We just print a message and ignore otherwise.
+ */
+
+/*
+ * Schedule a process for later kill.
+ * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
+ * TBD would GFP_NOIO be enough?
+ */
+static void add_to_kill(struct task_struct *tsk, struct page *p,
+                      struct vm_area_struct *vma,
+                      struct list_head *to_kill,
+                      struct to_kill **tkc)
+{
+       struct to_kill *tk;
+
+       if (*tkc) {
+               tk = *tkc;
+               *tkc = NULL;
+       } else {
+               tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC);
+               if (!tk) {
+                       printk(KERN_ERR
+               "MCE: Out of memory while machine check handling\n");
+                       return;
+               }
+       }
+       tk->addr = page_address_in_vma(p, vma);
+       tk->addr_valid = 1;
+
+       /*
+        * In theory we don't have to kill when the page was
+        * munmapped. But it could also be a mremap. Since that's
+        * likely very rare, kill anyway just out of paranoia, but use
+        * a SIGKILL because the error is not contained anymore.
+        */
+       if (tk->addr == -EFAULT) {
+               pr_debug("MCE: Unable to find user space address %lx in %s\n",
+                       page_to_pfn(p), tsk->comm);
+               tk->addr_valid = 0;
+       }
+       get_task_struct(tsk);
+       tk->tsk = tsk;
+       list_add_tail(&tk->nd, to_kill);
+}
+
+/*
+ * Kill the processes that have been collected earlier.
+ *
+ * Only do anything when DOIT is set; otherwise just free the list
+ * (this is used for clean pages, which do not need killing).
+ * Also, when FAIL is set, do a force kill because something went
+ * wrong earlier.
+ */
+static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno,
+                         int fail, unsigned long pfn)
+{
+       struct to_kill *tk, *next;
+
+       list_for_each_entry_safe (tk, next, to_kill, nd) {
+               if (doit) {
+                       /*
+                        * In case something went wrong with munmapping,
+                        * make sure the process doesn't catch the
+                        * signal and then access the memory. Just kill it,
+                        * bypassing any signal handlers it installed.
+                        */
+                       if (fail || tk->addr_valid == 0) {
+                               printk(KERN_ERR
+               "MCE %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
+                                       pfn, tk->tsk->comm, tk->tsk->pid);
+                               force_sig(SIGKILL, tk->tsk);
+                       }
+
+                       /*
+                        * In theory the process could have mapped
+                        * something else at the address in between. We could
+                        * check for that, but we need to tell the
+                        * process anyway.
+                        */
+                       else if (kill_proc_ao(tk->tsk, tk->addr, trapno,
+                                             pfn) < 0)
+                               printk(KERN_ERR
+               "MCE %#lx: Cannot send advisory machine check signal to %s:%d\n",
+                                       pfn, tk->tsk->comm, tk->tsk->pid);
+               }
+               put_task_struct(tk->tsk);
+               kfree(tk);
+       }
+}
+
+static int task_early_kill(struct task_struct *tsk)
+{
+       if (!tsk->mm)
+               return 0;
+       if (tsk->flags & PF_MCE_PROCESS)
+               return !!(tsk->flags & PF_MCE_EARLY);
+       return sysctl_memory_failure_early_kill;
+}
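
PF_MCE_PROCESS and PF_MCE_EARLY are set per process through the PR_MCE_KILL prctl added elsewhere in this series; a sketch of how a process would opt into early kill (the constant values are assumptions taken from the series, so verify them against your headers):

        #include <stdio.h>
        #include <sys/prctl.h>

        #ifndef PR_MCE_KILL                     /* assumed values */
        #define PR_MCE_KILL             33
        #define PR_MCE_KILL_SET         1
        #define PR_MCE_KILL_EARLY       1
        #endif

        /* Request early (action-optional) kills for the calling process. */
        static int request_early_kill(void)
        {
                if (prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY,
                          0, 0)) {
                        perror("prctl(PR_MCE_KILL)");
                        return -1;
                }
                return 0;
        }
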
+
+/*
+ * Collect processes when the error hit an anonymous page.
+ */
+static void collect_procs_anon(struct page *page, struct list_head *to_kill,
+                             struct to_kill **tkc)
+{
+       struct vm_area_struct *vma;
+       struct task_struct *tsk;
+       struct anon_vma *av;
+
+       read_lock(&tasklist_lock);
+       av = page_lock_anon_vma(page);
+       if (av == NULL) /* Not actually mapped anymore */
+               goto out;
+       for_each_process (tsk) {
+               if (!task_early_kill(tsk))
+                       continue;
+               list_for_each_entry (vma, &av->head, anon_vma_node) {
+                       if (!page_mapped_in_vma(page, vma))
+                               continue;
+                       if (vma->vm_mm == tsk->mm)
+                               add_to_kill(tsk, page, vma, to_kill, tkc);
+               }
+       }
+       page_unlock_anon_vma(av);
+out:
+       read_unlock(&tasklist_lock);
+}
+
+/*
+ * Collect processes when the error hit a file mapped page.
+ */
+static void collect_procs_file(struct page *page, struct list_head *to_kill,
+                             struct to_kill **tkc)
+{
+       struct vm_area_struct *vma;
+       struct task_struct *tsk;
+       struct prio_tree_iter iter;
+       struct address_space *mapping = page->mapping;
+
+       /*
+        * A note on the locking order between the two locks.
+        * We don't rely on this particular order.
+        * If you have some other code that needs a different order
+        * feel free to switch them around. Or add a reverse link
+        * from mm_struct to task_struct, then this could be all
+        * done without taking tasklist_lock and looping over all tasks.
+        */
+
+       read_lock(&tasklist_lock);
+       spin_lock(&mapping->i_mmap_lock);
+       for_each_process(tsk) {
+               pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+
+               if (!task_early_kill(tsk))
+                       continue;
+
+               vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff,
+                                     pgoff) {
+                       /*
+                        * Send early kill signal to tasks where a vma covers
+                        * the page but the corrupted page is not necessarily
+                        * mapped in its pte.
+                        * Assume applications that requested early kill want
+                        * to be informed of all such data corruptions.
+                        */
+                       if (vma->vm_mm == tsk->mm)
+                               add_to_kill(tsk, page, vma, to_kill, tkc);
+               }
+       }
+       spin_unlock(&mapping->i_mmap_lock);
+       read_unlock(&tasklist_lock);
+}
+
+/*
+ * Collect the processes that have the corrupted page mapped to kill.
+ * This is done in two steps for locking reasons.
+ * First preallocate one tokill structure outside the spin locks,
+ * so that we can kill at least one process reasonably reliably.
+ */
+static void collect_procs(struct page *page, struct list_head *tokill)
+{
+       struct to_kill *tk;
+
+       if (!page->mapping)
+               return;
+
+       tk = kmalloc(sizeof(struct to_kill), GFP_NOIO);
+       if (!tk)
+               return;
+       if (PageAnon(page))
+               collect_procs_anon(page, tokill, &tk);
+       else
+               collect_procs_file(page, tokill, &tk);
+       kfree(tk);
+}
+
+/*
+ * Error handlers for various types of pages.
+ */
+
+enum outcome {
+       FAILED,         /* Error handling failed */
+       DELAYED,        /* Will be handled later */
+       IGNORED,        /* Error safely ignored */
+       RECOVERED,      /* Successfully recovered */
+};
+
+static const char *action_name[] = {
+       [FAILED] = "Failed",
+       [DELAYED] = "Delayed",
+       [IGNORED] = "Ignored",
+       [RECOVERED] = "Recovered",
+};
+
+/*
+ * Error hit kernel page.
+ * Do nothing and try to be lucky by not touching it. For a few cases we
+ * could be more sophisticated.
+ */
+static int me_kernel(struct page *p, unsigned long pfn)
+{
+       return DELAYED;
+}
+
+/*
+ * Already poisoned page.
+ */
+static int me_ignore(struct page *p, unsigned long pfn)
+{
+       return IGNORED;
+}
+
+/*
+ * Page in unknown state. Do nothing.
+ */
+static int me_unknown(struct page *p, unsigned long pfn)
+{
+       printk(KERN_ERR "MCE %#lx: Unknown page state\n", pfn);
+       return FAILED;
+}
+
+/*
+ * Free memory
+ */
+static int me_free(struct page *p, unsigned long pfn)
+{
+       return DELAYED;
+}
+
+/*
+ * Clean (or cleaned) page cache page.
+ */
+static int me_pagecache_clean(struct page *p, unsigned long pfn)
+{
+       int err;
+       int ret = FAILED;
+       struct address_space *mapping;
+
+       if (!isolate_lru_page(p))
+               page_cache_release(p);
+
+       /*
+        * For anonymous pages we're done; the only reference left
+        * should be the one m_f() holds.
+        */
+       if (PageAnon(p))
+               return RECOVERED;
+
+       /*
+        * Now truncate the page in the page cache. This is really
+        * more like a "temporary hole punch"
+        * Don't do this for block devices when someone else
+        * has a reference, because it could be file system metadata
+        * and that's not safe to truncate.
+        */
+       mapping = page_mapping(p);
+       if (!mapping) {
+               /*
+                * Page has been torn down in the meantime.
+                */
+               return FAILED;
+       }
+
+       /*
+        * Truncation is a bit tricky. Enable it per file system for now.
+        *
+        * Open: to take i_mutex or not for this? Right now we don't.
+        */
+       if (mapping->a_ops->error_remove_page) {
+               err = mapping->a_ops->error_remove_page(mapping, p);
+               if (err != 0) {
+                       printk(KERN_INFO "MCE %#lx: Failed to punch page: %d\n",
+                                       pfn, err);
+               } else if (page_has_private(p) &&
+                               !try_to_release_page(p, GFP_NOIO)) {
+                       pr_debug("MCE %#lx: failed to release buffers\n", pfn);
+               } else {
+                       ret = RECOVERED;
+               }
+       } else {
+               /*
+                * If the file system doesn't support it, just invalidate.
+                * This fails on dirty pages or anything with private data.
+                */
+               if (invalidate_inode_page(p))
+                       ret = RECOVERED;
+               else
+                       printk(KERN_INFO "MCE %#lx: Failed to invalidate\n",
+                               pfn);
+       }
+       return ret;
+}
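
Since truncation is opt-in, each file system enables it by pointing the new ->error_remove_page operation at the generic helper introduced alongside this handler; a sketch of the wiring (ext2 named purely as an illustration):

        static const struct address_space_operations ext2_aops = {
                /* ... readpage, writepage and friends elided ... */
                .error_remove_page      = generic_error_remove_page,
        };
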
+
+/*
+ * Dirty pagecache page.
+ * Issues: when the error hits a hole page the error is not properly
+ * propagated.
+ */
+static int me_pagecache_dirty(struct page *p, unsigned long pfn)
+{
+       struct address_space *mapping = page_mapping(p);
+
+       SetPageError(p);
+       /* TBD: print more information about the file. */
+       if (mapping) {
+               /*
+                * IO errors will be reported by write(), fsync(), etc.,
+                * which check the mapping.
+                * This way the application knows that something went
+                * wrong with its dirty file data.
+                *
+                * There's one open issue:
+                *
+                * The EIO will only be reported on the next IO
+                * operation and then cleared through the IO map.
+                * Normally Linux has two mechanisms to pass IO errors:
+                * first through the AS_EIO flag in the address space,
+                * and then through the PageError flag in the page.
+                * Since we drop pages on memory failure handling, the
+                * only mechanism open to use is AS_EIO.
+                *
+                * This has the disadvantage that it gets cleared on
+                * the first operation that returns an error, while
+                * the PageError bit is more sticky and only cleared
+                * when the page is reread or dropped.  If an
+                * application assumes it will always get an error on
+                * fsync, but does other operations on the fd before,
+                * and the page is dropped in between, then the error
+                * will not be properly reported.
+                *
+                * This can already happen even without hwpoisoned
+                * pages: first on metadata IO errors (which only
+                * report through AS_EIO) or when the page is dropped
+                * at the wrong time.
+                *
+                * So right now we assume that the application does the
+                * right thing on the first EIO, but we're not worse
+                * than other parts of the kernel.
+                */
+               mapping_set_error(mapping, EIO);
+       }
+
+       return me_pagecache_clean(p, pfn);
+}
+
+/*
+ * Clean and dirty swap cache.
+ *
+ * Dirty swap cache page is tricky to handle. The page could live both in page
+ * cache and swap cache (i.e. the page is freshly swapped in). So it could be
+ * referenced concurrently by 2 types of PTEs:
+ * normal PTEs and swap PTEs. We try to handle them consistently by calling
+ * try_to_unmap(TTU_IGNORE_HWPOISON) to convert the normal PTEs to swap PTEs,
+ * and then
+ *      - clear dirty bit to prevent IO
+ *      - remove from LRU
+ *      - but keep in the swap cache, so that when we return to it on
+ *        a later page fault, we know the application is accessing
+ *        corrupted data and shall be killed (we installed simple
+ *        interception code in do_swap_page to catch it).
+ *
+ * Clean swap cache pages can be directly isolated. A later page fault will
+ * bring in the known good data from disk.
+ */
+static int me_swapcache_dirty(struct page *p, unsigned long pfn)
+{
+       int ret = FAILED;
+
+       ClearPageDirty(p);
+       /* Trigger EIO in shmem: */
+       ClearPageUptodate(p);
+
+       if (!isolate_lru_page(p)) {
+               page_cache_release(p);
+               ret = DELAYED;
+       }
+
+       return ret;
+}
+
+static int me_swapcache_clean(struct page *p, unsigned long pfn)
+{
+       int ret = FAILED;
+
+       if (!isolate_lru_page(p)) {
+               page_cache_release(p);
+               ret = RECOVERED;
+       }
+       delete_from_swap_cache(p);
+       return ret;
+}
+
+/*
+ * Huge pages. Needs work.
+ * Issues:
+ * No rmap support, so we cannot find the original mapper. In theory we could
+ * walk all MMs and look for the mappings, but that would be non-atomic and
+ * racy. We need rmap for hugepages for this. Alternatively we could employ
+ * a heuristic, like just walking the current process and hoping it has the
+ * page mapped (that should usually be true for the common "shared database
+ * cache" case).
+ * Should handle free huge pages and dequeue them too, but this needs to
+ * handle huge page accounting correctly.
+ */
+static int me_huge_page(struct page *p, unsigned long pfn)
+{
+       return FAILED;
+}
+
+/*
+ * Various page states we can handle.
+ *
+ * A page state is defined by its current page->flags bits.
+ * The table matches them in order and calls the right handler.
+ *
+ * This is quite tricky because we can access a page at any time
+ * in its life cycle, so all accesses have to be extremely careful.
+ *
+ * This is not complete. More states could be added.
+ * For any missing state don't attempt recovery.
+ */
+
+#define dirty          (1UL << PG_dirty)
+#define sc             (1UL << PG_swapcache)
+#define unevict                (1UL << PG_unevictable)
+#define mlock          (1UL << PG_mlocked)
+#define writeback      (1UL << PG_writeback)
+#define lru            (1UL << PG_lru)
+#define swapbacked     (1UL << PG_swapbacked)
+#define head           (1UL << PG_head)
+#define tail           (1UL << PG_tail)
+#define compound       (1UL << PG_compound)
+#define slab           (1UL << PG_slab)
+#define buddy          (1UL << PG_buddy)
+#define reserved       (1UL << PG_reserved)
+
+static struct page_state {
+       unsigned long mask;
+       unsigned long res;
+       char *msg;
+       int (*action)(struct page *p, unsigned long pfn);
+} error_states[] = {
+       { reserved,     reserved,       "reserved kernel",      me_ignore },
+       { buddy,        buddy,          "free kernel",  me_free },
+
+       /*
+        * Could in theory check if slab page is free or if we can drop
+        * currently unused objects without touching them. But just
+        * treat it as standard kernel for now.
+        */
+       { slab,         slab,           "kernel slab",  me_kernel },
+
+#ifdef CONFIG_PAGEFLAGS_EXTENDED
+       { head,         head,           "huge",         me_huge_page },
+       { tail,         tail,           "huge",         me_huge_page },
+#else
+       { compound,     compound,       "huge",         me_huge_page },
+#endif
+
+       { sc|dirty,     sc|dirty,       "swapcache",    me_swapcache_dirty },
+       { sc|dirty,     sc,             "swapcache",    me_swapcache_clean },
+
+       { unevict|dirty, unevict|dirty, "unevictable LRU", me_pagecache_dirty},
+       { unevict,      unevict,        "unevictable LRU", me_pagecache_clean},
+
+#ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT
+       { mlock|dirty,  mlock|dirty,    "mlocked LRU",  me_pagecache_dirty },
+       { mlock,        mlock,          "mlocked LRU",  me_pagecache_clean },
+#endif
+
+       { lru|dirty,    lru|dirty,      "LRU",          me_pagecache_dirty },
+       { lru|dirty,    lru,            "clean LRU",    me_pagecache_clean },
+       { swapbacked,   swapbacked,     "anonymous",    me_pagecache_clean },
+
+       /*
+        * Catchall entry: must be at end.
+        */
+       { 0,            0,              "unknown page state",   me_unknown },
+};
+
+#undef dirty
+#undef sc
+#undef unevict
+#undef mlock
+#undef writeback
+#undef lru
+#undef swapbacked
+#undef head
+#undef tail
+#undef compound
+#undef slab
+#undef buddy
+#undef reserved
+
+static void action_result(unsigned long pfn, char *msg, int result)
+{
+       struct page *page = NULL;
+       if (pfn_valid(pfn))
+               page = pfn_to_page(pfn);
+
+       printk(KERN_ERR "MCE %#lx: %s%s page recovery: %s\n",
+               pfn,
+               page && PageDirty(page) ? "dirty " : "",
+               msg, action_name[result]);
+}
+
+static int page_action(struct page_state *ps, struct page *p,
+                       unsigned long pfn, int ref)
+{
+       int result;
+
+       result = ps->action(p, pfn);
+       action_result(pfn, ps->msg, result);
+       if (page_count(p) != 1 + ref)
+               printk(KERN_ERR
+                      "MCE %#lx: %s page still referenced by %d users\n",
+                      pfn, ps->msg, page_count(p) - 1);
+
+       /* Could do more checks here if page looks ok */
+       /*
+        * Could adjust zone counters here to correct for the missing page.
+        */
+
+       return result == RECOVERED ? 0 : -EBUSY;
+}
+
+#define N_UNMAP_TRIES 5
+
+/*
+ * Do all that is necessary to remove user space mappings. Unmap
+ * the pages and send SIGBUS to the processes if the data was dirty.
+ */
+static void hwpoison_user_mappings(struct page *p, unsigned long pfn,
+                                 int trapno)
+{
+       enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
+       struct address_space *mapping;
+       LIST_HEAD(tokill);
+       int ret;
+       int i;
+       int kill = 1;
+
+       if (PageReserved(p) || PageCompound(p) || PageSlab(p))
+               return;
+
+       if (!PageLRU(p))
+               lru_add_drain_all();
+
+       /*
+        * This check implies that we don't kill processes early if their
+        * pages are in the swap cache. Those are always late kills.
+        */
+       if (!page_mapped(p))
+               return;
+
+       if (PageSwapCache(p)) {
+               printk(KERN_ERR
+                      "MCE %#lx: keeping poisoned page in swap cache\n", pfn);
+               ttu |= TTU_IGNORE_HWPOISON;
+       }
+
+       /*
+        * Propagate the dirty bit from PTEs to struct page first, because we
+        * need this to decide if we should kill or just drop the page.
+        */
+       mapping = page_mapping(p);
+       if (!PageDirty(p) && mapping && mapping_cap_writeback_dirty(mapping)) {
+               if (page_mkclean(p)) {
+                       SetPageDirty(p);
+               } else {
+                       kill = 0;
+                       ttu |= TTU_IGNORE_HWPOISON;
+                       printk(KERN_INFO
+       "MCE %#lx: corrupted page was clean: dropped without side effects\n",
+                               pfn);
+               }
+       }
+
+       /*
+        * First collect all the processes that have the page
+        * mapped in dirty form.  This has to be done before try_to_unmap,
+        * because ttu takes the rmap data structures down.
+        *
+        * Error handling: We ignore errors here because
+        * there's nothing that can be done.
+        */
+       if (kill)
+               collect_procs(p, &tokill);
+
+       /*
+        * try_to_unmap can fail temporarily due to races.
+        * Try a few times (RED-PEN better strategy?)
+        */
+       for (i = 0; i < N_UNMAP_TRIES; i++) {
+               ret = try_to_unmap(p, ttu);
+               if (ret == SWAP_SUCCESS)
+                       break;
+               pr_debug("MCE %#lx: try_to_unmap retry needed %d\n", pfn,  ret);
+       }
+
+       if (ret != SWAP_SUCCESS)
+               printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n",
+                               pfn, page_mapcount(p));
+
+       /*
+        * Now that the dirty bit has been propagated to the
+        * struct page and all unmaps are done, we can decide if
+        * killing is needed or not.  Only kill when the page
+        * was dirty; otherwise the tokill list is merely
+        * freed.  When there was a problem unmapping earlier,
+        * use a more forceful, uncatchable kill to prevent
+        * any accesses to the poisoned memory.
+        */
+       kill_procs_ao(&tokill, !!PageDirty(p), trapno,
+                     ret != SWAP_SUCCESS, pfn);
+}
+
+int __memory_failure(unsigned long pfn, int trapno, int ref)
+{
+       struct page_state *ps;
+       struct page *p;
+       int res;
+
+       if (!sysctl_memory_failure_recovery)
+               panic("Memory failure from trap %d on page %lx", trapno, pfn);
+
+       if (!pfn_valid(pfn)) {
+               action_result(pfn, "memory outside kernel control", IGNORED);
+               return -EIO;
+       }
+
+       p = pfn_to_page(pfn);
+       if (TestSetPageHWPoison(p)) {
+               action_result(pfn, "already hardware poisoned", IGNORED);
+               return 0;
+       }
+
+       atomic_long_add(1, &mce_bad_pages);
+
+       /*
+        * We neither need nor can do anything about count=0 pages.
+        * 1) it's a free page, and therefore in safe hands:
+        *    prep_new_page() will be the gatekeeper.
+        * 2) it's part of a non-compound high-order page.
+        *    That implies some kernel user: we cannot stop them from
+        *    reading/writing the page; let's pray that the page has
+        *    been used and will be freed some time later.
+        * In fact it's dangerous to directly bump up page count from 0,
+        * that may make page_freeze_refs()/page_unfreeze_refs() mismatch.
+        */
+       if (!get_page_unless_zero(compound_head(p))) {
+               action_result(pfn, "free or high order kernel", IGNORED);
+               return PageBuddy(compound_head(p)) ? 0 : -EBUSY;
+       }
+
+       /*
+        * Lock the page and wait for writeback to finish.
+        * It's very difficult to mess with pages currently under IO
+        * and in many cases impossible, so we just avoid it here.
+        */
+       lock_page_nosync(p);
+       wait_on_page_writeback(p);
+
+       /*
+        * Now take care of user space mappings.
+        */
+       hwpoison_user_mappings(p, pfn, trapno);
+
+       /*
+        * Torn down by someone else?
+        */
+       if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
+               action_result(pfn, "already truncated LRU", IGNORED);
+               res = 0;
+               goto out;
+       }
+
+       res = -EBUSY;
+       for (ps = error_states;; ps++) {
+               if ((p->flags & ps->mask) == ps->res) {
+                       res = page_action(ps, p, pfn, ref);
+                       break;
+               }
+       }
+out:
+       unlock_page(p);
+       return res;
+}
+EXPORT_SYMBOL_GPL(__memory_failure);
+
+/**
+ * memory_failure - Handle memory failure of a page.
+ * @pfn: Page Number of the corrupted page
+ * @trapno: Trap number reported in the signal to user space.
+ *
+ * This function is called by the low level machine check code
+ * of an architecture when it detects hardware memory corruption
+ * of a page. It tries its best to recover, which includes
+ * dropping pages, killing processes etc.
+ *
+ * The function is primarily of use for corruptions that
+ * happen outside the current execution context (e.g. when
+ * detected by a background scrubber).
+ *
+ * Must run in process context (e.g. a work queue) with interrupts
+ * enabled and no spinlocks held.
+ */
+void memory_failure(unsigned long pfn, int trapno)
+{
+       __memory_failure(pfn, trapno, 0);
+}
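
For exercising this path without real hardware faults, the same series adds an madvise(MADV_HWPOISON) hook that lets a privileged process poison one of its own pages; a minimal test sketch (the MADV_HWPOISON value is an assumption taken from the series; requires CAP_SYS_ADMIN and CONFIG_MEMORY_FAILURE, and any later access to the page raises SIGBUS):

        #include <stdio.h>
        #include <sys/mman.h>
        #include <unistd.h>

        #ifndef MADV_HWPOISON
        #define MADV_HWPOISON 100       /* assumed value */
        #endif

        int main(void)
        {
                long psize = sysconf(_SC_PAGESIZE);
                char *p = mmap(NULL, psize, PROT_READ | PROT_WRITE,
                               MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

                if (p == MAP_FAILED)
                        return 1;
                p[0] = 1;               /* make the page mapped and dirty */
                if (madvise(p, psize, MADV_HWPOISON))
                        perror("madvise(MADV_HWPOISON)");
                return 0;
        }
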
index b1443ac..7e91b5f 100644 (file)
@@ -297,7 +297,8 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
                unsigned long addr = vma->vm_start;
 
                /*
-                * Hide vma from rmap and vmtruncate before freeing pgtables
+                * Hide vma from rmap and truncate_pagecache before freeing
+                * pgtables
                 */
                anon_vma_unlink(vma);
                unlink_file_vma(vma);
@@ -1325,7 +1326,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                if (ret & VM_FAULT_ERROR) {
                                        if (ret & VM_FAULT_OOM)
                                                return i ? i : -ENOMEM;
-                                       else if (ret & VM_FAULT_SIGBUS)
+                                       if (ret &
+                                           (VM_FAULT_HWPOISON|VM_FAULT_SIGBUS))
                                                return i ? i : -EFAULT;
                                        BUG();
                                }
@@ -2407,7 +2409,7 @@ restart:
  * @mapping: the address space containing mmaps to be unmapped.
  * @holebegin: byte in first page to unmap, relative to the start of
  * the underlying file.  This will be rounded down to a PAGE_SIZE
- * boundary.  Note that this is different from vmtruncate(), which
+ * boundary.  Note that this is different from truncate_pagecache(), which
  * must keep the partial page.  In contrast, we must get rid of
  * partial pages.
  * @holelen: size of prospective hole in bytes.  This will be rounded
@@ -2458,63 +2460,6 @@ void unmap_mapping_range(struct address_space *mapping,
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 
-/**
- * vmtruncate - unmap mappings "freed" by truncate() syscall
- * @inode: inode of the file used
- * @offset: file offset to start truncating
- *
- * NOTE! We have to be ready to update the memory sharing
- * between the file and the memory map for a potential last
- * incomplete page.  Ugly, but necessary.
- */
-int vmtruncate(struct inode * inode, loff_t offset)
-{
-       if (inode->i_size < offset) {
-               unsigned long limit;
-
-               limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-               if (limit != RLIM_INFINITY && offset > limit)
-                       goto out_sig;
-               if (offset > inode->i_sb->s_maxbytes)
-                       goto out_big;
-               i_size_write(inode, offset);
-       } else {
-               struct address_space *mapping = inode->i_mapping;
-
-               /*
-                * truncation of in-use swapfiles is disallowed - it would
-                * cause subsequent swapout to scribble on the now-freed
-                * blocks.
-                */
-               if (IS_SWAPFILE(inode))
-                       return -ETXTBSY;
-               i_size_write(inode, offset);
-
-               /*
-                * unmap_mapping_range is called twice, first simply for
-                * efficiency so that truncate_inode_pages does fewer
-                * single-page unmaps.  However after this first call, and
-                * before truncate_inode_pages finishes, it is possible for
-                * private pages to be COWed, which remain after
-                * truncate_inode_pages finishes, hence the second
-                * unmap_mapping_range call must be made for correctness.
-                */
-               unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-               truncate_inode_pages(mapping, offset);
-               unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
-       }
-
-       if (inode->i_op->truncate)
-               inode->i_op->truncate(inode);
-       return 0;
-
-out_sig:
-       send_sig(SIGXFSZ, current, 0);
-out_big:
-       return -EFBIG;
-}
-EXPORT_SYMBOL(vmtruncate);
-
 int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
 {
        struct address_space *mapping = inode->i_mapping;
@@ -2559,8 +2504,15 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                goto out;
 
        entry = pte_to_swp_entry(orig_pte);
-       if (is_migration_entry(entry)) {
-               migration_entry_wait(mm, pmd, address);
+       if (unlikely(non_swap_entry(entry))) {
+               if (is_migration_entry(entry)) {
+                       migration_entry_wait(mm, pmd, address);
+               } else if (is_hwpoison_entry(entry)) {
+                       ret = VM_FAULT_HWPOISON;
+               } else {
+                       print_bad_pte(vma, address, orig_pte, NULL);
+                       ret = VM_FAULT_OOM;
+               }
                goto out;
        }
        delayacct_set_flag(DELAYACCT_PF_SWAPIN);
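
The non_swap_entry() and is_hwpoison_entry() tests used above are small helpers in include/linux/swapops.h; roughly, as a sketch (the exact SWP_HWPOISON type arithmetic is elided):

        static inline int non_swap_entry(swp_entry_t entry)
        {
                /* Migration and hwpoison entries use types >= MAX_SWAPFILES. */
                return swp_type(entry) >= MAX_SWAPFILES;
        }

        static inline swp_entry_t make_hwpoison_entry(struct page *page)
        {
                return swp_entry(SWP_HWPOISON, page_to_pfn(page));
        }

        static inline int is_hwpoison_entry(swp_entry_t entry)
        {
                return swp_type(entry) == SWP_HWPOISON;
        }
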
@@ -2584,6 +2536,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                /* Had to read the page from swap area: Major fault */
                ret = VM_FAULT_MAJOR;
                count_vm_event(PGMAJFAULT);
+       } else if (PageHWPoison(page)) {
+               ret = VM_FAULT_HWPOISON;
+               delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+               goto out;
        }
 
        lock_page(page);
@@ -2760,6 +2716,12 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
        if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
                return ret;
 
+       if (unlikely(PageHWPoison(vmf.page))) {
+               if (ret & VM_FAULT_LOCKED)
+                       unlock_page(vmf.page);
+               return VM_FAULT_HWPOISON;
+       }
+
        /*
         * For consistency in subsequent calls, make the faulted page always
         * locked.
index 16052e8..1a4bf48 100644 (file)
@@ -675,7 +675,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
        }
 
        /* Establish migration ptes or remove ptes */
-       try_to_unmap(page, 1);
+       try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
 
 skip_unmap:
        if (!page_mapped(page))
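
The flag word passed here replaces the old int migration argument of try_to_unmap(); the bitmask as introduced in include/linux/rmap.h by this series looks roughly like this (a sketch of the declaration, not a new definition):

        enum ttu_flags {
                TTU_UNMAP = 0,                  /* unmap mode */
                TTU_MIGRATION = 1,              /* migration mode */
                TTU_MUNLOCK = 2,                /* munlock mode */
                TTU_ACTION_MASK = 0xff,

                TTU_IGNORE_MLOCK = (1 << 8),    /* ignore mlock */
                TTU_IGNORE_ACCESS = (1 << 9),   /* don't age */
                TTU_IGNORE_HWPOISON = (1 << 10),/* corrupted page is recoverable */
        };

        #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
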
index 20a07db..97bff25 100644 (file)
@@ -86,8 +86,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
        if (vma->vm_file) {
                /*
                 * Subtle point from Rajesh Venkatasubramanian: before
-                * moving file-based ptes, we must lock vmtruncate out,
-                * since it might clean the dst vma before the src vma,
+                * moving file-based ptes, we must lock truncate_pagecache
+                * out, since it might clean the dst vma before the src vma,
                 * and we propagate stale pages into the dst afterward.
                 */
                mapping = vma->vm_file->f_mapping;
index 8d48424..c73aa47 100644 (file)
@@ -82,46 +82,6 @@ DECLARE_RWSEM(nommu_region_sem);
 struct vm_operations_struct generic_file_vm_ops = {
 };
 
-/*
- * Handle all mappings that got truncated by a "truncate()"
- * system call.
- *
- * NOTE! We have to be ready to update the memory sharing
- * between the file and the memory map for a potential last
- * incomplete page.  Ugly, but necessary.
- */
-int vmtruncate(struct inode *inode, loff_t offset)
-{
-       struct address_space *mapping = inode->i_mapping;
-       unsigned long limit;
-
-       if (inode->i_size < offset)
-               goto do_expand;
-       i_size_write(inode, offset);
-
-       truncate_inode_pages(mapping, offset);
-       goto out_truncate;
-
-do_expand:
-       limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-       if (limit != RLIM_INFINITY && offset > limit)
-               goto out_sig;
-       if (offset > inode->i_sb->s_maxbytes)
-               goto out;
-       i_size_write(inode, offset);
-
-out_truncate:
-       if (inode->i_op->truncate)
-               inode->i_op->truncate(inode);
-       return 0;
-out_sig:
-       send_sig(SIGXFSZ, current, 0);
-out:
-       return -EFBIG;
-}
-
-EXPORT_SYMBOL(vmtruncate);
-
 /*
  * Return the total memory allocated for this pointer, not
  * just what the caller asked for.
@@ -866,7 +826,7 @@ static int validate_mmap_request(struct file *file,
        int ret;
 
        /* do the simple checks first */
-       if (flags & MAP_FIXED || addr) {
+       if (flags & MAP_FIXED) {
                printk(KERN_DEBUG
                       "%d: Can't do fixed-address/overlay mmap of RAM\n",
                       current->pid);
@@ -1074,7 +1034,7 @@ static int do_mmap_shared_file(struct vm_area_struct *vma)
        ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
        if (ret == 0) {
                vma->vm_region->vm_top = vma->vm_region->vm_end;
-               return ret;
+               return 0;
        }
        if (ret != -ENOSYS)
                return ret;
@@ -1091,7 +1051,8 @@ static int do_mmap_shared_file(struct vm_area_struct *vma)
  */
 static int do_mmap_private(struct vm_area_struct *vma,
                           struct vm_region *region,
-                          unsigned long len)
+                          unsigned long len,
+                          unsigned long capabilities)
 {
        struct page *pages;
        unsigned long total, point, n, rlen;
@@ -1102,13 +1063,13 @@ static int do_mmap_private(struct vm_area_struct *vma,
         * shared mappings on devices or memory
         * - VM_MAYSHARE will be set if it may attempt to share
         */
-       if (vma->vm_file) {
+       if (capabilities & BDI_CAP_MAP_DIRECT) {
                ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
                if (ret == 0) {
                        /* shouldn't return success if we're not sharing */
                        BUG_ON(!(vma->vm_flags & VM_MAYSHARE));
                        vma->vm_region->vm_top = vma->vm_region->vm_end;
-                       return ret;
+                       return 0;
                }
                if (ret != -ENOSYS)
                        return ret;
@@ -1221,9 +1182,6 @@ unsigned long do_mmap_pgoff(struct file *file,
 
        kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff);
 
-       if (!(flags & MAP_FIXED))
-               addr = round_hint_to_min(addr);
-
        /* decide whether we should attempt the mapping, and if so what sort of
         * mapping */
        ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
@@ -1233,6 +1191,9 @@ unsigned long do_mmap_pgoff(struct file *file,
                return ret;
        }
 
+       /* we ignore the address hint */
+       addr = 0;
+
        /* we've determined that we can make the mapping, now translate what we
         * now know into VMA flags */
        vm_flags = determine_vm_flags(file, prot, flags, capabilities);
@@ -1346,7 +1307,7 @@ unsigned long do_mmap_pgoff(struct file *file,
                 * - this is the hook for quasi-memory character devices to
                 *   tell us the location of a shared mapping
                 */
-               if (file && file->f_op->get_unmapped_area) {
+               if (capabilities & BDI_CAP_MAP_DIRECT) {
                        addr = file->f_op->get_unmapped_area(file, addr, len,
                                                             pgoff, flags);
                        if (IS_ERR((void *) addr)) {
@@ -1370,15 +1331,17 @@ unsigned long do_mmap_pgoff(struct file *file,
        }
 
        vma->vm_region = region;
-       add_nommu_region(region);
 
-       /* set up the mapping */
+       /* set up the mapping
+        * - the region is filled in if BDI_CAP_MAP_DIRECT is still set
+        */
        if (file && vma->vm_flags & VM_SHARED)
                ret = do_mmap_shared_file(vma);
        else
-               ret = do_mmap_private(vma, region, len);
+               ret = do_mmap_private(vma, region, len, capabilities);
        if (ret < 0)
-               goto error_put_region;
+               goto error_just_free;
+       add_nommu_region(region);
 
        /* okay... we have a mapping; now we have to register it */
        result = vma->vm_start;
@@ -1396,19 +1359,6 @@ share:
        kleave(" = %lx", result);
        return result;
 
-error_put_region:
-       __put_nommu_region(region);
-       if (vma) {
-               if (vma->vm_file) {
-                       fput(vma->vm_file);
-                       if (vma->vm_flags & VM_EXECUTABLE)
-                               removed_exe_file_vma(vma->vm_mm);
-               }
-               kmem_cache_free(vm_area_cachep, vma);
-       }
-       kleave(" = %d [pr]", ret);
-       return ret;
-
 error_just_free:
        up_write(&nommu_region_sem);
 error:
index 5f378dd..d99664e 100644 (file)
@@ -155,37 +155,37 @@ static void update_completion_period(void)
 }
 
 int dirty_background_ratio_handler(struct ctl_table *table, int write,
-               struct file *filp, void __user *buffer, size_t *lenp,
+               void __user *buffer, size_t *lenp,
                loff_t *ppos)
 {
        int ret;
 
-       ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+       ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
        if (ret == 0 && write)
                dirty_background_bytes = 0;
        return ret;
 }
 
 int dirty_background_bytes_handler(struct ctl_table *table, int write,
-               struct file *filp, void __user *buffer, size_t *lenp,
+               void __user *buffer, size_t *lenp,
                loff_t *ppos)
 {
        int ret;
 
-       ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+       ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
        if (ret == 0 && write)
                dirty_background_ratio = 0;
        return ret;
 }
 
 int dirty_ratio_handler(struct ctl_table *table, int write,
-               struct file *filp, void __user *buffer, size_t *lenp,
+               void __user *buffer, size_t *lenp,
                loff_t *ppos)
 {
        int old_ratio = vm_dirty_ratio;
        int ret;
 
-       ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+       ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
        if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
                update_completion_period();
                vm_dirty_bytes = 0;
@@ -195,13 +195,13 @@ int dirty_ratio_handler(struct ctl_table *table, int write,
 
 
 int dirty_bytes_handler(struct ctl_table *table, int write,
-               struct file *filp, void __user *buffer, size_t *lenp,
+               void __user *buffer, size_t *lenp,
                loff_t *ppos)
 {
        unsigned long old_bytes = vm_dirty_bytes;
        int ret;
 
-       ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+       ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
        if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
                update_completion_period();
                vm_dirty_ratio = 0;
@@ -686,9 +686,9 @@ static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0);
  * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
  */
 int dirty_writeback_centisecs_handler(ctl_table *table, int write,
-       struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+       void __user *buffer, size_t *length, loff_t *ppos)
 {
-       proc_dointvec(table, write, file, buffer, length, ppos);
+       proc_dointvec(table, write, buffer, length, ppos);
        return 0;
 }
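
All of these handlers lose their struct file * argument in this merge, so out-of-tree sysctl code has to follow suit; a minimal kernel-side sketch of a handler and table entry under the new signature (my_value, my_handler and my_table are hypothetical; ctl_table layout as of this era):

        static int my_value;

        static int my_handler(struct ctl_table *table, int write,
                              void __user *buffer, size_t *lenp, loff_t *ppos)
        {
                /* Note: no struct file * between 'write' and 'buffer' anymore. */
                int ret = proc_dointvec(table, write, buffer, lenp, ppos);

                if (ret == 0 && write)
                        pr_info("my_value is now %d\n", my_value);
                return ret;
        }

        static struct ctl_table my_table[] = {
                {
                        .procname       = "my_value",
                        .data           = &my_value,
                        .maxlen         = sizeof(int),
                        .mode           = 0644,
                        .proc_handler   = my_handler,
                },
                { }
        };
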
 
@@ -1149,6 +1149,13 @@ int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
 EXPORT_SYMBOL(redirty_page_for_writepage);
 
 /*
+ * Dirty a page.
+ *
+ * For pages with a mapping this should be done under the page lock
+ * for the benefit of asynchronous memory errors, which prefer a
+ * consistent dirty state. This rule can be broken in some special
+ * cases, but it's better not to.
+ *
  * If the mapping doesn't provide a set_page_dirty a_op, then
  * just fall through and assume that it wants buffer_heads.
  */
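
A minimal sketch of the rule stated above, for a caller holding a reference to a mapped page (the helper name is illustrative):

        static void dirty_page_locked(struct page *page)
        {
                /* Take the page lock so asynchronous memory-failure
                 * handling observes a consistent dirty state. */
                lock_page(page);
                set_page_dirty(page);
                unlock_page(page);
        }
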
index 5717f27..bf72055 100644 (file)
@@ -234,6 +234,12 @@ static void bad_page(struct page *page)
        static unsigned long nr_shown;
        static unsigned long nr_unshown;
 
+       /* Don't complain about poisoned pages */
+       if (PageHWPoison(page)) {
+               __ClearPageBuddy(page);
+               return;
+       }
+
        /*
         * Allow a burst of 60 reports, then keep quiet for that minute;
         * or allow a steady drip of one report per second.
@@ -666,7 +672,7 @@ static inline void expand(struct zone *zone, struct page *page,
 /*
  * This page is about to be returned from the page allocator
  */
-static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
+static inline int check_new_page(struct page *page)
 {
        if (unlikely(page_mapcount(page) |
                (page->mapping != NULL)  |
@@ -675,6 +681,18 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
                bad_page(page);
                return 1;
        }
+       return 0;
+}
+
+static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
+{
+       int i;
+
+       for (i = 0; i < (1 << order); i++) {
+               struct page *p = page + i;
+               if (unlikely(check_new_page(p)))
+                       return 1;
+       }
 
        set_page_private(page, 0);
        set_page_refcounted(page);
@@ -2373,7 +2391,7 @@ early_param("numa_zonelist_order", setup_numa_zonelist_order);
  * sysctl handler for numa_zonelist_order
  */
 int numa_zonelist_order_handler(ctl_table *table, int write,
-               struct file *file, void __user *buffer, size_t *length,
+               void __user *buffer, size_t *length,
                loff_t *ppos)
 {
        char saved_string[NUMA_ZONELIST_ORDER_LEN];
@@ -2382,7 +2400,7 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
        if (write)
                strncpy(saved_string, (char*)table->data,
                        NUMA_ZONELIST_ORDER_LEN);
-       ret = proc_dostring(table, write, file, buffer, length, ppos);
+       ret = proc_dostring(table, write, buffer, length, ppos);
        if (ret)
                return ret;
        if (write) {
@@ -4706,9 +4724,9 @@ module_init(init_per_zone_wmark_min)
  *     changes.
  */
 int min_free_kbytes_sysctl_handler(ctl_table *table, int write, 
-       struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+       void __user *buffer, size_t *length, loff_t *ppos)
 {
-       proc_dointvec(table, write, file, buffer, length, ppos);
+       proc_dointvec(table, write, buffer, length, ppos);
        if (write)
                setup_per_zone_wmarks();
        return 0;
@@ -4716,12 +4734,12 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
 
 #ifdef CONFIG_NUMA
 int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
-       struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+       void __user *buffer, size_t *length, loff_t *ppos)
 {
        struct zone *zone;
        int rc;
 
-       rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+       rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
        if (rc)
                return rc;
 
@@ -4732,12 +4750,12 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
 }
 
 int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
-       struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+       void __user *buffer, size_t *length, loff_t *ppos)
 {
        struct zone *zone;
        int rc;
 
-       rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+       rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
        if (rc)
                return rc;
 
@@ -4758,9 +4776,9 @@ int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
  * if in function of the boot time zone sizes.
  */
 int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
-       struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+       void __user *buffer, size_t *length, loff_t *ppos)
 {
-       proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+       proc_dointvec_minmax(table, write, buffer, length, ppos);
        setup_per_zone_lowmem_reserve();
        return 0;
 }
@@ -4772,13 +4790,13 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
  */
 
 int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
-       struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+       void __user *buffer, size_t *length, loff_t *ppos)
 {
        struct zone *zone;
        unsigned int cpu;
        int ret;
 
-       ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+       ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
        if (!write || (ret == -EINVAL))
                return ret;
        for_each_populated_zone(zone) {
index 720fc03..28aafe2 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
  *                 mapping->tree_lock (widely used, in set_page_dirty,
  *                           in arch-dependent flush_dcache_mmap_lock,
  *                           within inode_lock in __sync_single_inode)
+ *
+ * (code doesn't rely on that order so it could be switched around)
+ * ->tasklist_lock
+ *   anon_vma->lock      (memory_failure, collect_procs_anon)
+ *     pte map lock
  */
 
 #include <linux/mm.h>
@@ -191,7 +196,7 @@ void __init anon_vma_init(void)
  * Getting a lock on a stable anon_vma from a page off the LRU is
  * tricky: page_lock_anon_vma relies on RCU to guard against the races.
  */
-static struct anon_vma *page_lock_anon_vma(struct page *page)
+struct anon_vma *page_lock_anon_vma(struct page *page)
 {
        struct anon_vma *anon_vma;
        unsigned long anon_mapping;
@@ -211,7 +216,7 @@ out:
        return NULL;
 }
 
-static void page_unlock_anon_vma(struct anon_vma *anon_vma)
+void page_unlock_anon_vma(struct anon_vma *anon_vma)
 {
        spin_unlock(&anon_vma->lock);
        rcu_read_unlock();
@@ -311,7 +316,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
  * if the page is not mapped into the page tables of this VMA.  Only
  * valid for normal file or anonymous VMAs.
  */
-static int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
+int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
 {
        unsigned long address;
        pte_t *pte;
@@ -756,7 +761,7 @@ void page_remove_rmap(struct page *page)
  * repeatedly from either try_to_unmap_anon or try_to_unmap_file.
  */
 static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
-                               int migration)
+                               enum ttu_flags flags)
 {
        struct mm_struct *mm = vma->vm_mm;
        unsigned long address;
@@ -778,11 +783,13 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
         * If it's recently referenced (perhaps page_referenced
         * skipped over this mm) then we should reactivate it.
         */
-       if (!migration) {
+       if (!(flags & TTU_IGNORE_MLOCK)) {
                if (vma->vm_flags & VM_LOCKED) {
                        ret = SWAP_MLOCK;
                        goto out_unmap;
                }
+       }
+       if (!(flags & TTU_IGNORE_ACCESS)) {
                if (ptep_clear_flush_young_notify(vma, address, pte)) {
                        ret = SWAP_FAIL;
                        goto out_unmap;
@@ -800,7 +807,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
        /* Update high watermark before we lower rss */
        update_hiwater_rss(mm);
 
-       if (PageAnon(page)) {
+       if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
+               if (PageAnon(page))
+                       dec_mm_counter(mm, anon_rss);
+               else
+                       dec_mm_counter(mm, file_rss);
+               set_pte_at(mm, address, pte,
+                               swp_entry_to_pte(make_hwpoison_entry(page)));
+       } else if (PageAnon(page)) {
                swp_entry_t entry = { .val = page_private(page) };
 
                if (PageSwapCache(page)) {
@@ -822,12 +836,12 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                         * pte. do_swap_page() will wait until the migration
                         * pte is removed and then restart fault handling.
                         */
-                       BUG_ON(!migration);
+                       BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
                        entry = make_migration_entry(page, pte_write(pteval));
                }
                set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
                BUG_ON(pte_file(*pte));
-       } else if (PAGE_MIGRATION && migration) {
+       } else if (PAGE_MIGRATION && (TTU_ACTION(flags) == TTU_MIGRATION)) {
                /* Establish migration entry for a file page */
                swp_entry_t entry;
                entry = make_migration_entry(page, pte_write(pteval));
@@ -996,12 +1010,13 @@ static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma)
  * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
  * 'LOCKED.
  */
-static int try_to_unmap_anon(struct page *page, int unlock, int migration)
+static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
 {
        struct anon_vma *anon_vma;
        struct vm_area_struct *vma;
        unsigned int mlocked = 0;
        int ret = SWAP_AGAIN;
+       int unlock = TTU_ACTION(flags) == TTU_MUNLOCK;
 
        if (MLOCK_PAGES && unlikely(unlock))
                ret = SWAP_SUCCESS;     /* default for try_to_munlock() */
@@ -1017,7 +1032,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration)
                                continue;  /* must visit all unlocked vmas */
                        ret = SWAP_MLOCK;  /* saw at least one mlocked vma */
                } else {
-                       ret = try_to_unmap_one(page, vma, migration);
+                       ret = try_to_unmap_one(page, vma, flags);
                        if (ret == SWAP_FAIL || !page_mapped(page))
                                break;
                }
@@ -1041,8 +1056,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration)
 /**
  * try_to_unmap_file - unmap/unlock file page using the object-based rmap method
  * @page: the page to unmap/unlock
- * @unlock:  request for unlock rather than unmap [unlikely]
- * @migration:  unmapping for migration - ignored if @unlock
+ * @flags: action and flags
  *
  * Find all the mappings of a page using the mapping pointer and the vma chains
  * contained in the address_space struct it points to.
@@ -1054,7 +1068,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration)
  * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
  * VM_LOCKED.
  */
-static int try_to_unmap_file(struct page *page, int unlock, int migration)
+static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 {
        struct address_space *mapping = page->mapping;
        pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
@@ -1066,6 +1080,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
        unsigned long max_nl_size = 0;
        unsigned int mapcount;
        unsigned int mlocked = 0;
+       int unlock = TTU_ACTION(flags) == TTU_MUNLOCK;
 
        if (MLOCK_PAGES && unlikely(unlock))
                ret = SWAP_SUCCESS;     /* default for try_to_munlock() */
@@ -1078,7 +1093,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
                                continue;       /* must visit all vmas */
                        ret = SWAP_MLOCK;
                } else {
-                       ret = try_to_unmap_one(page, vma, migration);
+                       ret = try_to_unmap_one(page, vma, flags);
                        if (ret == SWAP_FAIL || !page_mapped(page))
                                goto out;
                }
@@ -1103,7 +1118,8 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
                        ret = SWAP_MLOCK;       /* leave mlocked == 0 */
                        goto out;               /* no need to look further */
                }
-               if (!MLOCK_PAGES && !migration && (vma->vm_flags & VM_LOCKED))
+               if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) &&
+                       (vma->vm_flags & VM_LOCKED))
                        continue;
                cursor = (unsigned long) vma->vm_private_data;
                if (cursor > max_nl_cursor)
@@ -1137,7 +1153,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
        do {
                list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
                                                shared.vm_set.list) {
-                       if (!MLOCK_PAGES && !migration &&
+                       if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) &&
                            (vma->vm_flags & VM_LOCKED))
                                continue;
                        cursor = (unsigned long) vma->vm_private_data;
@@ -1177,7 +1193,7 @@ out:
 /**
  * try_to_unmap - try to remove all page table mappings to a page
  * @page: the page to get unmapped
- * @migration: migration flag
+ * @flags: action and flags
  *
  * Tries to remove all the page table entries which are mapping this
  * page, used in the pageout path.  Caller must hold the page lock.
@@ -1188,16 +1204,16 @@ out:
  * SWAP_FAIL   - the page is unswappable
  * SWAP_MLOCK  - page is mlocked.
  */
-int try_to_unmap(struct page *page, int migration)
+int try_to_unmap(struct page *page, enum ttu_flags flags)
 {
        int ret;
 
        BUG_ON(!PageLocked(page));
 
        if (PageAnon(page))
-               ret = try_to_unmap_anon(page, 0, migration);
+               ret = try_to_unmap_anon(page, flags);
        else
-               ret = try_to_unmap_file(page, 0, migration);
+               ret = try_to_unmap_file(page, flags);
        if (ret != SWAP_MLOCK && !page_mapped(page))
                ret = SWAP_SUCCESS;
        return ret;
@@ -1222,8 +1238,8 @@ int try_to_munlock(struct page *page)
        VM_BUG_ON(!PageLocked(page) || PageLRU(page));
 
        if (PageAnon(page))
-               return try_to_unmap_anon(page, 1, 0);
+               return try_to_unmap_anon(page, TTU_MUNLOCK);
        else
-               return try_to_unmap_file(page, 1, 0);
+               return try_to_unmap_file(page, TTU_MUNLOCK);
 }
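
The rmap conversion above replaces the old "int migration" argument with an
enum ttu_flags that packs an action code and modifier bits. A plausible
definition, consistent with the TTU_ACTION(), TTU_MUNLOCK and TTU_IGNORE_*
uses in this diff (a sketch, not necessarily the exact header):

	enum ttu_flags {
		TTU_UNMAP = 0,			/* plain unmap */
		TTU_MIGRATION = 1,		/* unmap to install migration entries */
		TTU_MUNLOCK = 2,		/* munlock rather than unmap */
		TTU_ACTION_MASK = 0xff,		/* low byte selects the action */

		TTU_IGNORE_MLOCK = (1 << 8),	/* don't bail out on VM_LOCKED vmas */
		TTU_IGNORE_ACCESS = (1 << 9),	/* don't fail on recently used ptes */
		TTU_IGNORE_HWPOISON = (1 << 10),/* unmap even a poisoned page */
	};

	#define TTU_ACTION(x)	((x) & TTU_ACTION_MASK)

Packing the action into the low byte keeps old callers trivial -- the
vmscan change below passes TTU_UNMAP where it used to pass 0 -- while the
hwpoison path can OR in modifier bits as needed.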
 
index b206a7a..98631c2 100644 (file)
@@ -1633,8 +1633,8 @@ shmem_write_end(struct file *file, struct address_space *mapping,
        if (pos + copied > inode->i_size)
                i_size_write(inode, pos + copied);
 
-       unlock_page(page);
        set_page_dirty(page);
+       unlock_page(page);
        page_cache_release(page);
 
        return copied;
@@ -1971,13 +1971,13 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
                        iput(inode);
                        return error;
                }
-               unlock_page(page);
                inode->i_mapping->a_ops = &shmem_aops;
                inode->i_op = &shmem_symlink_inode_operations;
                kaddr = kmap_atomic(page, KM_USER0);
                memcpy(kaddr, symname, len);
                kunmap_atomic(kaddr, KM_USER0);
                set_page_dirty(page);
+               unlock_page(page);
                page_cache_release(page);
        }
        if (dir->i_mode & S_ISGID)
@@ -2420,6 +2420,7 @@ static const struct address_space_operations shmem_aops = {
        .write_end      = shmem_write_end,
 #endif
        .migratepage    = migrate_page,
+       .error_remove_page = generic_error_remove_page,
 };
 
 static const struct file_operations shmem_file_operations = {
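
Two independent shmem fixes land here: set_page_dirty() now runs before
unlock_page(), so the page cannot be grabbed for writeback or reclaim
between being unlocked and being dirtied, and shmem_aops gains the new
error_remove_page hook. Any filesystem whose data pages can simply be
dropped on a hardware error can wire up the generic helper the same way;
a minimal hypothetical example (myfs_aops and the use of the libfs
helpers here stand in for a real implementation):

	static const struct address_space_operations myfs_aops = {
		.readpage          = simple_readpage,
		.write_begin       = simple_write_begin,
		.write_end         = simple_write_end,
		/* let the memory-failure code punch the page out of
		 * the pagecache instead of declaring the fs dead */
		.error_remove_page = generic_error_remove_page,
	};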
index f1bf19d..4de7f02 100644 (file)
@@ -699,7 +699,7 @@ int free_swap_and_cache(swp_entry_t entry)
        struct swap_info_struct *p;
        struct page *page = NULL;
 
-       if (is_migration_entry(entry))
+       if (non_swap_entry(entry))
                return 1;
 
        p = swap_info_get(entry);
@@ -2085,7 +2085,7 @@ static int __swap_duplicate(swp_entry_t entry, bool cache)
        int count;
        bool has_cache;
 
-       if (is_migration_entry(entry))
+       if (non_swap_entry(entry))
                return -EINVAL;
 
        type = swp_type(entry);
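
Both swapfile.c hunks widen a check that used to single out migration
entries: now that hwpoison entries can also sit in ptes, any non-swap
software entry must be rejected. The helpers presumably look along these
lines -- a sketch assuming the special entries use reserved swap types at
or above MAX_SWAPFILES:

	static inline int non_swap_entry(swp_entry_t entry)
	{
		/* migration and hwpoison entries use reserved types */
		return swp_type(entry) >= MAX_SWAPFILES;
	}

	static inline swp_entry_t make_hwpoison_entry(struct page *page)
	{
		BUG_ON(!PageLocked(page));
		return swp_entry(SWP_HWPOISON, page_to_pfn(page));
	}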
index ccc3ecf..450cebd 100644 (file)
@@ -93,11 +93,11 @@ EXPORT_SYMBOL(cancel_dirty_page);
  * its lock, b) when a concurrent invalidate_mapping_pages got there first and
  * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
  */
-static void
+static int
 truncate_complete_page(struct address_space *mapping, struct page *page)
 {
        if (page->mapping != mapping)
-               return;
+               return -EIO;
 
        if (page_has_private(page))
                do_invalidatepage(page, 0);
@@ -108,6 +108,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
        remove_from_page_cache(page);
        ClearPageMappedToDisk(page);
        page_cache_release(page);       /* pagecache ref */
+       return 0;
 }
 
 /*
@@ -135,6 +136,51 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)
        return ret;
 }
 
+int truncate_inode_page(struct address_space *mapping, struct page *page)
+{
+       if (page_mapped(page)) {
+               unmap_mapping_range(mapping,
+                                  (loff_t)page->index << PAGE_CACHE_SHIFT,
+                                  PAGE_CACHE_SIZE, 0);
+       }
+       return truncate_complete_page(mapping, page);
+}
+
+/*
+ * Used to get rid of pages on hardware memory corruption.
+ */
+int generic_error_remove_page(struct address_space *mapping, struct page *page)
+{
+       if (!mapping)
+               return -EINVAL;
+       /*
+        * Only punch for normal data pages for now.
+        * Handling other types like directories would need more auditing.
+        */
+       if (!S_ISREG(mapping->host->i_mode))
+               return -EIO;
+       return truncate_inode_page(mapping, page);
+}
+EXPORT_SYMBOL(generic_error_remove_page);
+
+/*
+ * Safely invalidate one page from its pagecache mapping.
+ * It only drops clean, unused pages. The page must be locked.
+ *
+ * Returns 1 if the page is successfully invalidated, otherwise 0.
+ */
+int invalidate_inode_page(struct page *page)
+{
+       struct address_space *mapping = page_mapping(page);
+       if (!mapping)
+               return 0;
+       if (PageDirty(page) || PageWriteback(page))
+               return 0;
+       if (page_mapped(page))
+               return 0;
+       return invalidate_complete_page(mapping, page);
+}
+
 /**
  * truncate_inode_pages - truncate range of pages specified by start & end byte offsets
  * @mapping: mapping to truncate
@@ -196,12 +242,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
                                unlock_page(page);
                                continue;
                        }
-                       if (page_mapped(page)) {
-                               unmap_mapping_range(mapping,
-                                 (loff_t)page_index<<PAGE_CACHE_SHIFT,
-                                 PAGE_CACHE_SIZE, 0);
-                       }
-                       truncate_complete_page(mapping, page);
+                       truncate_inode_page(mapping, page);
                        unlock_page(page);
                }
                pagevec_release(&pvec);
@@ -238,15 +279,10 @@ void truncate_inode_pages_range(struct address_space *mapping,
                                break;
                        lock_page(page);
                        wait_on_page_writeback(page);
-                       if (page_mapped(page)) {
-                               unmap_mapping_range(mapping,
-                                 (loff_t)page->index<<PAGE_CACHE_SHIFT,
-                                 PAGE_CACHE_SIZE, 0);
-                       }
+                       truncate_inode_page(mapping, page);
                        if (page->index > next)
                                next = page->index;
                        next++;
-                       truncate_complete_page(mapping, page);
                        unlock_page(page);
                }
                pagevec_release(&pvec);
@@ -311,12 +347,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
                        if (lock_failed)
                                continue;
 
-                       if (PageDirty(page) || PageWriteback(page))
-                               goto unlock;
-                       if (page_mapped(page))
-                               goto unlock;
-                       ret += invalidate_complete_page(mapping, page);
-unlock:
+                       ret += invalidate_inode_page(page);
+
                        unlock_page(page);
                        if (next > end)
                                break;
@@ -465,3 +497,67 @@ int invalidate_inode_pages2(struct address_space *mapping)
        return invalidate_inode_pages2_range(mapping, 0, -1);
 }
 EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
+
+/**
+ * truncate_pagecache - unmap and remove pagecache that has been truncated
+ * @inode: inode
+ * @old: old file offset
+ * @new: new file offset
+ *
+ * inode's new i_size must already be written before truncate_pagecache
+ * is called.
+ *
+ * This function should typically be called before the filesystem
+ * releases resources associated with the freed range (e.g. deallocates
+ * blocks). This way, pagecache will always stay logically coherent
+ * with on-disk format, and the filesystem would not have to deal with
+ * situations such as writepage being called for a page that has already
+ * had its underlying blocks deallocated.
+ */
+void truncate_pagecache(struct inode *inode, loff_t old, loff_t new)
+{
+       if (new < old) {
+               struct address_space *mapping = inode->i_mapping;
+
+               /*
+                * unmap_mapping_range is called twice, first simply for
+                * efficiency so that truncate_inode_pages does fewer
+                * single-page unmaps.  However after this first call, and
+                * before truncate_inode_pages finishes, it is possible for
+                * private pages to be COWed, which remain after
+                * truncate_inode_pages finishes, hence the second
+                * unmap_mapping_range call must be made for correctness.
+                */
+               unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
+               truncate_inode_pages(mapping, new);
+               unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
+       }
+}
+EXPORT_SYMBOL(truncate_pagecache);
+
+/**
+ * vmtruncate - unmap mappings "freed" by truncate() syscall
+ * @inode: inode of the file used
+ * @offset: file offset to start truncating
+ *
+ * NOTE! We have to be ready to update the memory sharing
+ * between the file and the memory map for a potential last
+ * incomplete page.  Ugly, but necessary.
+ */
+int vmtruncate(struct inode *inode, loff_t offset)
+{
+       loff_t oldsize;
+       int error;
+
+       error = inode_newsize_ok(inode, offset);
+       if (error)
+               return error;
+       oldsize = inode->i_size;
+       i_size_write(inode, offset);
+       truncate_pagecache(inode, oldsize, offset);
+       if (inode->i_op->truncate)
+               inode->i_op->truncate(inode);
+
+       return error;
+}
+EXPORT_SYMBOL(vmtruncate);
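
With truncate_pagecache() and vmtruncate() available from mm/truncate.c, a
filesystem with no private truncate bookkeeping can route size changes
through them from its ->setattr. A hedged sketch (the myfs_* names are
hypothetical):

	static int myfs_setattr(struct dentry *dentry, struct iattr *attr)
	{
		struct inode *inode = dentry->d_inode;
		int error = inode_change_ok(inode, attr);

		if (error)
			return error;
		if (attr->ia_valid & ATTR_SIZE) {
			/* vmtruncate() validates the new size, writes
			 * i_size, shoots down mappings and pagecache
			 * beyond it, then calls ->truncate so the fs
			 * can free the underlying blocks */
			error = vmtruncate(inode, attr->ia_size);
			if (error)
				return error;
		}
		/* ... apply the remaining attributes ... */
		return 0;
	}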
index 613e89f..1219ceb 100644 (file)
@@ -663,7 +663,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                 * processes. Try to unmap it here.
                 */
                if (page_mapped(page) && mapping) {
-                       switch (try_to_unmap(page, 0)) {
+                       switch (try_to_unmap(page, TTU_UNMAP)) {
                        case SWAP_FAIL:
                                goto activate_locked;
                        case SWAP_AGAIN:
@@ -1836,11 +1836,45 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 
+unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
+                                               gfp_t gfp_mask, bool noswap,
+                                               unsigned int swappiness,
+                                               struct zone *zone, int nid)
+{
+       struct scan_control sc = {
+               .may_writepage = !laptop_mode,
+               .may_unmap = 1,
+               .may_swap = !noswap,
+               .swap_cluster_max = SWAP_CLUSTER_MAX,
+               .swappiness = swappiness,
+               .order = 0,
+               .mem_cgroup = mem,
+               .isolate_pages = mem_cgroup_isolate_pages,
+       };
+       nodemask_t nm  = nodemask_of_node(nid);
+
+       sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
+                       (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
+       sc.nodemask = &nm;
+       sc.nr_reclaimed = 0;
+       sc.nr_scanned = 0;
+       /*
+        * NOTE: Although we can get the priority field, using it
+        * here is not a good idea, since it limits the pages we can scan.
+        * If we don't reclaim here, the shrink_zone from balance_pgdat
+        * will pick up pages from other mem cgroups as well. We hack
+        * the priority and make it zero.
+        */
+       shrink_zone(0, zone, &sc);
+       return sc.nr_reclaimed;
+}
+
 unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
                                           gfp_t gfp_mask,
                                           bool noswap,
                                           unsigned int swappiness)
 {
+       struct zonelist *zonelist;
        struct scan_control sc = {
                .may_writepage = !laptop_mode,
                .may_unmap = 1,
@@ -1852,7 +1886,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
                .isolate_pages = mem_cgroup_isolate_pages,
                .nodemask = NULL, /* we don't care the placement */
        };
-       struct zonelist *zonelist;
 
        sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                        (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -1974,6 +2007,7 @@ loop_again:
                for (i = 0; i <= end_zone; i++) {
                        struct zone *zone = pgdat->node_zones + i;
                        int nr_slab;
+                       int nid, zid;
 
                        if (!populated_zone(zone))
                                continue;
@@ -1988,6 +2022,15 @@ loop_again:
                        temp_priority[i] = priority;
                        sc.nr_scanned = 0;
                        note_zone_scanning_priority(zone, priority);
+
+                       nid = pgdat->node_id;
+                       zid = zone_idx(zone);
+                       /*
+                        * Call soft limit reclaim before calling shrink_zone.
+                        * For now we ignore the return value.
+                        */
+                       mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask,
+                                                       nid, zid);
                        /*
                         * We put equal pressure on every zone, unless one
                         * zone has way too many pages free already.
@@ -2801,10 +2844,10 @@ static void scan_all_zones_unevictable_pages(void)
 unsigned long scan_unevictable_pages;
 
 int scan_unevictable_handler(struct ctl_table *table, int write,
-                          struct file *file, void __user *buffer,
+                          void __user *buffer,
                           size_t *length, loff_t *ppos)
 {
-       proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
+       proc_doulongvec_minmax(table, write, buffer, length, ppos);
 
        if (write && *(unsigned long *)table->data)
                scan_all_zones_unevictable_pages();
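
mem_cgroup_shrink_node_zone() gives the soft-limit code a way to apply
targeted pressure to one cgroup in one zone, and balance_pgdat() now calls
mem_cgroup_soft_limit_reclaim() ahead of the global shrink_zone(). How the
latter might drive the former -- purely a hedged sketch, where
pick_over_soft_limit() is hypothetical and stands in for whatever structure
tracks groups exceeding their soft limit:

	static unsigned long soft_limit_sketch(struct zone *zone,
					       gfp_t gfp_mask, int nid)
	{
		unsigned long reclaimed = 0;
		struct mem_cgroup *mem;

		/* keep picking the group most over its soft limit and
		 * reclaim from just this node/zone on its behalf */
		while ((mem = pick_over_soft_limit(zone)) != NULL) {
			/* noswap=false, default swappiness of 60 */
			reclaimed += mem_cgroup_shrink_node_zone(mem,
					gfp_mask, false, 60, zone, nid);
			if (reclaimed >= SWAP_CLUSTER_MAX)
				break;
		}
		return reclaimed;
	}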
index 907a82e..a16a234 100644 (file)
@@ -965,12 +965,12 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = {
 
 #ifdef CONFIG_SYSCTL
 static
-int brnf_sysctl_call_tables(ctl_table * ctl, int write, struct file *filp,
+int brnf_sysctl_call_tables(ctl_table * ctl, int write,
                            void __user * buffer, size_t * lenp, loff_t * ppos)
 {
        int ret;
 
-       ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+       ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
        if (write && *(int *)(ctl->data))
                *(int *)(ctl->data) = 1;
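
This hunk, like scan_unevictable_handler above and the decnet, ipv4, ipv6,
irda, netfilter, phonet, sunrpc and security handlers below, follows one
mechanical conversion: the unused struct file * argument is dropped from
sysctl proc_handler callbacks and from the proc_dointvec()/proc_dostring()
family they delegate to. Every converted handler ends up with this shape
(the my_* names are hypothetical):

	static int my_sysctl_handler(struct ctl_table *table, int write,
				     void __user *buffer, size_t *lenp,
				     loff_t *ppos)
	{
		/* parse or format the value exactly as before, minus filp */
		int ret = proc_dointvec(table, write, buffer, lenp, ppos);

		if (ret == 0 && write)
			my_apply_new_value(*(int *)table->data);
		return ret;
	}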
index 1c6a5bb..6e1f085 100644 (file)
@@ -164,7 +164,7 @@ static int max_t3[] = { 8191 }; /* Must fit in 16 bits when multiplied by BCT3MU
 static int min_priority[1];
 static int max_priority[] = { 127 }; /* From DECnet spec */
 
-static int dn_forwarding_proc(ctl_table *, int, struct file *,
+static int dn_forwarding_proc(ctl_table *, int,
                        void __user *, size_t *, loff_t *);
 static int dn_forwarding_sysctl(ctl_table *table,
                        void __user *oldval, size_t __user *oldlenp,
@@ -274,7 +274,6 @@ static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms)
 }
 
 static int dn_forwarding_proc(ctl_table *table, int write,
-                               struct file *filep,
                                void __user *buffer,
                                size_t *lenp, loff_t *ppos)
 {
@@ -290,7 +289,7 @@ static int dn_forwarding_proc(ctl_table *table, int write,
        dn_db = dev->dn_ptr;
        old = dn_db->parms.forwarding;
 
-       err = proc_dointvec(table, write, filep, buffer, lenp, ppos);
+       err = proc_dointvec(table, write, buffer, lenp, ppos);
 
        if ((err >= 0) && write) {
                if (dn_db->parms.forwarding < 0)
index 5bcd592..26b0ab1 100644 (file)
@@ -165,7 +165,6 @@ static int dn_node_address_strategy(ctl_table *table,
 }
 
 static int dn_node_address_handler(ctl_table *table, int write,
-                               struct file *filp,
                                void __user *buffer,
                                size_t *lenp, loff_t *ppos)
 {
@@ -276,7 +275,6 @@ static int dn_def_dev_strategy(ctl_table *table,
 
 
 static int dn_def_dev_handler(ctl_table *table, int write,
-                               struct file * filp,
                                void __user *buffer,
                                size_t *lenp, loff_t *ppos)
 {
index 07336c6..e92f1fd 100644 (file)
@@ -1270,10 +1270,10 @@ static void inet_forward_change(struct net *net)
 }
 
 static int devinet_conf_proc(ctl_table *ctl, int write,
-                            struct file *filp, void __user *buffer,
+                            void __user *buffer,
                             size_t *lenp, loff_t *ppos)
 {
-       int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+       int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
        if (write) {
                struct ipv4_devconf *cnf = ctl->extra1;
@@ -1342,12 +1342,12 @@ static int devinet_conf_sysctl(ctl_table *table,
 }
 
 static int devinet_sysctl_forward(ctl_table *ctl, int write,
-                                 struct file *filp, void __user *buffer,
+                                 void __user *buffer,
                                  size_t *lenp, loff_t *ppos)
 {
        int *valp = ctl->data;
        int val = *valp;
-       int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+       int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
        if (write && *valp != val) {
                struct net *net = ctl->extra2;
@@ -1372,12 +1372,12 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
 }
 
 int ipv4_doint_and_flush(ctl_table *ctl, int write,
-                        struct file *filp, void __user *buffer,
+                        void __user *buffer,
                         size_t *lenp, loff_t *ppos)
 {
        int *valp = ctl->data;
        int val = *valp;
-       int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+       int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
        struct net *net = ctl->extra2;
 
        if (write && *valp != val)
index df93473..bb41992 100644 (file)
@@ -3036,7 +3036,7 @@ void ip_rt_multicast_event(struct in_device *in_dev)
 
 #ifdef CONFIG_SYSCTL
 static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
-                                       struct file *filp, void __user *buffer,
+                                       void __user *buffer,
                                        size_t *lenp, loff_t *ppos)
 {
        if (write) {
@@ -3046,7 +3046,7 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
 
                memcpy(&ctl, __ctl, sizeof(ctl));
                ctl.data = &flush_delay;
-               proc_dointvec(&ctl, write, filp, buffer, lenp, ppos);
+               proc_dointvec(&ctl, write, buffer, lenp, ppos);
 
                net = (struct net *)__ctl->extra1;
                rt_cache_flush(net, flush_delay);
@@ -3106,12 +3106,11 @@ static void rt_secret_reschedule(int old)
 }
 
 static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write,
-                                         struct file *filp,
                                          void __user *buffer, size_t *lenp,
                                          loff_t *ppos)
 {
        int old = ip_rt_secret_interval;
-       int ret = proc_dointvec_jiffies(ctl, write, filp, buffer, lenp, ppos);
+       int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
 
        rt_secret_reschedule(old);
 
index 4710d21..2dcf04d 100644 (file)
@@ -36,7 +36,7 @@ static void set_local_port_range(int range[2])
 }
 
 /* Validate changes from /proc interface. */
-static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp,
+static int ipv4_local_port_range(ctl_table *table, int write,
                                 void __user *buffer,
                                 size_t *lenp, loff_t *ppos)
 {
@@ -51,7 +51,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp,
        };
 
        inet_get_local_port_range(range, range + 1);
-       ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos);
+       ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
 
        if (write && ret == 0) {
                if (range[1] < range[0])
@@ -91,7 +91,7 @@ static int ipv4_sysctl_local_port_range(ctl_table *table,
 }
 
 
-static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * filp,
+static int proc_tcp_congestion_control(ctl_table *ctl, int write,
                                       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        char val[TCP_CA_NAME_MAX];
@@ -103,7 +103,7 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file *
 
        tcp_get_default_congestion_control(val);
 
-       ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos);
+       ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
        if (write && ret == 0)
                ret = tcp_set_default_congestion_control(val);
        return ret;
@@ -129,7 +129,7 @@ static int sysctl_tcp_congestion_control(ctl_table *table,
 }
 
 static int proc_tcp_available_congestion_control(ctl_table *ctl,
-                                                int write, struct file * filp,
+                                                int write,
                                                 void __user *buffer, size_t *lenp,
                                                 loff_t *ppos)
 {
@@ -140,13 +140,13 @@ static int proc_tcp_available_congestion_control(ctl_table *ctl,
        if (!tbl.data)
                return -ENOMEM;
        tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX);
-       ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos);
+       ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
        kfree(tbl.data);
        return ret;
 }
 
 static int proc_allowed_congestion_control(ctl_table *ctl,
-                                          int write, struct file * filp,
+                                          int write,
                                           void __user *buffer, size_t *lenp,
                                           loff_t *ppos)
 {
@@ -158,7 +158,7 @@ static int proc_allowed_congestion_control(ctl_table *ctl,
                return -ENOMEM;
 
        tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen);
-       ret = proc_dostring(&tbl, write, filp, buffer, lenp, ppos);
+       ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
        if (write && ret == 0)
                ret = tcp_set_allowed_congestion_control(tbl.data);
        kfree(tbl.data);
index 55f486d..1fd0a3d 100644 (file)
@@ -3986,14 +3986,14 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 #ifdef CONFIG_SYSCTL
 
 static
-int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
+int addrconf_sysctl_forward(ctl_table *ctl, int write,
                           void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int *valp = ctl->data;
        int val = *valp;
        int ret;
 
-       ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+       ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
        if (write)
                ret = addrconf_fixup_forwarding(ctl, valp, val);
@@ -4090,14 +4090,14 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old)
 }
 
 static
-int addrconf_sysctl_disable(ctl_table *ctl, int write, struct file * filp,
+int addrconf_sysctl_disable(ctl_table *ctl, int write,
                            void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int *valp = ctl->data;
        int val = *valp;
        int ret;
 
-       ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+       ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
        if (write)
                ret = addrconf_disable_ipv6(ctl, valp, val);
index 7015478..498b9b0 100644 (file)
@@ -1735,7 +1735,7 @@ static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
        }
 }
 
-int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct net_device *dev = ctl->extra1;
        struct inet6_dev *idev;
@@ -1746,16 +1746,16 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f
                ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
 
        if (strcmp(ctl->procname, "retrans_time") == 0)
-               ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+               ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
        else if (strcmp(ctl->procname, "base_reachable_time") == 0)
                ret = proc_dointvec_jiffies(ctl, write,
-                                           filp, buffer, lenp, ppos);
+                                           buffer, lenp, ppos);
 
        else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) ||
                 (strcmp(ctl->procname, "base_reachable_time_ms") == 0))
                ret = proc_dointvec_ms_jiffies(ctl, write,
-                                              filp, buffer, lenp, ppos);
+                                              buffer, lenp, ppos);
        else
                ret = -1;
 
index 77aecbe..d6fe764 100644 (file)
@@ -2524,13 +2524,13 @@ static const struct file_operations rt6_stats_seq_fops = {
 #ifdef CONFIG_SYSCTL
 
 static
-int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
+int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
                              void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        struct net *net = current->nsproxy->net_ns;
        int delay = net->ipv6.sysctl.flush_delay;
        if (write) {
-               proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+               proc_dointvec(ctl, write, buffer, lenp, ppos);
                fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
                return 0;
        } else
index 57f8817..5c86567 100644 (file)
@@ -73,12 +73,12 @@ static int min_lap_keepalive_time = 100;    /* 100us */
 /* For other sysctl, I've no idea of the range. Maybe Dag could help
  * us on that - Jean II */
 
-static int do_devname(ctl_table *table, int write, struct file *filp,
+static int do_devname(ctl_table *table, int write,
                      void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int ret;
 
-       ret = proc_dostring(table, write, filp, buffer, lenp, ppos);
+       ret = proc_dostring(table, write, buffer, lenp, ppos);
        if (ret == 0 && write) {
                struct ias_value *val;
 
@@ -90,12 +90,12 @@ static int do_devname(ctl_table *table, int write, struct file *filp,
 }
 
 
-static int do_discovery(ctl_table *table, int write, struct file *filp,
+static int do_discovery(ctl_table *table, int write,
                     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int ret;
 
-       ret = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+       ret = proc_dointvec(table, write, buffer, lenp, ppos);
        if (ret)
               return ret;
 
index fba2892..446e9bd 100644 (file)
@@ -1496,14 +1496,14 @@ static int ip_vs_zero_all(void)
 
 
 static int
-proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
+proc_do_defense_mode(ctl_table *table, int write,
                     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int *valp = table->data;
        int val = *valp;
        int rc;
 
-       rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+       rc = proc_dointvec(table, write, buffer, lenp, ppos);
        if (write && (*valp != val)) {
                if ((*valp < 0) || (*valp > 3)) {
                        /* Restore the correct value */
@@ -1517,7 +1517,7 @@ proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
 
 
 static int
-proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
+proc_do_sync_threshold(ctl_table *table, int write,
                       void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int *valp = table->data;
@@ -1527,7 +1527,7 @@ proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
        /* backup the value first */
        memcpy(val, valp, sizeof(val));
 
-       rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
+       rc = proc_dointvec(table, write, buffer, lenp, ppos);
        if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
                /* Restore the correct value */
                memcpy(valp, val, sizeof(val));
index 4e62030..c93494f 100644 (file)
@@ -226,7 +226,7 @@ static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3];
 static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1];
 static struct ctl_table_header *nf_log_dir_header;
 
-static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp,
+static int nf_log_proc_dostring(ctl_table *table, int write,
                         void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        const struct nf_logger *logger;
@@ -260,7 +260,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp,
                        table->data = "NONE";
                else
                        table->data = logger->name;
-               r = proc_dostring(table, write, filp, buffer, lenp, ppos);
+               r = proc_dostring(table, write, buffer, lenp, ppos);
                mutex_unlock(&nf_log_mutex);
        }
 
index 7b5749e..2220f33 100644 (file)
@@ -56,7 +56,7 @@ void phonet_get_local_port_range(int *min, int *max)
        } while (read_seqretry(&local_port_range_lock, seq));
 }
 
-static int proc_local_port_range(ctl_table *table, int write, struct file *filp,
+static int proc_local_port_range(ctl_table *table, int write,
                                void __user *buffer,
                                size_t *lenp, loff_t *ppos)
 {
@@ -70,7 +70,7 @@ static int proc_local_port_range(ctl_table *table, int write, struct file *filp,
                .extra2 = &local_port_range_max,
        };
 
-       ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos);
+       ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
 
        if (write && ret == 0) {
                if (range[1] < range[0])
index a417d5a..38829e2 100644 (file)
@@ -640,10 +640,11 @@ EXPORT_SYMBOL_GPL(rpc_call_async);
 /**
  * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
  * rpc_execute against it
- * @ops: RPC call ops
+ * @req: RPC request
+ * @tk_ops: RPC call ops
  */
 struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
-                                       const struct rpc_call_ops *tk_ops)
+                               const struct rpc_call_ops *tk_ops)
 {
        struct rpc_task *task;
        struct xdr_buf *xbufp = &req->rq_snd_buf;
index 5231f7a..42f9748 100644 (file)
@@ -56,7 +56,7 @@ rpc_unregister_sysctl(void)
        }
 }
 
-static int proc_do_xprt(ctl_table *table, int write, struct file *file,
+static int proc_do_xprt(ctl_table *table, int write,
                        void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        char tmpbuf[256];
@@ -71,7 +71,7 @@ static int proc_do_xprt(ctl_table *table, int write, struct file *file,
 }
 
 static int
-proc_dodebug(ctl_table *table, int write, struct file *file,
+proc_dodebug(ctl_table *table, int write,
                                void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        char            tmpbuf[20], c, *s;
index 8710117..35fb68b 100644 (file)
@@ -80,7 +80,7 @@ struct kmem_cache *svc_rdma_ctxt_cachep;
  * current value.
  */
 static int read_reset_stat(ctl_table *table, int write,
-                          struct file *filp, void __user *buffer, size_t *lenp,
+                          void __user *buffer, size_t *lenp,
                           loff_t *ppos)
 {
        atomic_t *stat = (atomic_t *)table->data;
index b8186ba..6cf8fd2 100644 (file)
@@ -61,7 +61,8 @@ static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
 struct cgroup_subsys devices_subsys;
 
 static int devcgroup_can_attach(struct cgroup_subsys *ss,
-               struct cgroup *new_cgroup, struct task_struct *task)
+               struct cgroup *new_cgroup, struct task_struct *task,
+               bool threadgroup)
 {
        if (current != task && !capable(CAP_SYS_ADMIN))
                        return -EPERM;
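
cgroup can_attach callbacks gain a threadgroup flag saying whether a single
task or a whole thread group is being moved. A subsystem that must vet each
thread could use it as in this hedged sketch (my_check_one() is
hypothetical):

	static int my_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
				 struct task_struct *tsk, bool threadgroup)
	{
		struct task_struct *t = tsk;
		int ret = my_check_one(cgrp, tsk);

		if (ret || !threadgroup)
			return ret;

		/* the whole group moves: vet tsk's siblings as well */
		rcu_read_lock();
		while (!ret && (t = next_thread(t)) != tsk)
			ret = my_check_one(cgrp, t);
		rcu_read_unlock();
		return ret;
	}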
index 500aad0..3bb90b6 100644 (file)
@@ -187,7 +187,7 @@ static inline void print_ipv6_addr(struct audit_buffer *ab,
                                   char *name1, char *name2)
 {
        if (!ipv6_addr_any(addr))
-               audit_log_format(ab, " %s=%pI6", name1, addr);
+               audit_log_format(ab, " %s=%pI6c", name1, addr);
        if (port)
                audit_log_format(ab, " %s=%d", name2, ntohs(port));
 }
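
Switching the audit format from %pI6 to %pI6c makes the log emit RFC
5952-style compressed IPv6 addresses instead of eight fully padded groups,
e.g. for the loopback address:

	struct in6_addr loop = IN6ADDR_LOOPBACK_INIT;

	pr_info("full:  %pI6\n", &loop);
		/* 0000:0000:0000:0000:0000:0000:0000:0001 */
	pr_info("short: %pI6c\n", &loop);
		/* ::1 */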
index 14cc7b3..c844eed 100644 (file)
@@ -28,12 +28,12 @@ static void update_mmap_min_addr(void)
  * sysctl handler which just sets dac_mmap_min_addr = the new value and then
  * calls update_mmap_min_addr() so non MAP_FIXED hints get rounded properly
  */
-int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp,
+int mmap_min_addr_handler(struct ctl_table *table, int write,
                          void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int ret;
 
-       ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+       ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
 
        update_mmap_min_addr();
 
index 417f7c9..bb230d5 100644 (file)
@@ -2411,7 +2411,7 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm)
        /* Wake up the parent if it is waiting so that it can recheck
         * wait permission to the new task SID. */
        read_lock(&tasklist_lock);
-       wake_up_interruptible(&current->real_parent->signal->wait_chldexit);
+       __wake_up_parent(current, current->real_parent);
        read_unlock(&tasklist_lock);
 }
 
index 4e34d19..b4b48af 100644 (file)
@@ -137,9 +137,9 @@ static int pxa2xx_ac97_do_resume(struct snd_card *card)
        return 0;
 }
 
-static int pxa2xx_ac97_suspend(struct platform_device *dev, pm_message_t state)
+static int pxa2xx_ac97_suspend(struct device *dev)
 {
-       struct snd_card *card = platform_get_drvdata(dev);
+       struct snd_card *card = dev_get_drvdata(dev);
        int ret = 0;
 
        if (card)
@@ -148,9 +148,9 @@ static int pxa2xx_ac97_suspend(struct platform_device *dev, pm_message_t state)
        return ret;
 }
 
-static int pxa2xx_ac97_resume(struct platform_device *dev)
+static int pxa2xx_ac97_resume(struct device *dev)
 {
-       struct snd_card *card = platform_get_drvdata(dev);
+       struct snd_card *card = dev_get_drvdata(dev);
        int ret = 0;
 
        if (card)
@@ -159,9 +159,10 @@ static int pxa2xx_ac97_resume(struct platform_device *dev)
        return ret;
 }
 
-#else
-#define pxa2xx_ac97_suspend    NULL
-#define pxa2xx_ac97_resume     NULL
+static struct dev_pm_ops pxa2xx_ac97_pm_ops = {
+       .suspend        = pxa2xx_ac97_suspend,
+       .resume         = pxa2xx_ac97_resume,
+};
 #endif
 
 static int __devinit pxa2xx_ac97_probe(struct platform_device *dev)
@@ -241,11 +242,12 @@ static int __devexit pxa2xx_ac97_remove(struct platform_device *dev)
 static struct platform_driver pxa2xx_ac97_driver = {
        .probe          = pxa2xx_ac97_probe,
        .remove         = __devexit_p(pxa2xx_ac97_remove),
-       .suspend        = pxa2xx_ac97_suspend,
-       .resume         = pxa2xx_ac97_resume,
        .driver         = {
                .name   = "pxa2xx-ac97",
                .owner  = THIS_MODULE,
+#ifdef CONFIG_PM
+               .pm     = &pxa2xx_ac97_pm_ops,
+#endif
        },
 };
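
The same legacy-to-dev_pm_ops conversion applies to any platform driver
still carrying suspend/resume callbacks in struct platform_driver: move
them to device granularity and hang a dev_pm_ops off driver.pm. A hedged
template (the mydrv_* names are hypothetical):

	#ifdef CONFIG_PM
	static int mydrv_suspend(struct device *dev)
	{
		struct mydrv_chip *chip = dev_get_drvdata(dev);

		return mydrv_quiesce(chip);	/* hypothetical helper */
	}

	static int mydrv_resume(struct device *dev)
	{
		struct mydrv_chip *chip = dev_get_drvdata(dev);

		return mydrv_wake(chip);	/* hypothetical helper */
	}

	static struct dev_pm_ops mydrv_pm_ops = {
		.suspend	= mydrv_suspend,
		.resume		= mydrv_resume,
	};
	#endif

	static struct platform_driver mydrv_driver = {
		.driver	= {
			.name	= "mydrv",
			.owner	= THIS_MODULE,
	#ifdef CONFIG_PM
			.pm	= &mydrv_pm_ops,
	#endif
		},
	};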