diff --git a/target/linux/ps3/config-2.6.27 b/target/linux/ps3/config-2.6.27 deleted file mode 100644 index a3fe801554..0000000000 --- a/target/linux/ps3/config-2.6.27 +++ /dev/null @@ -1,438 +0,0 @@ -CONFIG_64BIT=y -CONFIG_ALTIVEC=y -CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y -CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y -CONFIG_ARCH_HAS_ILOG2_U32=y -CONFIG_ARCH_HAS_ILOG2_U64=y -CONFIG_ARCH_HAS_WALK_MEMORY=y -CONFIG_ARCH_MAY_HAVE_PC_FDC=y -CONFIG_ARCH_MEMORY_PROBE=y -CONFIG_ARCH_NO_VIRT_TO_BUS=y -CONFIG_ARCH_POPULATES_NODE_MAP=y -CONFIG_ARCH_SELECT_MEMORY_MODEL=y -CONFIG_ARCH_SPARSEMEM_DEFAULT=y -CONFIG_ARCH_SPARSEMEM_ENABLE=y -# CONFIG_ARCH_SUPPORTS_MSI is not set -CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y -# CONFIG_ARPD is not set -# CONFIG_ATM is not set -CONFIG_AUDIT_ARCH=y -CONFIG_BASE_SMALL=0 -CONFIG_BITREVERSE=y -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_BLK_DEV_RAM_SIZE=65535 -CONFIG_BLK_DEV_SD=y -CONFIG_BLK_DEV_SR=y -# CONFIG_BLK_DEV_SR_VENDOR is not set -# CONFIG_BLK_DEV_XIP is not set -CONFIG_BLOCK_COMPAT=y -# CONFIG_BONDING is not set -# CONFIG_BOOTX_TEXT is not set -CONFIG_BOUNCE=y -# CONFIG_BRIDGE is not set -# CONFIG_BSD_PROCESS_ACCT is not set -# CONFIG_BT is not set -# CONFIG_CGROUP_SCHED is not set -# CONFIG_CIFS is not set -CONFIG_CLASSIC_RCU=y -# CONFIG_CMDLINE_BOOL is not set -CONFIG_COMPAT=y -CONFIG_COMPAT_BINFMT_ELF=y -CONFIG_COMPAT_BRK=y -# CONFIG_CONFIGFS_FS is not set -CONFIG_CONSOLE_TRANSLATIONS=y -# CONFIG_CPU_FREQ is not set -# CONFIG_CRASH_DUMP is not set -# CONFIG_CRC16 is not set -# CONFIG_CRC_CCITT is not set -# CONFIG_CRC_ITU_T is not set -# CONFIG_CRYPTO_AES is not set -CONFIG_CRYPTO_ALGAPI=y -# CONFIG_CRYPTO_ANUBIS is not set -# CONFIG_CRYPTO_ARC4 is not set -# CONFIG_CRYPTO_AUTHENC is not set -CONFIG_CRYPTO_BLKCIPHER=y -# CONFIG_CRYPTO_BLOWFISH is not set -# CONFIG_CRYPTO_CAMELLIA is not set -# CONFIG_CRYPTO_CAST5 is not set -# CONFIG_CRYPTO_CAST6 is not set -CONFIG_CRYPTO_CBC=y -# CONFIG_CRYPTO_CRC32C is not 
set -# CONFIG_CRYPTO_DEFLATE is not set -CONFIG_CRYPTO_DES=y -# CONFIG_CRYPTO_ECB is not set -# CONFIG_CRYPTO_HMAC is not set -# CONFIG_CRYPTO_KHAZAD is not set -CONFIG_CRYPTO_MANAGER=y -# CONFIG_CRYPTO_MD4 is not set -CONFIG_CRYPTO_MD5=y -# CONFIG_CRYPTO_MICHAEL_MIC is not set -# CONFIG_CRYPTO_NULL is not set -# CONFIG_CRYPTO_SERPENT is not set -# CONFIG_CRYPTO_SHA1 is not set -# CONFIG_CRYPTO_SHA256 is not set -# CONFIG_CRYPTO_SHA512 is not set -# CONFIG_CRYPTO_TEA is not set -# CONFIG_CRYPTO_TEST is not set -# CONFIG_CRYPTO_TGR192 is not set -# CONFIG_CRYPTO_TWOFISH is not set -# CONFIG_CRYPTO_WP512 is not set -# CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_DEBUG_FS is not set -CONFIG_DEFAULT_AS=y -# CONFIG_DEFAULT_DEADLINE is not set -CONFIG_DEFAULT_IOSCHED="anticipatory" -CONFIG_DEFAULT_TCP_CONG="cubic" -# CONFIG_DEFAULT_UIMAGE is not set -CONFIG_DEVKMEM=y -CONFIG_DNOTIFY=y -CONFIG_DUMMY_CONSOLE=y -CONFIG_EARLY_PRINTK=y -# CONFIG_EDAC is not set -CONFIG_ELF_CORE=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set -CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_POSIX_ACL is not set -# CONFIG_EXT3_FS_SECURITY is not set -CONFIG_EXT3_FS_XATTR=y -CONFIG_FAIR_GROUP_SCHED=y -CONFIG_FAT_FS=y -CONFIG_FB=y -# CONFIG_FB_BACKLIGHT is not set -# CONFIG_FB_CFB_COPYAREA is not set -# CONFIG_FB_CFB_FILLRECT is not set -# CONFIG_FB_CFB_IMAGEBLIT is not set -# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set -# CONFIG_FB_DDC is not set -# CONFIG_FB_FOREIGN_ENDIAN is not set -# CONFIG_FB_MACMODES is not set -# CONFIG_FB_MODE_HELPERS is not set -# CONFIG_FB_OF is not set -CONFIG_FB_PS3=y -CONFIG_FB_PS3_DEFAULT_SIZE_M=9 -# CONFIG_FB_S1D13XXX is not set -# CONFIG_FB_SVGALIB is not set -CONFIG_FB_SYS_COPYAREA=y -CONFIG_FB_SYS_FILLRECT=y -CONFIG_FB_SYS_FOPS=y -CONFIG_FB_SYS_IMAGEBLIT=y -# CONFIG_FB_TILEBLITTING is not set -# CONFIG_FB_VGA16 is not set -# CONFIG_FB_VIRTUAL is not set -# CONFIG_FIRMWARE_EDID is not set -# CONFIG_FLATMEM_MANUAL is not set -# CONFIG_FONTS is not set 
-CONFIG_FONT_8x16=y -CONFIG_FONT_8x8=y -CONFIG_FORCE_MAX_ZONEORDER=13 -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y -CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y -CONFIG_FRAME_WARN=2048 -# CONFIG_FSL_ULI1575 is not set -CONFIG_FS_MBCACHE=y -CONFIG_GELIC_NET=y -# CONFIG_GELIC_WIRELESS is not set -CONFIG_GENERIC_BUG=y -CONFIG_GENERIC_CLOCKEVENTS=y -CONFIG_GENERIC_CLOCKEVENTS_BUILD=y -CONFIG_GENERIC_CMOS_UPDATE=y -# CONFIG_GENERIC_FIND_FIRST_BIT is not set -CONFIG_GENERIC_FIND_NEXT_BIT=y -# CONFIG_GENERIC_IOMAP is not set -CONFIG_GENERIC_ISA_DMA=y -# CONFIG_GENERIC_TBSYNC is not set -CONFIG_GENERIC_TIME_VSYSCALL=y -# CONFIG_GEN_RTC is not set -CONFIG_GROUP_SCHED=y -# CONFIG_HAMRADIO is not set -# CONFIG_HANGCHECK_TIMER is not set -CONFIG_HAS_DMA=y -CONFIG_HAS_IOMEM=y -CONFIG_HAS_IOPORT=y -# CONFIG_HAS_RAPIDIO is not set -CONFIG_HAVE_ARCH_KGDB=y -CONFIG_HAVE_ARCH_TRACEHOOK=y -# CONFIG_HAVE_CLK is not set -CONFIG_HAVE_DMA_ATTRS=y -CONFIG_HAVE_DYNAMIC_FTRACE=y -CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y -CONFIG_HAVE_FTRACE=y -# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set -CONFIG_HAVE_IDE=y -CONFIG_HAVE_IOREMAP_PROT=y -CONFIG_HAVE_KPROBES=y -CONFIG_HAVE_KRETPROBES=y -CONFIG_HAVE_LATENCYTOP_SUPPORT=y -CONFIG_HAVE_LMB=y -CONFIG_HAVE_MEMORY_PRESENT=y -CONFIG_HAVE_OPROFILE=y -CONFIG_HAVE_SETUP_PER_CPU_AREA=y -# CONFIG_HFSPLUS_FS is not set -# CONFIG_HFS_FS is not set -CONFIG_HID=y -CONFIG_HID_SUPPORT=y -# CONFIG_HIGH_RES_TIMERS is not set -# CONFIG_HUGETLBFS is not set -CONFIG_HW_CONSOLE=y -# CONFIG_HW_RANDOM is not set -CONFIG_HZ=250 -# CONFIG_HZ_100 is not set -CONFIG_HZ_250=y -# CONFIG_I2C is not set -# CONFIG_IDE is not set -# CONFIG_IEEE80211 is not set -# CONFIG_INET_AH is not set -# CONFIG_INET_DIAG is not set -# CONFIG_INET_ESP is not set -# CONFIG_INET_IPCOMP is not set -# CONFIG_INET_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# 
CONFIG_INET_XFRM_TUNNEL is not set -CONFIG_INITRAMFS_SOURCE="" -CONFIG_INOTIFY=y -CONFIG_INOTIFY_USER=y -CONFIG_INPUT=y -# CONFIG_INPUT_EVDEV is not set -CONFIG_INPUT_JOYSTICK=y -CONFIG_INPUT_MOUSEDEV=y -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_IOMMU_HELPER=y -# CONFIG_IOMMU_VMERGE is not set -CONFIG_IOSCHED_AS=y -CONFIG_IOSCHED_CFQ=y -# CONFIG_IPIC is not set -# CONFIG_IPV6 is not set -# CONFIG_IP_ADVANCED_ROUTER is not set -# CONFIG_IP_MULTICAST is not set -# CONFIG_IRQSTACKS is not set -# CONFIG_IRQ_ALL_CPUS is not set -CONFIG_IRQ_PER_CPU=y -CONFIG_ISA_DMA_API=y -# CONFIG_ISDN is not set -CONFIG_ISO9660_FS=y -CONFIG_JBD=y -# CONFIG_JOYSTICK_A3D is not set -# CONFIG_JOYSTICK_ADI is not set -# CONFIG_JOYSTICK_ANALOG is not set -# CONFIG_JOYSTICK_COBRA is not set -# CONFIG_JOYSTICK_GF2K is not set -# CONFIG_JOYSTICK_GRIP is not set -# CONFIG_JOYSTICK_GRIP_MP is not set -# CONFIG_JOYSTICK_GUILLEMOT is not set -# CONFIG_JOYSTICK_IFORCE is not set -# CONFIG_JOYSTICK_INTERACT is not set -# CONFIG_JOYSTICK_JOYDUMP is not set -# CONFIG_JOYSTICK_MAGELLAN is not set -# CONFIG_JOYSTICK_SIDEWINDER is not set -# CONFIG_JOYSTICK_SPACEBALL is not set -# CONFIG_JOYSTICK_SPACEORB is not set -# CONFIG_JOYSTICK_STINGER is not set -# CONFIG_JOYSTICK_TMDC is not set -# CONFIG_JOYSTICK_TWIDJOY is not set -# CONFIG_JOYSTICK_WARRIOR is not set -# CONFIG_JOYSTICK_XPAD is not set -# CONFIG_JOYSTICK_ZHENHUA is not set -CONFIG_KERNEL_START=0xc000000000000000 -CONFIG_KEXEC=y -CONFIG_KMOD=y -CONFIG_LEGACY_PTYS=y -CONFIG_LEGACY_PTY_COUNT=16 -# CONFIG_LIBCRC32C is not set -# CONFIG_LLC2 is not set -CONFIG_LOCALVERSION_AUTO=y -CONFIG_LOCKD=y -CONFIG_LOCK_KERNEL=y -CONFIG_LOGO=y -CONFIG_LOGO_LINUX_CLUT224=y -# CONFIG_LOGO_LINUX_MONO is not set -# CONFIG_LOGO_LINUX_VGA16 is not set -CONFIG_LOG_BUF_SHIFT=15 -# CONFIG_MACINTOSH_DRIVERS is not set -CONFIG_MEMORY_HOTPLUG=y -CONFIG_MEMORY_HOTPLUG_SPARSE=y -# 
CONFIG_MEMORY_HOTREMOVE is not set -# CONFIG_MFD_CORE is not set -# CONFIG_MFD_TMIO is not set -CONFIG_MIGRATION=y -# CONFIG_MINIX_FS is not set -# CONFIG_MISC_DEVICES is not set -# CONFIG_MMIO_NVRAM is not set -# CONFIG_MODULE_UNLOAD is not set -# CONFIG_MPIC is not set -# CONFIG_MPIC_WEIRD is not set -CONFIG_MSDOS_FS=y -# CONFIG_MTD is not set -# CONFIG_NETFILTER is not set -# CONFIG_NET_ETHERNET is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_NET_KEY is not set -# CONFIG_NET_PKTGEN is not set -# CONFIG_NET_SCHED is not set -# CONFIG_NEW_LEDS is not set -# CONFIG_NFSD is not set -CONFIG_NFS_FS=y -CONFIG_NLS=y -# CONFIG_NLS_CODEPAGE_1250 is not set -CONFIG_NLS_CODEPAGE_437=y -# CONFIG_NLS_CODEPAGE_850 is not set -CONFIG_NLS_ISO8859_1=y -# CONFIG_NLS_ISO8859_15 is not set -# CONFIG_NLS_KOI8_R is not set -# CONFIG_NLS_UTF8 is not set -CONFIG_NR_CPUS=2 -# CONFIG_NUMA is not set -CONFIG_OF=y -CONFIG_OF_DEVICE=y -# CONFIG_PACKET_MMAP is not set -CONFIG_PAGEFLAGS_EXTENDED=y -CONFIG_PAGE_OFFSET=0xc000000000000000 -# CONFIG_PARTITION_ADVANCED is not set -# CONFIG_PCI_DOMAINS is not set -# CONFIG_PCI_SYSCALL is not set -CONFIG_PHYSICAL_START=0x00000000 -CONFIG_POWER3=y -CONFIG_POWER4=y -# CONFIG_POWER4_ONLY is not set -CONFIG_PPC=y -CONFIG_PPC64=y -# CONFIG_PPC_64K_PAGES is not set -# CONFIG_PPC_970_NAP is not set -CONFIG_PPC_CELL=y -# CONFIG_PPC_CELLEB is not set -# CONFIG_PPC_CELL_NATIVE is not set -# CONFIG_PPC_CLOCK is not set -# CONFIG_PPC_DCR_MMIO is not set -# CONFIG_PPC_DCR_NATIVE is not set -# CONFIG_PPC_EARLY_DEBUG is not set -CONFIG_PPC_FPU=y -# CONFIG_PPC_HAS_HASH_64K is not set -# CONFIG_PPC_I8259 is not set -# CONFIG_PPC_IBM_CELL_BLADE is not set -# CONFIG_PPC_INDIRECT_IO is not set -# CONFIG_PPC_ISERIES is not set -# CONFIG_PPC_MAPLE is not set -CONFIG_PPC_MERGE=y -# CONFIG_PPC_MM_SLICES is not set -# CONFIG_PPC_MPC106 is not set -CONFIG_PPC_MULTIPLATFORM=y -CONFIG_PPC_OF=y -# CONFIG_PPC_PASEMI is not set -CONFIG_PPC_PCI_CHOICE=y -# CONFIG_PPC_PMAC is 
not set -CONFIG_PPC_PS3=y -# CONFIG_PPC_PSERIES is not set -# CONFIG_PPC_RTAS is not set -CONFIG_PPC_STD_MMU=y -# CONFIG_PPC_UDBG_16550 is not set -# CONFIG_PPP is not set -# CONFIG_PQ2ADS is not set -CONFIG_PROC_DEVICETREE=y -CONFIG_PROC_PAGE_MONITOR=y -# CONFIG_PS3_ADVANCED is not set -CONFIG_PS3_DISK=y -# CONFIG_PS3_DYNAMIC_DMA is not set -CONFIG_PS3_FLASH=y -CONFIG_PS3_HTAB_SIZE=20 -# CONFIG_PS3_LPM is not set -CONFIG_PS3_PS3AV=y -CONFIG_PS3_ROM=y -CONFIG_PS3_STORAGE=y -CONFIG_PS3_SYS_MANAGER=y -CONFIG_PS3_VUART=y -CONFIG_RESOURCES_64BIT=y -# CONFIG_RFKILL is not set -CONFIG_RPCSEC_GSS_KRB5=y -# CONFIG_RT_GROUP_SCHED is not set -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -# CONFIG_SCHED_HRTICK is not set -CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y -# CONFIG_SCHED_SMT is not set -CONFIG_SCSI=y -# CONFIG_SCSI_LOWLEVEL is not set -# CONFIG_SCSI_PROC_FS is not set -# CONFIG_SCSI_WAIT_SCAN is not set -# CONFIG_SERIAL_8250 is not set -# CONFIG_SHMEM is not set -CONFIG_SMP=y -# CONFIG_SOUND is not set -CONFIG_SPARSEMEM=y -CONFIG_SPARSEMEM_EXTREME=y -CONFIG_SPARSEMEM_MANUAL=y -# CONFIG_SPARSEMEM_STATIC is not set -CONFIG_SPARSEMEM_VMEMMAP=y -CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y -# CONFIG_SPU_BASE is not set -# CONFIG_SPU_FS is not set -CONFIG_SSB_POSSIBLE=y -CONFIG_SUNRPC=y -CONFIG_SUNRPC_GSS=y -# CONFIG_SYN_COOKIES is not set -# CONFIG_SYSCTL_SYSCALL_CHECK is not set -CONFIG_SYSFS_DEPRECATED=y -CONFIG_SYSFS_DEPRECATED_V2=y -# CONFIG_SYSVIPC is not set -# CONFIG_TCP_CONG_ADVANCED is not set -CONFIG_TCP_CONG_CUBIC=y -# CONFIG_TICK_ONESHOT is not set -CONFIG_TINY_SHMEM=y -# CONFIG_TUN is not set -CONFIG_TUNE_CELL=y -# CONFIG_U3_DART is not set -# CONFIG_UDF_FS is not set -CONFIG_USB=y -# CONFIG_USB_ACM is not set -# CONFIG_USB_CATC is not set -# CONFIG_USB_DEVICEFS is not set -CONFIG_USB_EHCI_BIG_ENDIAN_MMIO=y -CONFIG_USB_EHCI_HCD=y -CONFIG_USB_EHCI_HCD_PPC_OF=y -# CONFIG_USB_EHCI_ROOT_HUB_TT is not set -CONFIG_USB_HID=y -# CONFIG_USB_KAWETH is not set -# CONFIG_USB_OHCI_BIG_ENDIAN_DESC 
is not set -CONFIG_USB_OHCI_BIG_ENDIAN_MMIO=y -CONFIG_USB_OHCI_HCD=y -# CONFIG_USB_OHCI_HCD_PPC_OF is not set -# CONFIG_USB_PEGASUS is not set -# CONFIG_USB_PRINTER is not set -# CONFIG_USB_SERIAL is not set -CONFIG_USB_STORAGE=y -# CONFIG_USB_STORAGE_ALAUDA is not set -# CONFIG_USB_STORAGE_DATAFAB is not set -# CONFIG_USB_STORAGE_DPCM is not set -# CONFIG_USB_STORAGE_FREECOM is not set -# CONFIG_USB_STORAGE_JUMPSHOT is not set -# CONFIG_USB_STORAGE_KARMA is not set -# CONFIG_USB_STORAGE_SDDR09 is not set -# CONFIG_USB_STORAGE_SDDR55 is not set -# CONFIG_USB_STORAGE_USBAT is not set -CONFIG_USB_SUPPORT=y -# CONFIG_USB_USBNET is not set -CONFIG_USER_SCHED=y -CONFIG_USE_GENERIC_SMP_HELPERS=y -CONFIG_VFAT_FS=y -# CONFIG_VGASTATE is not set -# CONFIG_VGA_CONSOLE is not set -# CONFIG_VIDEO_DEV is not set -CONFIG_VIDEO_OUTPUT_CONTROL=y -CONFIG_VIRT_CPU_ACCOUNTING=y -# CONFIG_VLAN_8021Q is not set -CONFIG_VM_EVENT_COUNTERS=y -# CONFIG_VSX is not set -CONFIG_VT=y -CONFIG_VT_CONSOLE=y -CONFIG_VT_HW_CONSOLE_BINDING=y -# CONFIG_W1 is not set -# CONFIG_WATCHDOG is not set -# CONFIG_WIRELESS_EXT_SYSFS is not set -# CONFIG_WLAN_80211 is not set -CONFIG_WORD_SIZE=64 -# CONFIG_ZISOFS is not set diff --git a/target/linux/ps3/patches-2.6.27/001-perfmon-2.6.27.patch b/target/linux/ps3/patches-2.6.27/001-perfmon-2.6.27.patch deleted file mode 100644 index 958416aab6..0000000000 --- a/target/linux/ps3/patches-2.6.27/001-perfmon-2.6.27.patch +++ /dev/null @@ -1,31652 +0,0 @@ -diff --git a/Documentation/ABI/testing/sysfs-perfmon b/Documentation/ABI/testing/sysfs-perfmon -new file mode 100644 -index 0000000..bde434c ---- /dev/null -+++ b/Documentation/ABI/testing/sysfs-perfmon -@@ -0,0 +1,87 @@ -+What: /sys/kernel/perfmon -+Date: Nov 2007 -+KernelVersion: 2.6.24 -+Contact: eranian@gmail.com -+ -+Description: provide the configuration interface for the perfmon2 subsystems. 
-+ The tree contains information about the detected hardware, current -+ state of the subsystem as well as some configuration parameters. -+ -+ The tree consists of the following entries: -+ -+ /sys/kernel/perfmon/debug (read-write): -+ -+ Enable perfmon2 debugging output via klogd. Debug messages produced during -+ PMU interrupt handling are not controlled by this entry. The traces a rate-limited -+ to avoid flooding of the console. It is possible to change the throttling -+ via /proc/sys/kernel/printk_ratelimit. The value is interpreted as a bitmask. -+ Each bit enables a particular type of debug messages. Refer to the file -+ include/linux/perfmon_kern.h for more information -+ -+ /sys/kernel/perfmon/pmc_max_fast_arg (read-only): -+ -+ Number of perfmon2 syscall arguments copied directly onto the -+ stack (copy_from_user) for pfm_write_pmcs(). Copying to the stack avoids -+ having to allocate a buffer. The unit is the number of pfarg_pmc_t -+ structures. -+ -+ /sys/kernel/perfmon/pmd_max_fast_arg (read-only): -+ -+ Number of perfmon2 syscall arguments copied directly onto the -+ stack (copy_from_user) for pfm_write_pmds()/pfm_read_pmds(). Copying -+ to the stack avoids having to allocate a buffer. The unit is the number -+ of pfarg_pmd_t structures. -+ -+ -+ /sys/kernel/perfmon/reset_stats (write-only): -+ -+ Reset the statistics collected by perfmon2. Stats are available -+ per-cpu via debugfs. -+ -+ /sys/kernel/perfmon/smpl_buffer_mem_cur (read-only): -+ -+ Reports the amount of memory currently dedicated to sampling -+ buffers by the kernel. The unit is byte. -+ -+ /sys/kernel/perfmon/smpl_buffer_mem_max (read-write): -+ -+ Maximum amount of kernel memory usable for sampling buffers. -1 means -+ everything that is available. Unit is byte. -+ -+ /sys/kernel/perfmon/smpl_buffer_mem_cur (read-only): -+ -+ Current utilization of kernel memory in bytes. 
-+ -+ /sys/kernel/perfmon/sys_group (read-write): -+ -+ Users group allowed to create a system-wide perfmon2 context (session). -+ -1 means any group. This control will be kept until we find a package -+ able to control capabilities via PAM. -+ -+ /sys/kernel/perfmon/task_group (read-write): -+ -+ Users group allowed to create a per-thread context (session). -+ -1 means any group. This control will be kept until we find a -+ package able to control capabilities via PAM. -+ -+ /sys/kernel/perfmon/sys_sessions_count (read-only): -+ -+ Number of system-wide contexts currently attached to CPUs. -+ -+ /sys/kernel/perfmon/task_sessions_count (read-only): -+ -+ Number of per-thread contexts currently attached to threads. -+ -+ /sys/kernel/perfmon/version (read-only): -+ -+ Perfmon2 interface revision number. -+ -+ /sys/kernel/perfmon/arg_mem_max(read-write): -+ -+ Maximum size of vector arguments expressed in bytes. Can be modified -+ -+ /sys/kernel/perfmon/mode(read-write): -+ -+ Bitmask to enable/disable certain perfmon2 features. -+ Currently defined: -+ - bit 0: if set, then reserved bitfield are ignored on PMC writes -diff --git a/Documentation/ABI/testing/sysfs-perfmon-fmt b/Documentation/ABI/testing/sysfs-perfmon-fmt -new file mode 100644 -index 0000000..1b45270 ---- /dev/null -+++ b/Documentation/ABI/testing/sysfs-perfmon-fmt -@@ -0,0 +1,18 @@ -+What: /sys/kernel/perfmon/formats -+Date: 2007 -+KernelVersion: 2.6.24 -+Contact: eranian@gmail.com -+ -+Description: provide description of available perfmon2 custom sampling buffer formats -+ which are implemented as independent kernel modules. Each formats gets -+ a subdir which a few entries. -+ -+ The name of the subdir is the name of the sampling format. The same name -+ must be passed to pfm_create_context() to use the format. -+ -+ Each subdir XX contains the following entries: -+ -+ /sys/kernel/perfmon/formats/XX/version (read-only): -+ -+ Version number of the format in clear text and null terminated. 
-+ -diff --git a/Documentation/ABI/testing/sysfs-perfmon-pmu b/Documentation/ABI/testing/sysfs-perfmon-pmu -new file mode 100644 -index 0000000..a1afc7e ---- /dev/null -+++ b/Documentation/ABI/testing/sysfs-perfmon-pmu -@@ -0,0 +1,46 @@ -+What: /sys/kernel/perfmon/pmu -+Date: Nov 2007 -+KernelVersion: 2.6.24 -+Contact: eranian@gmail.com -+ -+Description: provide information about the currently loaded PMU description module. -+ The module contains the mapping of the actual performance counter registers -+ onto the logical PMU exposed by perfmon. There is at most one PMU description -+ module loaded at any time. -+ -+ The sysfs PMU tree provides a description of the mapping for each register. -+ There is one subdir per config and data registers along an entry for the -+ name of the PMU model. -+ -+ The model entry is as follows: -+ -+ /sys/kernel/perfmon/pmu_desc/model (read-only): -+ -+ Name of the PMU model is clear text and zero terminated. -+ -+ Then for each logical PMU register, XX, gets a subtree with the following entries: -+ -+ /sys/kernel/perfmon/pmu_desc/pm*XX/addr (read-only): -+ -+ The physical address or index of the actual underlying hardware register. -+ On Itanium, it corresponds to the index. But on X86 processor, this is -+ the actual MSR address. -+ -+ /sys/kernel/perfmon/pmu_desc/pm*XX/dfl_val (read-only): -+ -+ The default value of the register in hexadecimal. -+ -+ /sys/kernel/perfmon/pmu_desc/pm*XX/name (read-only): -+ -+ The name of the hardware register. -+ -+ /sys/kernel/perfmon/pmu_desc/pm*XX/rsvd_msk (read-only): -+ -+ The bitmask of reserved bits, i.e., bits which cannot be changed by -+ applications. When a bit is set, it means the corresponding bit in the -+ actual register is reserved. -+ -+ /sys/kernel/perfmon/pmu_desc/pm*XX/width (read-only): -+ -+ the width in bits of the registers. This field is only relevant for counter -+ registers. 
-diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt -index 1150444..2652b6c 100644 ---- a/Documentation/kernel-parameters.txt -+++ b/Documentation/kernel-parameters.txt -@@ -1643,6 +1643,9 @@ and is between 256 and 4096 characters. It is defined in the file - Format: { 0 | 1 } - See arch/parisc/kernel/pdc_chassis.c - -+ perfmon_debug [PERFMON] Enables Perfmon debug messages. Needed -+ to see traces of the early startup startup phase. -+ - pf. [PARIDE] - See Documentation/paride.txt. - -diff --git a/Documentation/perfmon2-debugfs.txt b/Documentation/perfmon2-debugfs.txt -new file mode 100644 -index 0000000..b30cae8 ---- /dev/null -+++ b/Documentation/perfmon2-debugfs.txt -@@ -0,0 +1,126 @@ -+ The perfmon2 debug and statistics interface -+ ------------------------------------------ -+ Stephane Eranian -+ -+ -+The perfmon2 interfaces exports a set of statistics which are used to tune and -+debug the implementation. The data is composed of a set of very simple metrics -+mostly aggregated counts and durations. They instruments key points in the -+perfmon2 code, such as context switch and interrupt handling. -+ -+The data is accessible via the debug filesystem (debugfs). Thus you need to -+have the filesystem support enabled in your kernel. Furthermore since, 2.6.25, -+the perfmon2 statistics interface is an optional component. It needs to be -+explicitely enabled in the kernel config file (CONFIG_PERFMON_DEBUG_FS). -+ -+To access the data, the debugs filesystem must be mounted. Supposing the mount -+point is /debugfs, you would need to do: -+ $ mount -t debugs none /debugfs -+ -+The data is located under the perfmon subdirectory and is organized per CPU. -+For each CPU, the same set of metrics is available, one metric per file in -+clear ASCII text. -+ -+The metrics are as follows: -+ -+ ctxswin_count (read-only): -+ -+ Number of PMU context switch in. 
-+ -+ ctxswin_ns (read-only): -+ -+ Number of nanoseconds spent in the PMU context switch in -+ routine. Dividing this number by the value of ctxswin_count, -+ yields average cost of the PMU context switch in. -+ -+ ctxswout_count (read-only): -+ -+ Number of PMU context switch out. -+ -+ ctxswout_ns (read-only): -+ -+ Number of nanoseconds spent in the PMU context switch in -+ routine. Dividing this number by the value of ctxswout_count, -+ yields average cost of the PMU context switch out. -+ -+ fmt_handler_calls (read-only): -+ -+ Number of calls to the sampling format routine that handles -+ PMU interrupts, i.e., typically the routine that records a -+ sample. -+ -+ fmt_handler_ns (read-only): -+ -+ Number of nanoseconds spent in the routine that handle PMU -+ interrupt in the sampling format. Dividing this number by -+ the number of calls provided by fmt_handler_calls, yields -+ average time spent in this routine. -+ -+ ovfl_intr_all_count (read-only): -+ -+ Number of PMU interrupts received by the kernel. -+ -+ -+ ovfl_intr_nmi_count (read-only): -+ -+ Number of Non Maskeable Interrupts (NMI) received by the kernel -+ for perfmon. This is relevant only on X86 hardware. -+ -+ ovfl_intr_ns (read-only): -+ -+ Number of nanoseconds spent in the perfmon2 PMU interrupt -+ handler routine. Dividing this number of ovfl_intr_all_count -+ yields the average time to handle one PMU interrupt. -+ -+ ovfl_intr_regular_count (read-only): -+ -+ Number of PMU interrupts which are actually processed by -+ the perfmon interrupt handler. There may be spurious or replay -+ interrupts. -+ -+ ovfl_intr_replay_count (read-only): -+ -+ Number of PMU interrupts which were replayed on context switch -+ in or on event set switching. Interrupts get replayed when they -+ were in flight at the time monitoring had to be stopped. -+ -+ perfmon/ovfl_intr_spurious_count (read-only): -+ -+ Number of PMU interrupts which were dropped because there was -+ no active context (session). 
-+ -+ ovfl_notify_count (read-only): -+ -+ Number of user level notifications sent. Notifications are -+ appended as messages to the context queue. Notifications may -+ be sent on PMU interrupts. -+ -+ pfm_restart_count (read-only): -+ -+ Number of times pfm_restart() is called. -+ -+ reset_pmds_count (read-only): -+ -+ Number of times pfm_reset_pmds() is called. -+ -+ set_switch_count (read-only): -+ -+ Number of event set switches. -+ -+ set_switch_ns (read-only): -+ -+ Number of nanoseconds spent in the set switching routine. -+ Dividing this number by set_switch_count yields the average -+ cost of switching sets. -+ -+ handle_timeout_count (read-only): -+ -+ Number of times the pfm_handle_timeout() routine is called. -+ It is used for timeout-based set switching. -+ -+ handle_work_count (read-only): -+ -+ Number of times pfm_handle_work() is called. The routine -+ handles asynchronous perfmon2 work for per-thread contexts -+ (sessions). -+ -diff --git a/Documentation/perfmon2.txt b/Documentation/perfmon2.txt -new file mode 100644 -index 0000000..4a8fada ---- /dev/null -+++ b/Documentation/perfmon2.txt -@@ -0,0 +1,213 @@ -+ The perfmon2 hardware monitoring interface -+ ------------------------------------------ -+ Stephane Eranian -+ -+ -+I/ Introduction -+ -+ The perfmon2 interface provides access to the hardware performance counters of -+ major processors. Nowadays, all processors implement some flavors of performance -+ counters which capture micro-architectural level information such as the number -+ of elapsed cycles, number of cache misses, and so on. -+ -+ The interface is implemented as a set of new system calls and a set of config files -+ in /sys. -+ -+ It is possible to monitoring a single thread or a CPU. In either mode, applications -+ can count or collect samples. System-wide monitoring is supported by running a -+ monitoring session on each CPU. 
The interface support event-based sampling where the -+ sampling period is expressed as the number of occurrences of event, instead of just a -+ timeout. This approach provides a much better granularity and flexibility. -+ -+ For performance reason, it is possible to use a kernel-level sampling buffer to minimize -+ the overhead incurred by sampling. The format of the buffer, i.e., what is recorded, how -+ it is recorded, and how it is exported to user-land is controlled by a kernel module called -+ a custom sampling format. The current implementation comes with a default format but -+ it is possible to create additional formats. There is an in-kernel registration -+ interface for formats. Each format is identified by a simple string which a tool -+ can pass when a monitoring session is created. -+ -+ The interface also provides support for event set and multiplexing to work around -+ hardware limitations in the number of available counters or in how events can be -+ combined. Each set defines as many counters as the hardware can support. The kernel -+ then multiplexes the sets. The interface supports time-base switching but also -+ overflow based switching, i.e., after n overflows of designated counters. -+ -+ Applications never manipulates the actual performance counter registers. Instead they see -+ a logical Performance Monitoring Unit (PMU) composed of a set of config register (PMC) -+ and a set of data registers (PMD). Note that PMD are not necessarily counters, they -+ can be buffers. The logical PMU is then mapped onto the actual PMU using a mapping -+ table which is implemented as a kernel module. The mapping is chosen once for each -+ new processor. It is visible in /sys/kernel/perfmon/pmu_desc. The kernel module -+ is automatically loaded on first use. -+ -+ A monitoring session, or context, is uniquely identified by a file descriptor -+ obtained when the context is created. File sharing semantics apply to access -+ the context inside a process. 
A context is never inherited across fork. The file -+ descriptor can be used to received counter overflow notifications or when the -+ sampling buffer is full. It is possible to use poll/select on the descriptor -+ to wait for notifications from multiplex contexts. Similarly, the descriptor -+ supports asynchronous notification via SIGIO. -+ -+ Counters are always exported as being 64-bit wide regardless of what the underlying -+ hardware implements. -+ -+II/ Kernel compilation -+ -+ To enable perfmon2, you need to enable CONFIG_PERFMON -+ -+III/ OProfile interactions -+ -+ The set of features offered by perfmon2 is rich enough to support migrating -+ Oprofile on top of it. That means that PMU programming and low-level interrupt -+ handling could be done by perfmon2. The Oprofile sampling buffer management code -+ in the kernel as well as how samples are exported to users could remain through -+ the use of a custom sampling buffer format. This is how Oprofile work on Itanium. -+ -+ The current interactions with Oprofile are: -+ - on X86: Both subsystems can be compiled into the same kernel. There is enforced -+ mutual exclusion between the two subsystems. When there is an Oprofile -+ session, no perfmon2 session can exist and vice-versa. Perfmon2 session -+ encapsulates both per-thread and system-wide sessions here. -+ -+ - On IA-64: Oprofile works on top of perfmon2. Oprofile being a system-wide monitoring -+ tool, the regular per-thread vs. system-wide session restrictions apply. -+ -+ - on PPC: no integration yet. You need to enable/disble one of the two subsystems -+ - on MIPS: no integration yet. You need to enable/disble one of the two subsystems -+ -+IV/ User tools -+ -+ We have released a simple monitoring tool to demonstrate the feature of the -+ interface. The tool is called pfmon and it comes with a simple helper library -+ called libpfm. The library comes with a set of examples to show how to use the -+ kernel perfmon2 interface. 
Visit http://perfmon2.sf.net for details. -+ -+ There maybe other tools available for perfmon2. -+ -+V/ How to program? -+ -+ The best way to learn how to program perfmon2, is to take a look at the source -+ code for the examples in libpfm. The source code is available from: -+ http://perfmon2.sf.net -+ -+VI/ System calls overview -+ -+ The interface is implemented by the following system calls: -+ -+ * int pfm_create_context(pfarg_ctx_t *ctx, char *fmt, void *arg, size_t arg_size) -+ -+ This function create a perfmon2 context. The type of context is per-thread by -+ default unless PFM_FL_SYSTEM_WIDE is passed in ctx. The sampling format name -+ is passed in fmt. Arguments to the format are passed in arg which is of size -+ arg_size. Upon successful return, the file descriptor identifying the context -+ is returned. -+ -+ * int pfm_write_pmds(int fd, pfarg_pmd_t *pmds, int n) -+ -+ This function is used to program the PMD registers. It is possible to pass -+ vectors of PMDs. -+ -+ * int pfm_write_pmcs(int fd, pfarg_pmc_t *pmds, int n) -+ -+ This function is used to program the PMC registers. It is possible to pass -+ vectors of PMDs. -+ -+ * int pfm_read_pmds(int fd, pfarg_pmd_t *pmds, int n) -+ -+ This function is used to read the PMD registers. It is possible to pass -+ vectors of PMDs. -+ -+ * int pfm_load_context(int fd, pfarg_load_t *load) -+ -+ This function is used to attach the context to a thread or CPU. -+ Thread means kernel-visible thread (NPTL). The thread identification -+ as obtained by gettid must be passed to load->load_target. -+ -+ To operate on another thread (not self), it is mandatory that the thread -+ be stopped via ptrace(). -+ -+ To attach to a CPU, the CPU number must be specified in load->load_target -+ AND the call must be issued on that CPU. To monitor a CPU, a thread MUST -+ be pinned on that CPU. -+ -+ Until the context is attached, the actual counters are not accessed. 
-+ -+ * int pfm_unload_context(int fd) -+ -+ The context is detached for the thread or CPU is was attached to. -+ As a consequence monitoring is stopped. -+ -+ When monitoring another thread, the thread MUST be stopped via ptrace() -+ for this function to succeed. -+ -+ * int pfm_start(int fd, pfarg_start_t *st) -+ -+ Start monitoring. The context must be attached for this function to succeed. -+ Optionally, it is possible to specify the event set on which to start using the -+ st argument, otherwise just pass NULL. -+ -+ When monitoring another thread, the thread MUST be stopped via ptrace() -+ for this function to succeed. -+ -+ * int pfm_stop(int fd) -+ -+ Stop monitoring. The context must be attached for this function to succeed. -+ -+ When monitoring another thread, the thread MUST be stopped via ptrace() -+ for this function to succeed. -+ -+ -+ * int pfm_create_evtsets(int fd, pfarg_setdesc_t *sets, int n) -+ -+ This function is used to create or change event sets. By default set 0 exists. -+ It is possible to create/change multiple sets in one call. -+ -+ The context must be detached for this call to succeed. -+ -+ Sets are identified by a 16-bit integer. They are sorted based on this -+ set and switching occurs in a round-robin fashion. -+ -+ * int pfm_delete_evtsets(int fd, pfarg_setdesc_t *sets, int n) -+ -+ Delete event sets. The context must be detached for this call to succeed. -+ -+ -+ * int pfm_getinfo_evtsets(int fd, pfarg_setinfo_t *sets, int n) -+ -+ Retrieve information about event sets. In particular it is possible -+ to get the number of activation of a set. It is possible to retrieve -+ information about multiple sets in one call. -+ -+ -+ * int pfm_restart(int fd) -+ -+ Indicate to the kernel that the application is done processing an overflow -+ notification. A consequence of this call could be that monitoring resumes. 
-+ -+ * int read(fd, pfm_msg_t *msg, sizeof(pfm_msg_t)) -+ -+ The regular read() system call can be used with the context file descriptor to -+ receive overflow notification messages. Non-blocking read() is supported. -+ -+ Each message carries information about the overflow such as which counter overflowed -+ and where the program was (interrupted instruction pointer). -+ -+ * int close(int fd) -+ -+ To destroy a context, the regular close() system call is used. -+ -+ -+VII/ /sys interface overview -+ -+ Refer to Documentation/ABI/testing/sysfs-perfmon-* for a detailed description -+ of the sysfs interface of perfmon2. -+ -+VIII/ debugfs interface overview -+ -+ Refer to Documentation/perfmon2-debugfs.txt for a detailed description of the -+ debug and statistics interface of perfmon2. -+ -+IX/ Documentation -+ -+ Visit http://perfmon2.sf.net -diff --git a/MAINTAINERS b/MAINTAINERS -index 8dae455..fb38c2a 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -3239,6 +3239,14 @@ M: balbir@linux.vnet.ibm.com - L: linux-kernel@vger.kernel.org - S: Maintained - -+PERFMON SUBSYSTEM -+P: Stephane Eranian -+M: eranian@gmail.com -+L: perfmon2-devel@lists.sf.net -+W: http://perfmon2.sf.net -+T: git kernel.org:/pub/scm/linux/kernel/git/eranian/linux-2.6 -+S: Maintained -+ - PERSONALITY HANDLING - P: Christoph Hellwig - M: hch@infradead.org -diff --git a/Makefile b/Makefile -index 16e3fbb..7bb1320 100644 ---- a/Makefile -+++ b/Makefile -@@ -620,6 +620,7 @@ export mod_strip_cmd - - ifeq ($(KBUILD_EXTMOD),) - core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ -+core-$(CONFIG_PERFMON) += perfmon/ - - vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ - $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ -diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig -index 48e496f..1d79b01 100644 ---- a/arch/ia64/Kconfig -+++ b/arch/ia64/Kconfig -@@ -470,14 +470,6 @@ config COMPAT_FOR_U64_ALIGNMENT - config IA64_MCA_RECOVERY - tristate "MCA recovery from errors other than TLB." 
- --config PERFMON -- bool "Performance monitor support" -- help -- Selects whether support for the IA-64 performance monitor hardware -- is included in the kernel. This makes some kernel data-structures a -- little bigger and slows down execution a bit, but it is generally -- a good idea to turn this on. If you're unsure, say Y. -- - config IA64_PALINFO - tristate "/proc/pal support" - help -@@ -549,6 +541,8 @@ source "drivers/firmware/Kconfig" - - source "fs/Kconfig.binfmt" - -+source "arch/ia64/perfmon/Kconfig" -+ - endmenu - - menu "Power management and ACPI" -diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile -index 905d25b..9aa622d 100644 ---- a/arch/ia64/Makefile -+++ b/arch/ia64/Makefile -@@ -57,6 +57,7 @@ core-$(CONFIG_IA64_GENERIC) += arch/ia64/dig/ - core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/ - core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/ - core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/ -+core-$(CONFIG_PERFMON) += arch/ia64/perfmon/ - core-$(CONFIG_IA64_SGI_UV) += arch/ia64/uv/ - core-$(CONFIG_KVM) += arch/ia64/kvm/ - -diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig -index 9f48397..ff9572a 100644 ---- a/arch/ia64/configs/generic_defconfig -+++ b/arch/ia64/configs/generic_defconfig -@@ -209,7 +209,6 @@ CONFIG_IA32_SUPPORT=y - CONFIG_COMPAT=y - CONFIG_COMPAT_FOR_U64_ALIGNMENT=y - CONFIG_IA64_MCA_RECOVERY=y --CONFIG_PERFMON=y - CONFIG_IA64_PALINFO=y - # CONFIG_IA64_MC_ERR_INJECT is not set - CONFIG_SGI_SN=y -@@ -234,6 +233,16 @@ CONFIG_BINFMT_ELF=y - CONFIG_BINFMT_MISC=m - - # -+# Hardware Performance Monitoring support -+# -+CONFIG_PERFMON=y -+CONFIG_IA64_PERFMON_COMPAT=y -+CONFIG_IA64_PERFMON_GENERIC=m -+CONFIG_IA64_PERFMON_ITANIUM=y -+CONFIG_IA64_PERFMON_MCKINLEY=y -+CONFIG_IA64_PERFMON_MONTECITO=y -+ -+# - # Power management and ACPI - # - CONFIG_PM=y -diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild -index ccbe8ae..cf64b3b 100644 ---- a/arch/ia64/include/asm/Kbuild -+++ 
b/arch/ia64/include/asm/Kbuild -@@ -5,10 +5,12 @@ header-y += fpu.h - header-y += fpswa.h - header-y += ia64regs.h - header-y += intel_intrin.h --header-y += perfmon_default_smpl.h - header-y += ptrace_offsets.h - header-y += rse.h - header-y += ucontext.h -+header-y += perfmon.h -+header-y += perfmon_compat.h -+header-y += perfmon_default_smpl.h - - unifdef-y += gcc_intrin.h - unifdef-y += intrinsics.h -diff --git a/arch/ia64/include/asm/hw_irq.h b/arch/ia64/include/asm/hw_irq.h -index 5c99cbc..4a45cb0 100644 ---- a/arch/ia64/include/asm/hw_irq.h -+++ b/arch/ia64/include/asm/hw_irq.h -@@ -67,9 +67,9 @@ extern int ia64_last_device_vector; - #define IA64_NUM_DEVICE_VECTORS (IA64_LAST_DEVICE_VECTOR - IA64_FIRST_DEVICE_VECTOR + 1) - - #define IA64_MCA_RENDEZ_VECTOR 0xe8 /* MCA rendez interrupt */ --#define IA64_PERFMON_VECTOR 0xee /* performance monitor interrupt vector */ - #define IA64_TIMER_VECTOR 0xef /* use highest-prio group 15 interrupt for timer */ - #define IA64_MCA_WAKEUP_VECTOR 0xf0 /* MCA wakeup (must be >MCA_RENDEZ_VECTOR) */ -+#define IA64_PERFMON_VECTOR 0xf1 /* performance monitor interrupt vector */ - #define IA64_IPI_LOCAL_TLB_FLUSH 0xfc /* SMP flush local TLB */ - #define IA64_IPI_RESCHEDULE 0xfd /* SMP reschedule */ - #define IA64_IPI_VECTOR 0xfe /* inter-processor interrupt vector */ -diff --git a/arch/ia64/include/asm/perfmon.h b/arch/ia64/include/asm/perfmon.h -index 7f3333d..150c4b4 100644 ---- a/arch/ia64/include/asm/perfmon.h -+++ b/arch/ia64/include/asm/perfmon.h -@@ -1,279 +1,59 @@ - /* -- * Copyright (C) 2001-2003 Hewlett-Packard Co -- * Stephane Eranian -- */ -- --#ifndef _ASM_IA64_PERFMON_H --#define _ASM_IA64_PERFMON_H -- --/* -- * perfmon comamnds supported on all CPU models -- */ --#define PFM_WRITE_PMCS 0x01 --#define PFM_WRITE_PMDS 0x02 --#define PFM_READ_PMDS 0x03 --#define PFM_STOP 0x04 --#define PFM_START 0x05 --#define PFM_ENABLE 0x06 /* obsolete */ --#define PFM_DISABLE 0x07 /* obsolete */ --#define PFM_CREATE_CONTEXT 0x08 
--#define PFM_DESTROY_CONTEXT 0x09 /* obsolete use close() */ --#define PFM_RESTART 0x0a --#define PFM_PROTECT_CONTEXT 0x0b /* obsolete */ --#define PFM_GET_FEATURES 0x0c --#define PFM_DEBUG 0x0d --#define PFM_UNPROTECT_CONTEXT 0x0e /* obsolete */ --#define PFM_GET_PMC_RESET_VAL 0x0f --#define PFM_LOAD_CONTEXT 0x10 --#define PFM_UNLOAD_CONTEXT 0x11 -- --/* -- * PMU model specific commands (may not be supported on all PMU models) -- */ --#define PFM_WRITE_IBRS 0x20 --#define PFM_WRITE_DBRS 0x21 -- --/* -- * context flags -- */ --#define PFM_FL_NOTIFY_BLOCK 0x01 /* block task on user level notifications */ --#define PFM_FL_SYSTEM_WIDE 0x02 /* create a system wide context */ --#define PFM_FL_OVFL_NO_MSG 0x80 /* do not post overflow/end messages for notification */ -- --/* -- * event set flags -- */ --#define PFM_SETFL_EXCL_IDLE 0x01 /* exclude idle task (syswide only) XXX: DO NOT USE YET */ -- --/* -- * PMC flags -- */ --#define PFM_REGFL_OVFL_NOTIFY 0x1 /* send notification on overflow */ --#define PFM_REGFL_RANDOM 0x2 /* randomize sampling interval */ -- --/* -- * PMD/PMC/IBR/DBR return flags (ignored on input) -+ * Copyright (c) 2001-2007 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian - * -- * Those flags are used on output and must be checked in case EAGAIN is returned -- * by any of the calls using a pfarg_reg_t or pfarg_dbreg_t structure. 
-- */ --#define PFM_REG_RETFL_NOTAVAIL (1UL<<31) /* set if register is implemented but not available */ --#define PFM_REG_RETFL_EINVAL (1UL<<30) /* set if register entry is invalid */ --#define PFM_REG_RETFL_MASK (PFM_REG_RETFL_NOTAVAIL|PFM_REG_RETFL_EINVAL) -- --#define PFM_REG_HAS_ERROR(flag) (((flag) & PFM_REG_RETFL_MASK) != 0) -- --typedef unsigned char pfm_uuid_t[16]; /* custom sampling buffer identifier type */ -- --/* -- * Request structure used to define a context -- */ --typedef struct { -- pfm_uuid_t ctx_smpl_buf_id; /* which buffer format to use (if needed) */ -- unsigned long ctx_flags; /* noblock/block */ -- unsigned short ctx_nextra_sets; /* number of extra event sets (you always get 1) */ -- unsigned short ctx_reserved1; /* for future use */ -- int ctx_fd; /* return arg: unique identification for context */ -- void *ctx_smpl_vaddr; /* return arg: virtual address of sampling buffer, is used */ -- unsigned long ctx_reserved2[11];/* for future use */ --} pfarg_context_t; -- --/* -- * Request structure used to write/read a PMC or PMD -- */ --typedef struct { -- unsigned int reg_num; /* which register */ -- unsigned short reg_set; /* event set for this register */ -- unsigned short reg_reserved1; /* for future use */ -- -- unsigned long reg_value; /* initial pmc/pmd value */ -- unsigned long reg_flags; /* input: pmc/pmd flags, return: reg error */ -- -- unsigned long reg_long_reset; /* reset after buffer overflow notification */ -- unsigned long reg_short_reset; /* reset after counter overflow */ -- -- unsigned long reg_reset_pmds[4]; /* which other counters to reset on overflow */ -- unsigned long reg_random_seed; /* seed value when randomization is used */ -- unsigned long reg_random_mask; /* bitmask used to limit random value */ -- unsigned long reg_last_reset_val;/* return: PMD last reset value */ -- -- unsigned long reg_smpl_pmds[4]; /* which pmds are accessed when PMC overflows */ -- unsigned long reg_smpl_eventid; /* opaque sampling event 
identifier */ -- -- unsigned long reg_reserved2[3]; /* for future use */ --} pfarg_reg_t; -- --typedef struct { -- unsigned int dbreg_num; /* which debug register */ -- unsigned short dbreg_set; /* event set for this register */ -- unsigned short dbreg_reserved1; /* for future use */ -- unsigned long dbreg_value; /* value for debug register */ -- unsigned long dbreg_flags; /* return: dbreg error */ -- unsigned long dbreg_reserved2[1]; /* for future use */ --} pfarg_dbreg_t; -- --typedef struct { -- unsigned int ft_version; /* perfmon: major [16-31], minor [0-15] */ -- unsigned int ft_reserved; /* reserved for future use */ -- unsigned long reserved[4]; /* for future use */ --} pfarg_features_t; -- --typedef struct { -- pid_t load_pid; /* process to load the context into */ -- unsigned short load_set; /* first event set to load */ -- unsigned short load_reserved1; /* for future use */ -- unsigned long load_reserved2[3]; /* for future use */ --} pfarg_load_t; -- --typedef struct { -- int msg_type; /* generic message header */ -- int msg_ctx_fd; /* generic message header */ -- unsigned long msg_ovfl_pmds[4]; /* which PMDs overflowed */ -- unsigned short msg_active_set; /* active set at the time of overflow */ -- unsigned short msg_reserved1; /* for future use */ -- unsigned int msg_reserved2; /* for future use */ -- unsigned long msg_tstamp; /* for perf tuning/debug */ --} pfm_ovfl_msg_t; -- --typedef struct { -- int msg_type; /* generic message header */ -- int msg_ctx_fd; /* generic message header */ -- unsigned long msg_tstamp; /* for perf tuning */ --} pfm_end_msg_t; -- --typedef struct { -- int msg_type; /* type of the message */ -- int msg_ctx_fd; /* unique identifier for the context */ -- unsigned long msg_tstamp; /* for perf tuning */ --} pfm_gen_msg_t; -- --#define PFM_MSG_OVFL 1 /* an overflow happened */ --#define PFM_MSG_END 2 /* task to which context was attached ended */ -- --typedef union { -- pfm_ovfl_msg_t pfm_ovfl_msg; -- pfm_end_msg_t pfm_end_msg; 
-- pfm_gen_msg_t pfm_gen_msg; --} pfm_msg_t; -- --/* -- * Define the version numbers for both perfmon as a whole and the sampling buffer format. -+ * This file contains Itanium Processor Family specific definitions -+ * for the perfmon interface. -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA - */ --#define PFM_VERSION_MAJ 2U --#define PFM_VERSION_MIN 0U --#define PFM_VERSION (((PFM_VERSION_MAJ&0xffff)<<16)|(PFM_VERSION_MIN & 0xffff)) --#define PFM_VERSION_MAJOR(x) (((x)>>16) & 0xffff) --#define PFM_VERSION_MINOR(x) ((x) & 0xffff) -- -+#ifndef _ASM_IA64_PERFMON_H_ -+#define _ASM_IA64_PERFMON_H_ - - /* -- * miscellaneous architected definitions -+ * arch-specific user visible interface definitions - */ --#define PMU_FIRST_COUNTER 4 /* first counting monitor (PMC/PMD) */ --#define PMU_MAX_PMCS 256 /* maximum architected number of PMC registers */ --#define PMU_MAX_PMDS 256 /* maximum architected number of PMD registers */ -- --#ifdef __KERNEL__ -- --extern long perfmonctl(int fd, int cmd, void *arg, int narg); -- --typedef struct { -- void (*handler)(int irq, void *arg, struct pt_regs *regs); --} pfm_intr_handler_desc_t; -- --extern void pfm_save_regs (struct task_struct *); --extern void pfm_load_regs (struct task_struct *); - --extern void pfm_exit_thread(struct task_struct *); --extern int pfm_use_debug_registers(struct task_struct *); 
--extern int pfm_release_debug_registers(struct task_struct *); --extern void pfm_syst_wide_update_task(struct task_struct *, unsigned long info, int is_ctxswin); --extern void pfm_inherit(struct task_struct *task, struct pt_regs *regs); --extern void pfm_init_percpu(void); --extern void pfm_handle_work(void); --extern int pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *h); --extern int pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *h); -+#define PFM_ARCH_MAX_PMCS (256+64) -+#define PFM_ARCH_MAX_PMDS (256+64) - -- -- --/* -- * Reset PMD register flags -- */ --#define PFM_PMD_SHORT_RESET 0 --#define PFM_PMD_LONG_RESET 1 -- --typedef union { -- unsigned int val; -- struct { -- unsigned int notify_user:1; /* notify user program of overflow */ -- unsigned int reset_ovfl_pmds:1; /* reset overflowed PMDs */ -- unsigned int block_task:1; /* block monitored task on kernel exit */ -- unsigned int mask_monitoring:1; /* mask monitors via PMCx.plm */ -- unsigned int reserved:28; /* for future use */ -- } bits; --} pfm_ovfl_ctrl_t; -- --typedef struct { -- unsigned char ovfl_pmd; /* index of overflowed PMD */ -- unsigned char ovfl_notify; /* =1 if monitor requested overflow notification */ -- unsigned short active_set; /* event set active at the time of the overflow */ -- pfm_ovfl_ctrl_t ovfl_ctrl; /* return: perfmon controls to set by handler */ -- -- unsigned long pmd_last_reset; /* last reset value of of the PMD */ -- unsigned long smpl_pmds[4]; /* bitmask of other PMD of interest on overflow */ -- unsigned long smpl_pmds_values[PMU_MAX_PMDS]; /* values for the other PMDs of interest */ -- unsigned long pmd_value; /* current 64-bit value of the PMD */ -- unsigned long pmd_eventid; /* eventid associated with PMD */ --} pfm_ovfl_arg_t; -- -- --typedef struct { -- char *fmt_name; -- pfm_uuid_t fmt_uuid; -- size_t fmt_arg_size; -- unsigned long fmt_flags; -- -- int (*fmt_validate)(struct task_struct *task, unsigned int flags, int cpu, void *arg); -- int 
(*fmt_getsize)(struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size); -- int (*fmt_init)(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *arg); -- int (*fmt_handler)(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp); -- int (*fmt_restart)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs); -- int (*fmt_restart_active)(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs); -- int (*fmt_exit)(struct task_struct *task, void *buf, struct pt_regs *regs); -- -- struct list_head fmt_list; --} pfm_buffer_fmt_t; -- --extern int pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt); --extern int pfm_unregister_buffer_fmt(pfm_uuid_t uuid); -+#define PFM_ARCH_PMD_STK_ARG 8 -+#define PFM_ARCH_PMC_STK_ARG 8 - - /* -- * perfmon interface exported to modules -+ * Itanium specific context flags -+ * -+ * bits[00-15]: generic flags (see asm/perfmon.h) -+ * bits[16-31]: arch-specific flags - */ --extern int pfm_mod_read_pmds(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs); --extern int pfm_mod_write_pmcs(struct task_struct *, void *req, unsigned int nreq, struct pt_regs *regs); --extern int pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs); --extern int pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs); -+#define PFM_ITA_FL_INSECURE 0x10000 /* clear psr.sp on non system, non self */ - - /* -- * describe the content of the local_cpu_date->pfm_syst_info field -+ * Itanium specific public event set flags (set_flags) -+ * -+ * event set flags layout: -+ * bits[00-15] : generic flags -+ * bits[16-31] : arch-specific flags - */ --#define PFM_CPUINFO_SYST_WIDE 0x1 /* if set a system wide session exists */ --#define PFM_CPUINFO_DCR_PP 0x2 /* if set the system wide session has started */ --#define PFM_CPUINFO_EXCL_IDLE 
0x4 /* the system wide session excludes the idle task */ -+#define PFM_ITA_SETFL_EXCL_INTR 0x10000 /* exclude interrupt execution */ -+#define PFM_ITA_SETFL_INTR_ONLY 0x20000 /* include only interrupt execution */ -+#define PFM_ITA_SETFL_IDLE_EXCL 0x40000 /* stop monitoring in idle loop */ - - /* -- * sysctl control structure. visible to sampling formats -+ * compatibility for version v2.0 of the interface - */ --typedef struct { -- int debug; /* turn on/off debugging via syslog */ -- int debug_ovfl; /* turn on/off debug printk in overflow handler */ -- int fastctxsw; /* turn on/off fast (unsecure) ctxsw */ -- int expert_mode; /* turn on/off value checking */ --} pfm_sysctl_t; --extern pfm_sysctl_t pfm_sysctl; -- -- --#endif /* __KERNEL__ */ -+#include - --#endif /* _ASM_IA64_PERFMON_H */ -+#endif /* _ASM_IA64_PERFMON_H_ */ -diff --git a/arch/ia64/include/asm/perfmon_compat.h b/arch/ia64/include/asm/perfmon_compat.h -new file mode 100644 -index 0000000..5c14514 ---- /dev/null -+++ b/arch/ia64/include/asm/perfmon_compat.h -@@ -0,0 +1,167 @@ -+/* -+ * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This header file contains perfmon interface definition -+ * that are now obsolete and should be dropped in favor -+ * of their equivalent functions as explained below. -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+ -+#ifndef _ASM_IA64_PERFMON_COMPAT_H_ -+#define _ASM_IA64_PERFMON_COMPAT_H_ -+ -+/* -+ * custom sampling buffer identifier type -+ */ -+typedef __u8 pfm_uuid_t[16]; -+ -+/* -+ * obsolete perfmon commands. Supported only on IA-64 for -+ * backward compatiblity reasons with perfmon v2.0. -+ */ -+#define PFM_WRITE_PMCS 0x01 /* use pfm_write_pmcs */ -+#define PFM_WRITE_PMDS 0x02 /* use pfm_write_pmds */ -+#define PFM_READ_PMDS 0x03 /* use pfm_read_pmds */ -+#define PFM_STOP 0x04 /* use pfm_stop */ -+#define PFM_START 0x05 /* use pfm_start */ -+#define PFM_ENABLE 0x06 /* obsolete */ -+#define PFM_DISABLE 0x07 /* obsolete */ -+#define PFM_CREATE_CONTEXT 0x08 /* use pfm_create_context */ -+#define PFM_DESTROY_CONTEXT 0x09 /* use close() */ -+#define PFM_RESTART 0x0a /* use pfm_restart */ -+#define PFM_PROTECT_CONTEXT 0x0b /* obsolete */ -+#define PFM_GET_FEATURES 0x0c /* use /proc/sys/perfmon */ -+#define PFM_DEBUG 0x0d /* /proc/sys/kernel/perfmon/debug */ -+#define PFM_UNPROTECT_CONTEXT 0x0e /* obsolete */ -+#define PFM_GET_PMC_RESET_VAL 0x0f /* use /proc/perfmon_map */ -+#define PFM_LOAD_CONTEXT 0x10 /* use pfm_load_context */ -+#define PFM_UNLOAD_CONTEXT 0x11 /* use pfm_unload_context */ -+ -+/* -+ * PMU model specific commands (may not be supported on all PMU models) -+ */ -+#define PFM_WRITE_IBRS 0x20 /* obsolete: use PFM_WRITE_PMCS[256-263]*/ -+#define PFM_WRITE_DBRS 0x21 /* obsolete: use PFM_WRITE_PMCS[264-271]*/ -+ -+/* -+ * argument to PFM_CREATE_CONTEXT -+ */ -+struct pfarg_context { -+ pfm_uuid_t ctx_smpl_buf_id; /* buffer format to use */ -+ unsigned long ctx_flags; /* noblock/block */ -+ unsigned int ctx_reserved1; /* for future use */ -+ int ctx_fd; /* return: fildesc */ -+ void *ctx_smpl_vaddr; /* return: vaddr of buffer */ -+ 
unsigned long ctx_reserved3[11];/* for future use */ -+}; -+ -+/* -+ * argument structure for PFM_WRITE_PMCS/PFM_WRITE_PMDS/PFM_WRITE_PMDS -+ */ -+struct pfarg_reg { -+ unsigned int reg_num; /* which register */ -+ unsigned short reg_set; /* event set for this register */ -+ unsigned short reg_reserved1; /* for future use */ -+ -+ unsigned long reg_value; /* initial pmc/pmd value */ -+ unsigned long reg_flags; /* input: flags, ret: error */ -+ -+ unsigned long reg_long_reset; /* reset value after notification */ -+ unsigned long reg_short_reset; /* reset after counter overflow */ -+ -+ unsigned long reg_reset_pmds[4]; /* registers to reset on overflow */ -+ unsigned long reg_random_seed; /* seed for randomization */ -+ unsigned long reg_random_mask; /* random range limit */ -+ unsigned long reg_last_reset_val;/* return: PMD last reset value */ -+ -+ unsigned long reg_smpl_pmds[4]; /* pmds to be saved on overflow */ -+ unsigned long reg_smpl_eventid; /* opaque sampling event id */ -+ unsigned long reg_ovfl_switch_cnt;/* #overflows to switch */ -+ -+ unsigned long reg_reserved2[2]; /* for future use */ -+}; -+ -+/* -+ * argument to PFM_WRITE_IBRS/PFM_WRITE_DBRS -+ */ -+struct pfarg_dbreg { -+ unsigned int dbreg_num; /* which debug register */ -+ unsigned short dbreg_set; /* event set */ -+ unsigned short dbreg_reserved1; /* for future use */ -+ unsigned long dbreg_value; /* value for debug register */ -+ unsigned long dbreg_flags; /* return: dbreg error */ -+ unsigned long dbreg_reserved2[1]; /* for future use */ -+}; -+ -+/* -+ * argument to PFM_GET_FEATURES -+ */ -+struct pfarg_features { -+ unsigned int ft_version; /* major [16-31], minor [0-15] */ -+ unsigned int ft_reserved; /* reserved for future use */ -+ unsigned long reserved[4]; /* for future use */ -+}; -+ -+typedef struct { -+ int msg_type; /* generic message header */ -+ int msg_ctx_fd; /* generic message header */ -+ unsigned long msg_ovfl_pmds[4]; /* which PMDs overflowed */ -+ unsigned short 
msg_active_set; /* active set on overflow */ -+ unsigned short msg_reserved1; /* for future use */ -+ unsigned int msg_reserved2; /* for future use */ -+ unsigned long msg_tstamp; /* for perf tuning/debug */ -+} pfm_ovfl_msg_t; -+ -+typedef struct { -+ int msg_type; /* generic message header */ -+ int msg_ctx_fd; /* generic message header */ -+ unsigned long msg_tstamp; /* for perf tuning */ -+} pfm_end_msg_t; -+ -+typedef struct { -+ int msg_type; /* type of the message */ -+ int msg_ctx_fd; /* context file descriptor */ -+ unsigned long msg_tstamp; /* for perf tuning */ -+} pfm_gen_msg_t; -+ -+typedef union { -+ int type; -+ pfm_ovfl_msg_t pfm_ovfl_msg; -+ pfm_end_msg_t pfm_end_msg; -+ pfm_gen_msg_t pfm_gen_msg; -+} pfm_msg_t; -+ -+/* -+ * PMD/PMC return flags in case of error (ignored on input) -+ * -+ * reg_flags layout: -+ * bit 00-15 : generic flags -+ * bits[16-23] : arch-specific flags (see asm/perfmon.h) -+ * bit 24-31 : error codes -+ * -+ * Those flags are used on output and must be checked in case EINVAL is -+ * returned by a command accepting a vector of values and each has a flag -+ * field, such as pfarg_reg or pfarg_reg -+ */ -+#define PFM_REG_RETFL_NOTAVAIL (1<<31) /* not implemented or unaccessible */ -+#define PFM_REG_RETFL_EINVAL (1<<30) /* entry is invalid */ -+#define PFM_REG_RETFL_MASK (PFM_REG_RETFL_NOTAVAIL|\ -+ PFM_REG_RETFL_EINVAL) -+ -+#define PFM_REG_HAS_ERROR(flag) (((flag) & PFM_REG_RETFL_MASK) != 0) -+ -+#endif /* _ASM_IA64_PERFMON_COMPAT_H_ */ -diff --git a/arch/ia64/include/asm/perfmon_default_smpl.h b/arch/ia64/include/asm/perfmon_default_smpl.h -index 48822c0..8234f32 100644 ---- a/arch/ia64/include/asm/perfmon_default_smpl.h -+++ b/arch/ia64/include/asm/perfmon_default_smpl.h -@@ -1,83 +1,106 @@ - /* -- * Copyright (C) 2002-2003 Hewlett-Packard Co -- * Stephane Eranian -+ * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. 
-+ * Contributed by Stephane Eranian - * -- * This file implements the default sampling buffer format -- * for Linux/ia64 perfmon subsystem. -+ * This file implements the old default sampling buffer format -+ * for the perfmon2 subsystem. For IA-64 only. -+ * -+ * It requires the use of the perfmon_compat.h header. It is recommended -+ * that applications be ported to the new format instead. -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA - */ --#ifndef __PERFMON_DEFAULT_SMPL_H__ --#define __PERFMON_DEFAULT_SMPL_H__ 1 -+#ifndef __ASM_IA64_PERFMON_DEFAULT_SMPL_H__ -+#define __ASM_IA64_PERFMON_DEFAULT_SMPL_H__ 1 -+ -+#ifndef __ia64__ -+#error "this file must be used for compatibility reasons only on IA-64" -+#endif - - #define PFM_DEFAULT_SMPL_UUID { \ -- 0x4d, 0x72, 0xbe, 0xc0, 0x06, 0x64, 0x41, 0x43, 0x82, 0xb4, 0xd3, 0xfd, 0x27, 0x24, 0x3c, 0x97} -+ 0x4d, 0x72, 0xbe, 0xc0, 0x06, 0x64, 0x41, 0x43, 0x82,\ -+ 0xb4, 0xd3, 0xfd, 0x27, 0x24, 0x3c, 0x97} - - /* - * format specific parameters (passed at context creation) - */ --typedef struct { -+struct pfm_default_smpl_arg { - unsigned long buf_size; /* size of the buffer in bytes */ - unsigned int flags; /* buffer specific flags */ - unsigned int res1; /* for future use */ - unsigned long reserved[2]; /* for future use */ --} pfm_default_smpl_arg_t; -+}; - - /* - * combined context+format specific 
structure. Can be passed -- * to PFM_CONTEXT_CREATE -+ * to PFM_CONTEXT_CREATE (not PFM_CONTEXT_CREATE2) - */ --typedef struct { -- pfarg_context_t ctx_arg; -- pfm_default_smpl_arg_t buf_arg; --} pfm_default_smpl_ctx_arg_t; -+struct pfm_default_smpl_ctx_arg { -+ struct pfarg_context ctx_arg; -+ struct pfm_default_smpl_arg buf_arg; -+}; - - /* - * This header is at the beginning of the sampling buffer returned to the user. - * It is directly followed by the first record. - */ --typedef struct { -- unsigned long hdr_count; /* how many valid entries */ -- unsigned long hdr_cur_offs; /* current offset from top of buffer */ -- unsigned long hdr_reserved2; /* reserved for future use */ -+struct pfm_default_smpl_hdr { -+ u64 hdr_count; /* how many valid entries */ -+ u64 hdr_cur_offs; /* current offset from top of buffer */ -+ u64 dr_reserved2; /* reserved for future use */ - -- unsigned long hdr_overflows; /* how many times the buffer overflowed */ -- unsigned long hdr_buf_size; /* how many bytes in the buffer */ -+ u64 hdr_overflows; /* how many times the buffer overflowed */ -+ u64 hdr_buf_size; /* how many bytes in the buffer */ - -- unsigned int hdr_version; /* contains perfmon version (smpl format diffs) */ -- unsigned int hdr_reserved1; /* for future use */ -- unsigned long hdr_reserved[10]; /* for future use */ --} pfm_default_smpl_hdr_t; -+ u32 hdr_version; /* smpl format version*/ -+ u32 hdr_reserved1; /* for future use */ -+ u64 hdr_reserved[10]; /* for future use */ -+}; - - /* - * Entry header in the sampling buffer. The header is directly followed -- * with the values of the PMD registers of interest saved in increasing -- * index order: PMD4, PMD5, and so on. How many PMDs are present depends -+ * with the values of the PMD registers of interest saved in increasing -+ * index order: PMD4, PMD5, and so on. How many PMDs are present depends - * on how the session was programmed. 
- * - * In the case where multiple counters overflow at the same time, multiple - * entries are written consecutively. - * -- * last_reset_value member indicates the initial value of the overflowed PMD. -+ * last_reset_value member indicates the initial value of the overflowed PMD. - */ --typedef struct { -- int pid; /* thread id (for NPTL, this is gettid()) */ -- unsigned char reserved1[3]; /* reserved for future use */ -- unsigned char ovfl_pmd; /* index of overflowed PMD */ -- -- unsigned long last_reset_val; /* initial value of overflowed PMD */ -- unsigned long ip; /* where did the overflow interrupt happened */ -- unsigned long tstamp; /* ar.itc when entering perfmon intr. handler */ -- -- unsigned short cpu; /* cpu on which the overfow occured */ -- unsigned short set; /* event set active when overflow ocurred */ -- int tgid; /* thread group id (for NPTL, this is getpid()) */ --} pfm_default_smpl_entry_t; -+struct pfm_default_smpl_entry { -+ pid_t pid; /* thread id (for NPTL, this is gettid()) */ -+ uint8_t reserved1[3]; /* for future use */ -+ uint8_t ovfl_pmd; /* overflow pmd for this sample */ -+ u64 last_reset_val; /* initial value of overflowed PMD */ -+ unsigned long ip; /* where did the overflow interrupt happened */ -+ u64 tstamp; /* overflow timetamp */ -+ u16 cpu; /* cpu on which the overfow occured */ -+ u16 set; /* event set active when overflow ocurred */ -+ pid_t tgid; /* thread group id (for NPTL, this is getpid()) */ -+}; - --#define PFM_DEFAULT_MAX_PMDS 64 /* how many pmds supported by data structures (sizeof(unsigned long) */ --#define PFM_DEFAULT_MAX_ENTRY_SIZE (sizeof(pfm_default_smpl_entry_t)+(sizeof(unsigned long)*PFM_DEFAULT_MAX_PMDS)) --#define PFM_DEFAULT_SMPL_MIN_BUF_SIZE (sizeof(pfm_default_smpl_hdr_t)+PFM_DEFAULT_MAX_ENTRY_SIZE) -+#define PFM_DEFAULT_MAX_PMDS 64 /* #pmds supported */ -+#define PFM_DEFAULT_MAX_ENTRY_SIZE (sizeof(struct pfm_default_smpl_entry)+\ -+ (sizeof(u64)*PFM_DEFAULT_MAX_PMDS)) -+#define 
PFM_DEFAULT_SMPL_MIN_BUF_SIZE (sizeof(struct pfm_default_smpl_hdr)+\ -+ PFM_DEFAULT_MAX_ENTRY_SIZE) - - #define PFM_DEFAULT_SMPL_VERSION_MAJ 2U --#define PFM_DEFAULT_SMPL_VERSION_MIN 0U --#define PFM_DEFAULT_SMPL_VERSION (((PFM_DEFAULT_SMPL_VERSION_MAJ&0xffff)<<16)|(PFM_DEFAULT_SMPL_VERSION_MIN & 0xffff)) -+#define PFM_DEFAULT_SMPL_VERSION_MIN 1U -+#define PFM_DEFAULT_SMPL_VERSION (((PFM_DEFAULT_SMPL_VERSION_MAJ&0xffff)<<16)|\ -+ (PFM_DEFAULT_SMPL_VERSION_MIN & 0xffff)) - --#endif /* __PERFMON_DEFAULT_SMPL_H__ */ -+#endif /* __ASM_IA64_PERFMON_DEFAULT_SMPL_H__ */ -diff --git a/arch/ia64/include/asm/perfmon_kern.h b/arch/ia64/include/asm/perfmon_kern.h -new file mode 100644 -index 0000000..fb40459 ---- /dev/null -+++ b/arch/ia64/include/asm/perfmon_kern.h -@@ -0,0 +1,356 @@ -+/* -+ * Copyright (c) 2001-2007 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This file contains Itanium Processor Family specific definitions -+ * for the perfmon interface. -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#ifndef _ASM_IA64_PERFMON_KERN_H_ -+#define _ASM_IA64_PERFMON_KERN_H_ -+ -+#ifdef __KERNEL__ -+ -+#ifdef CONFIG_PERFMON -+#include -+#include -+ -+/* -+ * describe the content of the pfm_syst_info field -+ * layout: -+ * bits[00-15] : generic flags -+ * bits[16-31] : arch-specific flags -+ */ -+#define PFM_ITA_CPUINFO_IDLE_EXCL 0x10000 /* stop monitoring in idle loop */ -+ -+/* -+ * For some CPUs, the upper bits of a counter must be set in order for the -+ * overflow interrupt to happen. On overflow, the counter has wrapped around, -+ * and the upper bits are cleared. This function may be used to set them back. -+ */ -+static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx, -+ unsigned int cnum) -+{} -+ -+/* -+ * called from __pfm_interrupt_handler(). ctx is not NULL. -+ * ctx is locked. PMU interrupt is masked. -+ * -+ * must stop all monitoring to ensure handler has consistent view. -+ * must collect overflowed PMDs bitmask into povfls_pmds and -+ * npend_ovfls. If no interrupt detected then npend_ovfls -+ * must be set to zero. 
-+ */ -+static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ u64 tmp; -+ -+ /* -+ * do not overwrite existing value, must -+ * process those first (coming from context switch replay) -+ */ -+ if (set->npend_ovfls) -+ return; -+ -+ ia64_srlz_d(); -+ -+ tmp = ia64_get_pmc(0) & ~0xf; -+ -+ set->povfl_pmds[0] = tmp; -+ -+ set->npend_ovfls = ia64_popcnt(tmp); -+} -+ -+static inline int pfm_arch_init_pmu_config(void) -+{ -+ return 0; -+} -+ -+static inline void pfm_arch_resend_irq(struct pfm_context *ctx) -+{ -+ ia64_resend_irq(IA64_PERFMON_VECTOR); -+} -+ -+static inline void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{} -+ -+static inline void pfm_arch_serialize(void) -+{ -+ ia64_srlz_d(); -+} -+ -+static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx) -+{ -+ PFM_DBG_ovfl("state=%d", ctx->state); -+ ia64_set_pmc(0, 0); -+ /* no serialization */ -+} -+ -+static inline void pfm_arch_write_pmc(struct pfm_context *ctx, -+ unsigned int cnum, u64 value) -+{ -+ if (cnum < 256) { -+ ia64_set_pmc(pfm_pmu_conf->pmc_desc[cnum].hw_addr, value); -+ } else if (cnum < 264) { -+ ia64_set_ibr(cnum-256, value); -+ ia64_dv_serialize_instruction(); -+ } else { -+ ia64_set_dbr(cnum-264, value); -+ ia64_dv_serialize_instruction(); -+ } -+} -+ -+/* -+ * On IA-64, for per-thread context which have the ITA_FL_INSECURE -+ * flag, it is possible to start/stop monitoring directly from user evel -+ * without calling pfm_start()/pfm_stop. This allows very lightweight -+ * control yet the kernel sometimes needs to know if monitoring is actually -+ * on or off. -+ * -+ * Tracking of this information is normally done by pfm_start/pfm_stop -+ * in flags.started. Here we need to compensate by checking actual -+ * psr bit. 
-+ */ -+static inline int pfm_arch_is_active(struct pfm_context *ctx) -+{ -+ return ctx->flags.started -+ || ia64_getreg(_IA64_REG_PSR) & (IA64_PSR_UP|IA64_PSR_PP); -+} -+ -+static inline void pfm_arch_write_pmd(struct pfm_context *ctx, -+ unsigned int cnum, u64 value) -+{ -+ /* -+ * for a counting PMD, overflow bit must be cleared -+ */ -+ if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_C64) -+ value &= pfm_pmu_conf->ovfl_mask; -+ -+ /* -+ * for counters, write to upper bits are ignored, no need to mask -+ */ -+ ia64_set_pmd(pfm_pmu_conf->pmd_desc[cnum].hw_addr, value); -+} -+ -+static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum) -+{ -+ return ia64_get_pmd(pfm_pmu_conf->pmd_desc[cnum].hw_addr); -+} -+ -+static inline u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum) -+{ -+ return ia64_get_pmc(pfm_pmu_conf->pmc_desc[cnum].hw_addr); -+} -+ -+static inline void pfm_arch_ctxswout_sys(struct task_struct *task, -+ struct pfm_context *ctx) -+{ -+ struct pt_regs *regs; -+ -+ regs = task_pt_regs(task); -+ ia64_psr(regs)->pp = 0; -+} -+ -+static inline void pfm_arch_ctxswin_sys(struct task_struct *task, -+ struct pfm_context *ctx) -+{ -+ struct pt_regs *regs; -+ -+ if (!(ctx->active_set->flags & PFM_ITA_SETFL_INTR_ONLY)) { -+ regs = task_pt_regs(task); -+ ia64_psr(regs)->pp = 1; -+ } -+} -+ -+/* -+ * On IA-64, the PMDs are NOT saved by pfm_arch_freeze_pmu() -+ * when entering the PMU interrupt handler, thus, we need -+ * to save them in pfm_switch_sets_from_intr() -+ */ -+static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ pfm_save_pmds(ctx, set); -+} -+ -+int pfm_arch_context_create(struct pfm_context *ctx, u32 ctx_flags); -+ -+static inline void pfm_arch_context_free(struct pfm_context *ctx) -+{} -+ -+int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx); -+void pfm_arch_ctxswin_thread(struct task_struct *task, -+ struct pfm_context *ctx); -+ 
-+void pfm_arch_unload_context(struct pfm_context *ctx); -+int pfm_arch_load_context(struct pfm_context *ctx); -+int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags); -+ -+void pfm_arch_mask_monitoring(struct pfm_context *ctx, -+ struct pfm_event_set *set); -+void pfm_arch_unmask_monitoring(struct pfm_context *ctx, -+ struct pfm_event_set *set); -+ -+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set); -+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set); -+ -+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx); -+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx); -+ -+int pfm_arch_init(void); -+void pfm_arch_init_percpu(void); -+char *pfm_arch_get_pmu_module_name(void); -+ -+int __pfm_use_dbregs(struct task_struct *task); -+int __pfm_release_dbregs(struct task_struct *task); -+int pfm_ia64_mark_dbregs_used(struct pfm_context *ctx, -+ struct pfm_event_set *set); -+ -+void pfm_arch_show_session(struct seq_file *m); -+ -+static inline int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds) -+{ -+ return 0; -+} -+ -+static inline void pfm_arch_pmu_release(void) -+{} -+ -+/* not necessary on IA-64 */ -+static inline void pfm_cacheflush(void *addr, unsigned int len) -+{} -+ -+/* -+ * miscellaneous architected definitions -+ */ -+#define PFM_ITA_FCNTR 4 /* first counting monitor (PMC/PMD) */ -+ -+/* -+ * private event set flags (set_priv_flags) -+ */ -+#define PFM_ITA_SETFL_USE_DBR 0x1000000 /* set uses debug registers */ -+ -+ -+/* -+ * Itanium-specific data structures -+ */ -+struct pfm_ia64_context_flags { -+ unsigned int use_dbr:1; /* use range restrictions (debug registers) */ -+ unsigned int insecure:1; /* insecure monitoring for non-self session */ -+ unsigned int reserved:30;/* for future use */ -+}; -+ -+struct pfm_arch_context { -+ struct pfm_ia64_context_flags flags; /* arch specific ctx flags */ -+ u64 ctx_saved_psr_up;/* storage for psr_up */ -+#ifdef 
CONFIG_IA64_PERFMON_COMPAT -+ void *ctx_smpl_vaddr; /* vaddr of user mapping */ -+#endif -+}; -+ -+#ifdef CONFIG_IA64_PERFMON_COMPAT -+ssize_t pfm_arch_compat_read(struct pfm_context *ctx, -+ char __user *buf, -+ int non_block, -+ size_t size); -+int pfm_ia64_compat_init(void); -+int pfm_smpl_buf_alloc_compat(struct pfm_context *ctx, -+ size_t rsize, struct file *filp); -+#else -+static inline ssize_t pfm_arch_compat_read(struct pfm_context *ctx, -+ char __user *buf, -+ int non_block, -+ size_t size) -+{ -+ return -EINVAL; -+} -+ -+static inline int pfm_smpl_buf_alloc_compat(struct pfm_context *ctx, -+ size_t rsize, struct file *filp) -+{ -+ return -EINVAL; -+} -+#endif -+ -+static inline void pfm_arch_arm_handle_work(struct task_struct *task) -+{ -+ /* -+ * On IA-64, we ran out of bits in the bottom 7 bits of the -+ * threadinfo bitmask.Thus we used a 2-stage approach by piggybacking -+ * on NOTIFY_RESUME and then in do_notify_resume() we demultiplex and -+ * call pfm_handle_work() if needed -+ */ -+ set_tsk_thread_flag(task, TIF_NOTIFY_RESUME); -+} -+ -+static inline void pfm_arch_disarm_handle_work(struct task_struct *task) -+{ -+ /* -+ * we cannot just clear TIF_NOTIFY_RESUME because other TIF flags are -+ * piggybackedonto it: TIF_PERFMON_WORK, TIF_RESTORE_RSE -+ * -+ * The tsk_clear_notify_resume() checks if any of those are set before -+ * clearing the * bit -+ */ -+ tsk_clear_notify_resume(task); -+} -+ -+static inline int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg) -+{ -+ return 0; -+} -+ -+extern struct pfm_ia64_pmu_info *pfm_ia64_pmu_info; -+ -+#define PFM_ARCH_CTX_SIZE (sizeof(struct pfm_arch_context)) -+ -+/* -+ * IA-64 does not need extra alignment requirements for the sampling buffer -+ */ -+#define PFM_ARCH_SMPL_ALIGN_SIZE 0 -+ -+ -+static inline void pfm_release_dbregs(struct task_struct *task) -+{ -+ if (task->thread.flags & IA64_THREAD_DBG_VALID) -+ __pfm_release_dbregs(task); -+} -+ -+#define pfm_use_dbregs(_t) __pfm_use_dbregs(_t) -+ 
-+static inline int pfm_arch_get_base_syscall(void) -+{ -+ return __NR_pfm_create_context; -+} -+ -+struct pfm_arch_pmu_info { -+ unsigned long mask_pmcs[PFM_PMC_BV]; /* modify on when masking */ -+}; -+ -+DECLARE_PER_CPU(u32, pfm_syst_info); -+#else /* !CONFIG_PERFMON */ -+/* -+ * perfmon ia64-specific hooks -+ */ -+#define pfm_release_dbregs(_t) do { } while (0) -+#define pfm_use_dbregs(_t) (0) -+ -+#endif /* CONFIG_PERFMON */ -+ -+#endif /* __KERNEL__ */ -+#endif /* _ASM_IA64_PERFMON_KERN_H_ */ -diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h -index f88fa05..9d6af9c 100644 ---- a/arch/ia64/include/asm/processor.h -+++ b/arch/ia64/include/asm/processor.h -@@ -42,7 +42,6 @@ - - #define IA64_THREAD_FPH_VALID (__IA64_UL(1) << 0) /* floating-point high state valid? */ - #define IA64_THREAD_DBG_VALID (__IA64_UL(1) << 1) /* debug registers valid? */ --#define IA64_THREAD_PM_VALID (__IA64_UL(1) << 2) /* performance registers valid? */ - #define IA64_THREAD_UAC_NOPRINT (__IA64_UL(1) << 3) /* don't log unaligned accesses */ - #define IA64_THREAD_UAC_SIGBUS (__IA64_UL(1) << 4) /* generate SIGBUS on unaligned acc. 
*/ - #define IA64_THREAD_MIGRATION (__IA64_UL(1) << 5) /* require migration -@@ -321,14 +320,6 @@ struct thread_struct { - #else - # define INIT_THREAD_IA32 - #endif /* CONFIG_IA32_SUPPORT */ --#ifdef CONFIG_PERFMON -- void *pfm_context; /* pointer to detailed PMU context */ -- unsigned long pfm_needs_checking; /* when >0, pending perfmon work on kernel exit */ --# define INIT_THREAD_PM .pfm_context = NULL, \ -- .pfm_needs_checking = 0UL, --#else --# define INIT_THREAD_PM --#endif - __u64 dbr[IA64_NUM_DBG_REGS]; - __u64 ibr[IA64_NUM_DBG_REGS]; - struct ia64_fpreg fph[96]; /* saved/loaded on demand */ -@@ -343,7 +334,6 @@ struct thread_struct { - .task_size = DEFAULT_TASK_SIZE, \ - .last_fph_cpu = -1, \ - INIT_THREAD_IA32 \ -- INIT_THREAD_PM \ - .dbr = {0, }, \ - .ibr = {0, }, \ - .fph = {{{{0}}}, } \ -diff --git a/arch/ia64/include/asm/system.h b/arch/ia64/include/asm/system.h -index 927a381..ab5aeea 100644 ---- a/arch/ia64/include/asm/system.h -+++ b/arch/ia64/include/asm/system.h -@@ -217,6 +217,7 @@ struct task_struct; - extern void ia64_save_extra (struct task_struct *task); - extern void ia64_load_extra (struct task_struct *task); - -+ - #ifdef CONFIG_VIRT_CPU_ACCOUNTING - extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct *next); - # define IA64_ACCOUNT_ON_SWITCH(p,n) ia64_account_on_switch(p,n) -@@ -224,16 +225,9 @@ extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct - # define IA64_ACCOUNT_ON_SWITCH(p,n) - #endif - --#ifdef CONFIG_PERFMON -- DECLARE_PER_CPU(unsigned long, pfm_syst_info); --# define PERFMON_IS_SYSWIDE() (__get_cpu_var(pfm_syst_info) & 0x1) --#else --# define PERFMON_IS_SYSWIDE() (0) --#endif -- --#define IA64_HAS_EXTRA_STATE(t) \ -- ((t)->thread.flags & (IA64_THREAD_DBG_VALID|IA64_THREAD_PM_VALID) \ -- || IS_IA32_PROCESS(task_pt_regs(t)) || PERFMON_IS_SYSWIDE()) -+#define IA64_HAS_EXTRA_STATE(t) \ -+ (((t)->thread.flags & IA64_THREAD_DBG_VALID) \ -+ || 
IS_IA32_PROCESS(task_pt_regs(t))) - - #define __switch_to(prev,next,last) do { \ - IA64_ACCOUNT_ON_SWITCH(prev, next); \ -@@ -241,6 +235,10 @@ extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct - ia64_save_extra(prev); \ - if (IA64_HAS_EXTRA_STATE(next)) \ - ia64_load_extra(next); \ -+ if (test_tsk_thread_flag(prev, TIF_PERFMON_CTXSW)) \ -+ pfm_ctxsw_out(prev, next); \ -+ if (test_tsk_thread_flag(next, TIF_PERFMON_CTXSW)) \ -+ pfm_ctxsw_in(prev, next); \ - ia64_psr(task_pt_regs(next))->dfh = !ia64_is_local_fpu_owner(next); \ - (last) = ia64_switch_to((next)); \ - } while (0) -diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h -index 7c60fcd..3355332 100644 ---- a/arch/ia64/include/asm/thread_info.h -+++ b/arch/ia64/include/asm/thread_info.h -@@ -110,6 +110,8 @@ extern void tsk_clear_notify_resume(struct task_struct *tsk); - #define TIF_DB_DISABLED 19 /* debug trap disabled for fsyscall */ - #define TIF_FREEZE 20 /* is freezing for suspend */ - #define TIF_RESTORE_RSE 21 /* user RBS is newer than kernel RBS */ -+#define TIF_PERFMON_CTXSW 22 /* perfmon needs ctxsw calls */ -+#define TIF_PERFMON_WORK 23 /* work for pfm_handle_work() */ - - #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) - #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) -@@ -123,6 +125,8 @@ extern void tsk_clear_notify_resume(struct task_struct *tsk); - #define _TIF_DB_DISABLED (1 << TIF_DB_DISABLED) - #define _TIF_FREEZE (1 << TIF_FREEZE) - #define _TIF_RESTORE_RSE (1 << TIF_RESTORE_RSE) -+#define _TIF_PERFMON_CTXSW (1 << TIF_PERFMON_CTXSW) -+#define _TIF_PERFMON_WORK (1 << TIF_PERFMON_WORK) - - /* "work to do on user-return" bits */ - #define TIF_ALLWORK_MASK (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SYSCALL_AUDIT|\ -diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h -index d535833..29a43bc 100644 ---- a/arch/ia64/include/asm/unistd.h -+++ b/arch/ia64/include/asm/unistd.h -@@ -308,11 +308,23 @@ - 
#define __NR_dup3 1316 - #define __NR_pipe2 1317 - #define __NR_inotify_init1 1318 -+#define __NR_pfm_create_context 1319 -+#define __NR_pfm_write_pmcs (__NR_pfm_create_context+1) -+#define __NR_pfm_write_pmds (__NR_pfm_create_context+2) -+#define __NR_pfm_read_pmds (__NR_pfm_create_context+3) -+#define __NR_pfm_load_context (__NR_pfm_create_context+4) -+#define __NR_pfm_start (__NR_pfm_create_context+5) -+#define __NR_pfm_stop (__NR_pfm_create_context+6) -+#define __NR_pfm_restart (__NR_pfm_create_context+7) -+#define __NR_pfm_create_evtsets (__NR_pfm_create_context+8) -+#define __NR_pfm_getinfo_evtsets (__NR_pfm_create_context+9) -+#define __NR_pfm_delete_evtsets (__NR_pfm_create_context+10) -+#define __NR_pfm_unload_context (__NR_pfm_create_context+11) - - #ifdef __KERNEL__ - - --#define NR_syscalls 295 /* length of syscall table */ -+#define NR_syscalls 307 /* length of syscall table */ - - /* - * The following defines stop scripts/checksyscalls.sh from complaining about -diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile -index 87fea11..b5ac54c 100644 ---- a/arch/ia64/kernel/Makefile -+++ b/arch/ia64/kernel/Makefile -@@ -5,7 +5,7 @@ - extra-y := head.o init_task.o vmlinux.lds - - obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \ -- irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \ -+ irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o ptrace.o sal.o \ - salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \ - unwind.o mca.o mca_asm.o topology.o - -@@ -23,7 +23,6 @@ obj-$(CONFIG_IOSAPIC) += iosapic.o - obj-$(CONFIG_MODULES) += module.o - obj-$(CONFIG_SMP) += smp.o smpboot.o - obj-$(CONFIG_NUMA) += numa.o --obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o - obj-$(CONFIG_IA64_CYCLONE) += cyclone.o - obj-$(CONFIG_CPU_FREQ) += cpufreq/ - obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o -diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S -index 
0dd6c14..f1c3e41 100644 ---- a/arch/ia64/kernel/entry.S -+++ b/arch/ia64/kernel/entry.S -@@ -1697,6 +1697,18 @@ sys_call_table: - data8 sys_dup3 - data8 sys_pipe2 - data8 sys_inotify_init1 -+ data8 sys_pfm_create_context -+ data8 sys_pfm_write_pmcs // 1320 -+ data8 sys_pfm_write_pmds -+ data8 sys_pfm_read_pmds -+ data8 sys_pfm_load_context -+ data8 sys_pfm_start -+ data8 sys_pfm_stop // 1325 -+ data8 sys_pfm_restart -+ data8 sys_pfm_create_evtsets -+ data8 sys_pfm_getinfo_evtsets -+ data8 sys_pfm_delete_evtsets -+ data8 sys_pfm_unload_context // 1330 - - .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls - #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ -diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c -index 28d3d48..ede8024 100644 ---- a/arch/ia64/kernel/irq_ia64.c -+++ b/arch/ia64/kernel/irq_ia64.c -@@ -40,10 +40,6 @@ - #include - #include - --#ifdef CONFIG_PERFMON --# include --#endif -- - #define IRQ_DEBUG 0 - - #define IRQ_VECTOR_UNASSIGNED (0) -@@ -660,9 +656,6 @@ init_IRQ (void) - } - #endif - #endif --#ifdef CONFIG_PERFMON -- pfm_init_percpu(); --#endif - platform_irq_init(); - } - -diff --git a/arch/ia64/kernel/perfmon_default_smpl.c b/arch/ia64/kernel/perfmon_default_smpl.c -deleted file mode 100644 -index 5f637bb..0000000 ---- a/arch/ia64/kernel/perfmon_default_smpl.c -+++ /dev/null -@@ -1,296 +0,0 @@ --/* -- * Copyright (C) 2002-2003 Hewlett-Packard Co -- * Stephane Eranian -- * -- * This file implements the default sampling buffer format -- * for the Linux/ia64 perfmon-2 subsystem. 
-- */ --#include --#include --#include --#include --#include --#include -- --#include --#include -- --MODULE_AUTHOR("Stephane Eranian "); --MODULE_DESCRIPTION("perfmon default sampling format"); --MODULE_LICENSE("GPL"); -- --#define DEFAULT_DEBUG 1 -- --#ifdef DEFAULT_DEBUG --#define DPRINT(a) \ -- do { \ -- if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d ", __func__, __LINE__, smp_processor_id()); printk a; } \ -- } while (0) -- --#define DPRINT_ovfl(a) \ -- do { \ -- if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d ", __func__, __LINE__, smp_processor_id()); printk a; } \ -- } while (0) -- --#else --#define DPRINT(a) --#define DPRINT_ovfl(a) --#endif -- --static int --default_validate(struct task_struct *task, unsigned int flags, int cpu, void *data) --{ -- pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t*)data; -- int ret = 0; -- -- if (data == NULL) { -- DPRINT(("[%d] no argument passed\n", task_pid_nr(task))); -- return -EINVAL; -- } -- -- DPRINT(("[%d] validate flags=0x%x CPU%d\n", task_pid_nr(task), flags, cpu)); -- -- /* -- * must hold at least the buffer header + one minimally sized entry -- */ -- if (arg->buf_size < PFM_DEFAULT_SMPL_MIN_BUF_SIZE) return -EINVAL; -- -- DPRINT(("buf_size=%lu\n", arg->buf_size)); -- -- return ret; --} -- --static int --default_get_size(struct task_struct *task, unsigned int flags, int cpu, void *data, unsigned long *size) --{ -- pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data; -- -- /* -- * size has been validated in default_validate -- */ -- *size = arg->buf_size; -- -- return 0; --} -- --static int --default_init(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *data) --{ -- pfm_default_smpl_hdr_t *hdr; -- pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data; -- -- hdr = (pfm_default_smpl_hdr_t *)buf; -- -- hdr->hdr_version = PFM_DEFAULT_SMPL_VERSION; -- hdr->hdr_buf_size = arg->buf_size; -- hdr->hdr_cur_offs = sizeof(*hdr); -- 
hdr->hdr_overflows = 0UL; -- hdr->hdr_count = 0UL; -- -- DPRINT(("[%d] buffer=%p buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n", -- task_pid_nr(task), -- buf, -- hdr->hdr_buf_size, -- sizeof(*hdr), -- hdr->hdr_version, -- hdr->hdr_cur_offs)); -- -- return 0; --} -- --static int --default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp) --{ -- pfm_default_smpl_hdr_t *hdr; -- pfm_default_smpl_entry_t *ent; -- void *cur, *last; -- unsigned long *e, entry_size; -- unsigned int npmds, i; -- unsigned char ovfl_pmd; -- unsigned char ovfl_notify; -- -- if (unlikely(buf == NULL || arg == NULL|| regs == NULL || task == NULL)) { -- DPRINT(("[%d] invalid arguments buf=%p arg=%p\n", task->pid, buf, arg)); -- return -EINVAL; -- } -- -- hdr = (pfm_default_smpl_hdr_t *)buf; -- cur = buf+hdr->hdr_cur_offs; -- last = buf+hdr->hdr_buf_size; -- ovfl_pmd = arg->ovfl_pmd; -- ovfl_notify = arg->ovfl_notify; -- -- /* -- * precheck for sanity -- */ -- if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full; -- -- npmds = hweight64(arg->smpl_pmds[0]); -- -- ent = (pfm_default_smpl_entry_t *)cur; -- -- prefetch(arg->smpl_pmds_values); -- -- entry_size = sizeof(*ent) + (npmds << 3); -- -- /* position for first pmd */ -- e = (unsigned long *)(ent+1); -- -- hdr->hdr_count++; -- -- DPRINT_ovfl(("[%d] count=%lu cur=%p last=%p free_bytes=%lu ovfl_pmd=%d ovfl_notify=%d npmds=%u\n", -- task->pid, -- hdr->hdr_count, -- cur, last, -- last-cur, -- ovfl_pmd, -- ovfl_notify, npmds)); -- -- /* -- * current = task running at the time of the overflow. -- * -- * per-task mode: -- * - this is ususally the task being monitored. 
-- * Under certain conditions, it might be a different task -- * -- * system-wide: -- * - this is not necessarily the task controlling the session -- */ -- ent->pid = current->pid; -- ent->ovfl_pmd = ovfl_pmd; -- ent->last_reset_val = arg->pmd_last_reset; //pmd[0].reg_last_reset_val; -- -- /* -- * where did the fault happen (includes slot number) -- */ -- ent->ip = regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3); -- -- ent->tstamp = stamp; -- ent->cpu = smp_processor_id(); -- ent->set = arg->active_set; -- ent->tgid = current->tgid; -- -- /* -- * selectively store PMDs in increasing index number -- */ -- if (npmds) { -- unsigned long *val = arg->smpl_pmds_values; -- for(i=0; i < npmds; i++) { -- *e++ = *val++; -- } -- } -- -- /* -- * update position for next entry -- */ -- hdr->hdr_cur_offs += entry_size; -- cur += entry_size; -- -- /* -- * post check to avoid losing the last sample -- */ -- if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full; -- -- /* -- * keep same ovfl_pmds, ovfl_notify -- */ -- arg->ovfl_ctrl.bits.notify_user = 0; -- arg->ovfl_ctrl.bits.block_task = 0; -- arg->ovfl_ctrl.bits.mask_monitoring = 0; -- arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; /* reset before returning from interrupt handler */ -- -- return 0; --full: -- DPRINT_ovfl(("sampling buffer full free=%lu, count=%lu, ovfl_notify=%d\n", last-cur, hdr->hdr_count, ovfl_notify)); -- -- /* -- * increment number of buffer overflow. -- * important to detect duplicate set of samples. 
-- */ -- hdr->hdr_overflows++; -- -- /* -- * if no notification requested, then we saturate the buffer -- */ -- if (ovfl_notify == 0) { -- arg->ovfl_ctrl.bits.notify_user = 0; -- arg->ovfl_ctrl.bits.block_task = 0; -- arg->ovfl_ctrl.bits.mask_monitoring = 1; -- arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; -- } else { -- arg->ovfl_ctrl.bits.notify_user = 1; -- arg->ovfl_ctrl.bits.block_task = 1; /* ignored for non-blocking context */ -- arg->ovfl_ctrl.bits.mask_monitoring = 1; -- arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; /* no reset now */ -- } -- return -1; /* we are full, sorry */ --} -- --static int --default_restart(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) --{ -- pfm_default_smpl_hdr_t *hdr; -- -- hdr = (pfm_default_smpl_hdr_t *)buf; -- -- hdr->hdr_count = 0UL; -- hdr->hdr_cur_offs = sizeof(*hdr); -- -- ctrl->bits.mask_monitoring = 0; -- ctrl->bits.reset_ovfl_pmds = 1; /* uses long-reset values */ -- -- return 0; --} -- --static int --default_exit(struct task_struct *task, void *buf, struct pt_regs *regs) --{ -- DPRINT(("[%d] exit(%p)\n", task_pid_nr(task), buf)); -- return 0; --} -- --static pfm_buffer_fmt_t default_fmt={ -- .fmt_name = "default_format", -- .fmt_uuid = PFM_DEFAULT_SMPL_UUID, -- .fmt_arg_size = sizeof(pfm_default_smpl_arg_t), -- .fmt_validate = default_validate, -- .fmt_getsize = default_get_size, -- .fmt_init = default_init, -- .fmt_handler = default_handler, -- .fmt_restart = default_restart, -- .fmt_restart_active = default_restart, -- .fmt_exit = default_exit, --}; -- --static int __init --pfm_default_smpl_init_module(void) --{ -- int ret; -- -- ret = pfm_register_buffer_fmt(&default_fmt); -- if (ret == 0) { -- printk("perfmon_default_smpl: %s v%u.%u registered\n", -- default_fmt.fmt_name, -- PFM_DEFAULT_SMPL_VERSION_MAJ, -- PFM_DEFAULT_SMPL_VERSION_MIN); -- } else { -- printk("perfmon_default_smpl: %s cannot register ret=%d\n", -- default_fmt.fmt_name, -- ret); -- } -- -- return ret; --} -- --static void 
__exit --pfm_default_smpl_cleanup_module(void) --{ -- int ret; -- ret = pfm_unregister_buffer_fmt(default_fmt.fmt_uuid); -- -- printk("perfmon_default_smpl: unregister %s=%d\n", default_fmt.fmt_name, ret); --} -- --module_init(pfm_default_smpl_init_module); --module_exit(pfm_default_smpl_cleanup_module); -- -diff --git a/arch/ia64/kernel/perfmon_generic.h b/arch/ia64/kernel/perfmon_generic.h -deleted file mode 100644 -index 6748947..0000000 ---- a/arch/ia64/kernel/perfmon_generic.h -+++ /dev/null -@@ -1,45 +0,0 @@ --/* -- * This file contains the generic PMU register description tables -- * and pmc checker used by perfmon.c. -- * -- * Copyright (C) 2002-2003 Hewlett Packard Co -- * Stephane Eranian -- */ -- --static pfm_reg_desc_t pfm_gen_pmc_desc[PMU_MAX_PMCS]={ --/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -- { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ --}; -- --static pfm_reg_desc_t pfm_gen_pmd_desc[PMU_MAX_PMDS]={ --/* pmd0 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, --/* pmd1 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, --/* pmd2 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, --/* pmd3 */ 
{ PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, --/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, --/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, --/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, --/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, -- { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ --}; -- --/* -- * impl_pmcs, impl_pmds are computed at runtime to minimize errors! -- */ --static pmu_config_t pmu_conf_gen={ -- .pmu_name = "Generic", -- .pmu_family = 0xff, /* any */ -- .ovfl_val = (1UL << 32) - 1, -- .num_ibrs = 0, /* does not use */ -- .num_dbrs = 0, /* does not use */ -- .pmd_desc = pfm_gen_pmd_desc, -- .pmc_desc = pfm_gen_pmc_desc --}; -- -diff --git a/arch/ia64/kernel/perfmon_itanium.h b/arch/ia64/kernel/perfmon_itanium.h -deleted file mode 100644 -index d1d508a..0000000 ---- a/arch/ia64/kernel/perfmon_itanium.h -+++ /dev/null -@@ -1,115 +0,0 @@ --/* -- * This file contains the Itanium PMU register description tables -- * and pmc checker used by perfmon.c. 
-- * -- * Copyright (C) 2002-2003 Hewlett Packard Co -- * Stephane Eranian -- */ --static int pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); -- --static pfm_reg_desc_t pfm_ita_pmc_desc[PMU_MAX_PMCS]={ --/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc8 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc9 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc10 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0000000010000000UL, -1UL, NULL, pfm_ita_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc13 */ { PFM_REG_CONFIG , 0, 0x0003ffff00000001UL, -1UL, NULL, pfm_ita_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -- { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, 
{0,}}, /* end marker */ --}; -- --static pfm_reg_desc_t pfm_ita_pmd_desc[PMU_MAX_PMDS]={ --/* pmd0 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, --/* pmd1 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, --/* pmd2 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, --/* pmd3 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, --/* pmd4 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, --/* pmd5 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, --/* pmd6 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, --/* pmd7 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, --/* pmd8 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, --/* pmd9 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, --/* pmd10 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, --/* pmd11 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, --/* pmd12 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, --/* pmd13 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 
0UL, 0UL}}, --/* pmd14 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, --/* pmd15 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, --/* pmd16 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, --/* pmd17 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, -- { PFM_REG_END , 0, 0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ --}; -- --static int --pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs) --{ -- int ret; -- int is_loaded; -- -- /* sanitfy check */ -- if (ctx == NULL) return -EINVAL; -- -- is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED; -- -- /* -- * we must clear the (instruction) debug registers if pmc13.ta bit is cleared -- * before they are written (fl_using_dbreg==0) to avoid picking up stale information. -- */ -- if (cnum == 13 && is_loaded && ((*val & 0x1) == 0UL) && ctx->ctx_fl_using_dbreg == 0) { -- -- DPRINT(("pmc[%d]=0x%lx has active pmc13.ta cleared, clearing ibr\n", cnum, *val)); -- -- /* don't mix debug with perfmon */ -- if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; -- -- /* -- * a count of 0 will mark the debug registers as in use and also -- * ensure that they are properly cleared. -- */ -- ret = pfm_write_ibr_dbr(1, ctx, NULL, 0, regs); -- if (ret) return ret; -- } -- -- /* -- * we must clear the (data) debug registers if pmc11.pt bit is cleared -- * before they are written (fl_using_dbreg==0) to avoid picking up stale information. 
-- */ -- if (cnum == 11 && is_loaded && ((*val >> 28)& 0x1) == 0 && ctx->ctx_fl_using_dbreg == 0) { -- -- DPRINT(("pmc[%d]=0x%lx has active pmc11.pt cleared, clearing dbr\n", cnum, *val)); -- -- /* don't mix debug with perfmon */ -- if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; -- -- /* -- * a count of 0 will mark the debug registers as in use and also -- * ensure that they are properly cleared. -- */ -- ret = pfm_write_ibr_dbr(0, ctx, NULL, 0, regs); -- if (ret) return ret; -- } -- return 0; --} -- --/* -- * impl_pmcs, impl_pmds are computed at runtime to minimize errors! -- */ --static pmu_config_t pmu_conf_ita={ -- .pmu_name = "Itanium", -- .pmu_family = 0x7, -- .ovfl_val = (1UL << 32) - 1, -- .pmd_desc = pfm_ita_pmd_desc, -- .pmc_desc = pfm_ita_pmc_desc, -- .num_ibrs = 8, -- .num_dbrs = 8, -- .use_rr_dbregs = 1, /* debug register are use for range retrictions */ --}; -- -- -diff --git a/arch/ia64/kernel/perfmon_mckinley.h b/arch/ia64/kernel/perfmon_mckinley.h -deleted file mode 100644 -index c4bec7a..0000000 ---- a/arch/ia64/kernel/perfmon_mckinley.h -+++ /dev/null -@@ -1,187 +0,0 @@ --/* -- * This file contains the McKinley PMU register description tables -- * and pmc checker used by perfmon.c. 
-- * -- * Copyright (C) 2002-2003 Hewlett Packard Co -- * Stephane Eranian -- */ --static int pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); -- --static pfm_reg_desc_t pfm_mck_pmc_desc[PMU_MAX_PMCS]={ --/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0000000000800000UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc8 */ { PFM_REG_CONFIG , 0, 0xffffffff3fffffffUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc9 */ { PFM_REG_CONFIG , 0, 0xffffffff3ffffffcUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc10 */ { PFM_REG_MONITOR , 4, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0UL, 0x30f01cf, NULL, pfm_mck_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* 
pmc13 */ { PFM_REG_CONFIG , 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc14 */ { PFM_REG_CONFIG , 0, 0x0db60db60db60db6UL, 0x2492UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, --/* pmc15 */ { PFM_REG_CONFIG , 0, 0x00000000fffffff0UL, 0xfUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, -- { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ --}; -- --static pfm_reg_desc_t pfm_mck_pmd_desc[PMU_MAX_PMDS]={ --/* pmd0 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, --/* pmd1 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, --/* pmd2 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, --/* pmd3 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, --/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, --/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, --/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, --/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, --/* pmd8 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, --/* pmd9 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, --/* pmd10 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, --/* pmd11 */ { 
PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, --/* pmd12 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, --/* pmd13 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, --/* pmd14 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, --/* pmd15 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, --/* pmd16 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, --/* pmd17 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, -- { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ --}; -- --/* -- * PMC reserved fields must have their power-up values preserved -- */ --static int --pfm_mck_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs) --{ -- unsigned long tmp1, tmp2, ival = *val; -- -- /* remove reserved areas from user value */ -- tmp1 = ival & PMC_RSVD_MASK(cnum); -- -- /* get reserved fields values */ -- tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum); -- -- *val = tmp1 | tmp2; -- -- DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n", -- cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val)); -- return 0; --} -- --/* -- * task can be NULL if the context is unloaded -- */ --static int --pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct 
pt_regs *regs) --{ -- int ret = 0, check_case1 = 0; -- unsigned long val8 = 0, val14 = 0, val13 = 0; -- int is_loaded; -- -- /* first preserve the reserved fields */ -- pfm_mck_reserved(cnum, val, regs); -- -- /* sanitfy check */ -- if (ctx == NULL) return -EINVAL; -- -- is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED; -- -- /* -- * we must clear the debug registers if pmc13 has a value which enable -- * memory pipeline event constraints. In this case we need to clear the -- * the debug registers if they have not yet been accessed. This is required -- * to avoid picking stale state. -- * PMC13 is "active" if: -- * one of the pmc13.cfg_dbrpXX field is different from 0x3 -- * AND -- * at the corresponding pmc13.ena_dbrpXX is set. -- */ -- DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, *val, ctx->ctx_fl_using_dbreg, is_loaded)); -- -- if (cnum == 13 && is_loaded -- && (*val & 0x1e00000000000UL) && (*val & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) { -- -- DPRINT(("pmc[%d]=0x%lx has active pmc13 settings, clearing dbr\n", cnum, *val)); -- -- /* don't mix debug with perfmon */ -- if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; -- -- /* -- * a count of 0 will mark the debug registers as in use and also -- * ensure that they are properly cleared. -- */ -- ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs); -- if (ret) return ret; -- } -- /* -- * we must clear the (instruction) debug registers if any pmc14.ibrpX bit is enabled -- * before they are (fl_using_dbreg==0) to avoid picking up stale information. 
-- */ -- if (cnum == 14 && is_loaded && ((*val & 0x2222UL) != 0x2222UL) && ctx->ctx_fl_using_dbreg == 0) { -- -- DPRINT(("pmc[%d]=0x%lx has active pmc14 settings, clearing ibr\n", cnum, *val)); -- -- /* don't mix debug with perfmon */ -- if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; -- -- /* -- * a count of 0 will mark the debug registers as in use and also -- * ensure that they are properly cleared. -- */ -- ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs); -- if (ret) return ret; -- -- } -- -- switch(cnum) { -- case 4: *val |= 1UL << 23; /* force power enable bit */ -- break; -- case 8: val8 = *val; -- val13 = ctx->ctx_pmcs[13]; -- val14 = ctx->ctx_pmcs[14]; -- check_case1 = 1; -- break; -- case 13: val8 = ctx->ctx_pmcs[8]; -- val13 = *val; -- val14 = ctx->ctx_pmcs[14]; -- check_case1 = 1; -- break; -- case 14: val8 = ctx->ctx_pmcs[8]; -- val13 = ctx->ctx_pmcs[13]; -- val14 = *val; -- check_case1 = 1; -- break; -- } -- /* check illegal configuration which can produce inconsistencies in tagging -- * i-side events in L1D and L2 caches -- */ -- if (check_case1) { -- ret = ((val13 >> 45) & 0xf) == 0 -- && ((val8 & 0x1) == 0) -- && ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0) -- ||(((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0)); -- -- if (ret) DPRINT((KERN_DEBUG "perfmon: failure check_case1\n")); -- } -- -- return ret ? -EINVAL : 0; --} -- --/* -- * impl_pmcs, impl_pmds are computed at runtime to minimize errors! 
-- */ --static pmu_config_t pmu_conf_mck={ -- .pmu_name = "Itanium 2", -- .pmu_family = 0x1f, -- .flags = PFM_PMU_IRQ_RESEND, -- .ovfl_val = (1UL << 47) - 1, -- .pmd_desc = pfm_mck_pmd_desc, -- .pmc_desc = pfm_mck_pmc_desc, -- .num_ibrs = 8, -- .num_dbrs = 8, -- .use_rr_dbregs = 1 /* debug register are use for range restrictions */ --}; -- -- -diff --git a/arch/ia64/kernel/perfmon_montecito.h b/arch/ia64/kernel/perfmon_montecito.h -deleted file mode 100644 -index 7f8da4c..0000000 ---- a/arch/ia64/kernel/perfmon_montecito.h -+++ /dev/null -@@ -1,269 +0,0 @@ --/* -- * This file contains the Montecito PMU register description tables -- * and pmc checker used by perfmon.c. -- * -- * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. -- * Contributed by Stephane Eranian -- */ --static int pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); -- --#define RDEP_MONT_ETB (RDEP(38)|RDEP(39)|RDEP(48)|RDEP(49)|RDEP(50)|RDEP(51)|RDEP(52)|RDEP(53)|RDEP(54)|\ -- RDEP(55)|RDEP(56)|RDEP(57)|RDEP(58)|RDEP(59)|RDEP(60)|RDEP(61)|RDEP(62)|RDEP(63)) --#define RDEP_MONT_DEAR (RDEP(32)|RDEP(33)|RDEP(36)) --#define RDEP_MONT_IEAR (RDEP(34)|RDEP(35)) -- --static pfm_reg_desc_t pfm_mont_pmc_desc[PMU_MAX_PMCS]={ --/* pmc0 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}}, --/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}}, --/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}}, --/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {0,0, 0, 0}}, --/* pmc4 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(4),0, 0, 0}, {0,0, 0, 0}}, --/* pmc5 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(5),0, 0, 0}, {0,0, 0, 0}}, --/* pmc6 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(6),0, 0, 0}, {0,0, 0, 0}}, --/* pmc7 */ { 
PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(7),0, 0, 0}, {0,0, 0, 0}}, --/* pmc8 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(8),0, 0, 0}, {0,0, 0, 0}}, --/* pmc9 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(9),0, 0, 0}, {0,0, 0, 0}}, --/* pmc10 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(10),0, 0, 0}, {0,0, 0, 0}}, --/* pmc11 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(11),0, 0, 0}, {0,0, 0, 0}}, --/* pmc12 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(12),0, 0, 0}, {0,0, 0, 0}}, --/* pmc13 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(13),0, 0, 0}, {0,0, 0, 0}}, --/* pmc14 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(14),0, 0, 0}, {0,0, 0, 0}}, --/* pmc15 */ { PFM_REG_COUNTING, 6, 0x2000000, 0x7c7fff7f, NULL, pfm_mont_pmc_check, {RDEP(15),0, 0, 0}, {0,0, 0, 0}}, --/* pmc16 */ { PFM_REG_NOTIMPL, }, --/* pmc17 */ { PFM_REG_NOTIMPL, }, --/* pmc18 */ { PFM_REG_NOTIMPL, }, --/* pmc19 */ { PFM_REG_NOTIMPL, }, --/* pmc20 */ { PFM_REG_NOTIMPL, }, --/* pmc21 */ { PFM_REG_NOTIMPL, }, --/* pmc22 */ { PFM_REG_NOTIMPL, }, --/* pmc23 */ { PFM_REG_NOTIMPL, }, --/* pmc24 */ { PFM_REG_NOTIMPL, }, --/* pmc25 */ { PFM_REG_NOTIMPL, }, --/* pmc26 */ { PFM_REG_NOTIMPL, }, --/* pmc27 */ { PFM_REG_NOTIMPL, }, --/* pmc28 */ { PFM_REG_NOTIMPL, }, --/* pmc29 */ { PFM_REG_NOTIMPL, }, --/* pmc30 */ { PFM_REG_NOTIMPL, }, --/* pmc31 */ { PFM_REG_NOTIMPL, }, --/* pmc32 */ { PFM_REG_CONFIG, 0, 0x30f01ffffffffffUL, 0x30f01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, --/* pmc33 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, --/* pmc34 */ { PFM_REG_CONFIG, 0, 0xf01ffffffffffUL, 0xf01ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 
0}}, --/* pmc35 */ { PFM_REG_CONFIG, 0, 0x0, 0x1ffffffffffUL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, --/* pmc36 */ { PFM_REG_CONFIG, 0, 0xfffffff0, 0xf, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, --/* pmc37 */ { PFM_REG_MONITOR, 4, 0x0, 0x3fff, NULL, pfm_mont_pmc_check, {RDEP_MONT_IEAR, 0, 0, 0}, {0, 0, 0, 0}}, --/* pmc38 */ { PFM_REG_CONFIG, 0, 0xdb6, 0x2492, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, --/* pmc39 */ { PFM_REG_MONITOR, 6, 0x0, 0xffcf, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}}, --/* pmc40 */ { PFM_REG_MONITOR, 6, 0x2000000, 0xf01cf, NULL, pfm_mont_pmc_check, {RDEP_MONT_DEAR,0, 0, 0}, {0,0, 0, 0}}, --/* pmc41 */ { PFM_REG_CONFIG, 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mont_pmc_check, {0,0, 0, 0}, {0,0, 0, 0}}, --/* pmc42 */ { PFM_REG_MONITOR, 6, 0x0, 0x7ff4f, NULL, pfm_mont_pmc_check, {RDEP_MONT_ETB,0, 0, 0}, {0,0, 0, 0}}, -- { PFM_REG_END , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */ --}; -- --static pfm_reg_desc_t pfm_mont_pmd_desc[PMU_MAX_PMDS]={ --/* pmd0 */ { PFM_REG_NOTIMPL, }, --/* pmd1 */ { PFM_REG_NOTIMPL, }, --/* pmd2 */ { PFM_REG_NOTIMPL, }, --/* pmd3 */ { PFM_REG_NOTIMPL, }, --/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(4),0, 0, 0}}, --/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(5),0, 0, 0}}, --/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(6),0, 0, 0}}, --/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(7),0, 0, 0}}, --/* pmd8 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(8),0, 0, 0}}, --/* pmd9 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(9),0, 0, 0}}, --/* pmd10 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(10),0, 0, 0}}, --/* pmd11 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(11),0, 0, 0}}, --/* pmd12 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, 
{RDEP(12),0, 0, 0}}, --/* pmd13 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(13),0, 0, 0}}, --/* pmd14 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(14),0, 0, 0}}, --/* pmd15 */ { PFM_REG_COUNTING, 0, 0x0, -1, NULL, NULL, {0,0, 0, 0}, {RDEP(15),0, 0, 0}}, --/* pmd16 */ { PFM_REG_NOTIMPL, }, --/* pmd17 */ { PFM_REG_NOTIMPL, }, --/* pmd18 */ { PFM_REG_NOTIMPL, }, --/* pmd19 */ { PFM_REG_NOTIMPL, }, --/* pmd20 */ { PFM_REG_NOTIMPL, }, --/* pmd21 */ { PFM_REG_NOTIMPL, }, --/* pmd22 */ { PFM_REG_NOTIMPL, }, --/* pmd23 */ { PFM_REG_NOTIMPL, }, --/* pmd24 */ { PFM_REG_NOTIMPL, }, --/* pmd25 */ { PFM_REG_NOTIMPL, }, --/* pmd26 */ { PFM_REG_NOTIMPL, }, --/* pmd27 */ { PFM_REG_NOTIMPL, }, --/* pmd28 */ { PFM_REG_NOTIMPL, }, --/* pmd29 */ { PFM_REG_NOTIMPL, }, --/* pmd30 */ { PFM_REG_NOTIMPL, }, --/* pmd31 */ { PFM_REG_NOTIMPL, }, --/* pmd32 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(33)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}}, --/* pmd33 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(36),0, 0, 0}, {RDEP(40),0, 0, 0}}, --/* pmd34 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(35),0, 0, 0}, {RDEP(37),0, 0, 0}}, --/* pmd35 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(34),0, 0, 0}, {RDEP(37),0, 0, 0}}, --/* pmd36 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP(32)|RDEP(33),0, 0, 0}, {RDEP(40),0, 0, 0}}, --/* pmd37 */ { PFM_REG_NOTIMPL, }, --/* pmd38 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, --/* pmd39 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, --/* pmd40 */ { PFM_REG_NOTIMPL, }, --/* pmd41 */ { PFM_REG_NOTIMPL, }, --/* pmd42 */ { PFM_REG_NOTIMPL, }, --/* pmd43 */ { PFM_REG_NOTIMPL, }, --/* pmd44 */ { PFM_REG_NOTIMPL, }, --/* pmd45 */ { PFM_REG_NOTIMPL, }, --/* pmd46 */ { PFM_REG_NOTIMPL, }, --/* pmd47 */ { PFM_REG_NOTIMPL, }, --/* pmd48 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, 
{RDEP(39),0, 0, 0}}, --/* pmd49 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, --/* pmd50 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, --/* pmd51 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, --/* pmd52 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, --/* pmd53 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, --/* pmd54 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, --/* pmd55 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, --/* pmd56 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, --/* pmd57 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, --/* pmd58 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, --/* pmd59 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, --/* pmd60 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, --/* pmd61 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, --/* pmd62 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, --/* pmd63 */ { PFM_REG_BUFFER, 0, 0x0, -1, NULL, NULL, {RDEP_MONT_ETB,0, 0, 0}, {RDEP(39),0, 0, 0}}, -- { PFM_REG_END , 0, 0x0, -1, NULL, NULL, {0,}, {0,}}, /* end marker */ --}; -- --/* -- * PMC reserved fields must have their power-up values preserved -- */ --static int --pfm_mont_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs) --{ -- unsigned long tmp1, tmp2, ival = *val; -- -- /* remove reserved areas from user value */ -- tmp1 = ival & PMC_RSVD_MASK(cnum); -- -- /* get reserved fields values */ -- tmp2 = 
PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum); -- -- *val = tmp1 | tmp2; -- -- DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n", -- cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val)); -- return 0; --} -- --/* -- * task can be NULL if the context is unloaded -- */ --static int --pfm_mont_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs) --{ -- int ret = 0; -- unsigned long val32 = 0, val38 = 0, val41 = 0; -- unsigned long tmpval; -- int check_case1 = 0; -- int is_loaded; -- -- /* first preserve the reserved fields */ -- pfm_mont_reserved(cnum, val, regs); -- -- tmpval = *val; -- -- /* sanity check */ -- if (ctx == NULL) return -EINVAL; -- -- is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED; -- -- /* -- * we must clear the debug registers if pmc41 has a value which enable -- * memory pipeline event constraints. In this case we need to clear the -- * the debug registers if they have not yet been accessed. This is required -- * to avoid picking stale state. -- * PMC41 is "active" if: -- * one of the pmc41.cfg_dtagXX field is different from 0x3 -- * AND -- * at the corresponding pmc41.en_dbrpXX is set. 
-- * AND -- * ctx_fl_using_dbreg == 0 (i.e., dbr not yet used) -- */ -- DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, tmpval, ctx->ctx_fl_using_dbreg, is_loaded)); -- -- if (cnum == 41 && is_loaded -- && (tmpval & 0x1e00000000000UL) && (tmpval & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) { -- -- DPRINT(("pmc[%d]=0x%lx has active pmc41 settings, clearing dbr\n", cnum, tmpval)); -- -- /* don't mix debug with perfmon */ -- if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; -- -- /* -- * a count of 0 will mark the debug registers if: -- * AND -- */ -- ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs); -- if (ret) return ret; -- } -- /* -- * we must clear the (instruction) debug registers if: -- * pmc38.ig_ibrpX is 0 (enabled) -- * AND -- * ctx_fl_using_dbreg == 0 (i.e., dbr not yet used) -- */ -- if (cnum == 38 && is_loaded && ((tmpval & 0x492UL) != 0x492UL) && ctx->ctx_fl_using_dbreg == 0) { -- -- DPRINT(("pmc38=0x%lx has active pmc38 settings, clearing ibr\n", tmpval)); -- -- /* don't mix debug with perfmon */ -- if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; -- -- /* -- * a count of 0 will mark the debug registers as in use and also -- * ensure that they are properly cleared. 
-- */ -- ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs); -- if (ret) return ret; -- -- } -- switch(cnum) { -- case 32: val32 = *val; -- val38 = ctx->ctx_pmcs[38]; -- val41 = ctx->ctx_pmcs[41]; -- check_case1 = 1; -- break; -- case 38: val38 = *val; -- val32 = ctx->ctx_pmcs[32]; -- val41 = ctx->ctx_pmcs[41]; -- check_case1 = 1; -- break; -- case 41: val41 = *val; -- val32 = ctx->ctx_pmcs[32]; -- val38 = ctx->ctx_pmcs[38]; -- check_case1 = 1; -- break; -- } -- /* check illegal configuration which can produce inconsistencies in tagging -- * i-side events in L1D and L2 caches -- */ -- if (check_case1) { -- ret = (((val41 >> 45) & 0xf) == 0 && ((val32>>57) & 0x1) == 0) -- && ((((val38>>1) & 0x3) == 0x2 || ((val38>>1) & 0x3) == 0) -- || (((val38>>4) & 0x3) == 0x2 || ((val38>>4) & 0x3) == 0)); -- if (ret) { -- DPRINT(("invalid config pmc38=0x%lx pmc41=0x%lx pmc32=0x%lx\n", val38, val41, val32)); -- return -EINVAL; -- } -- } -- *val = tmpval; -- return 0; --} -- --/* -- * impl_pmcs, impl_pmds are computed at runtime to minimize errors! 
-- */ --static pmu_config_t pmu_conf_mont={ -- .pmu_name = "Montecito", -- .pmu_family = 0x20, -- .flags = PFM_PMU_IRQ_RESEND, -- .ovfl_val = (1UL << 47) - 1, -- .pmd_desc = pfm_mont_pmd_desc, -- .pmc_desc = pfm_mont_pmc_desc, -- .num_ibrs = 8, -- .num_dbrs = 8, -- .use_rr_dbregs = 1 /* debug register are use for range retrictions */ --}; -diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c -index 3ab8373..a7dfb39 100644 ---- a/arch/ia64/kernel/process.c -+++ b/arch/ia64/kernel/process.c -@@ -28,6 +28,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -45,10 +46,6 @@ - - #include "entry.h" - --#ifdef CONFIG_PERFMON --# include --#endif -- - #include "sigframe.h" - - void (*ia64_mark_idle)(int); -@@ -162,10 +159,8 @@ show_regs (struct pt_regs *regs) - - void tsk_clear_notify_resume(struct task_struct *tsk) - { --#ifdef CONFIG_PERFMON -- if (tsk->thread.pfm_needs_checking) -+ if (test_ti_thread_flag(task_thread_info(tsk), TIF_PERFMON_WORK)) - return; --#endif - if (test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_RSE)) - return; - clear_ti_thread_flag(task_thread_info(tsk), TIF_NOTIFY_RESUME); -@@ -188,14 +183,9 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall) - return; - } - --#ifdef CONFIG_PERFMON -- if (current->thread.pfm_needs_checking) -- /* -- * Note: pfm_handle_work() allow us to call it with interrupts -- * disabled, and may enable interrupts within the function. -- */ -- pfm_handle_work(); --#endif -+ /* process perfmon asynchronous work (e.g. 
block thread or reset) */ -+ if (test_thread_flag(TIF_PERFMON_WORK)) -+ pfm_handle_work(task_pt_regs(current)); - - /* deal with pending signal delivery */ - if (test_thread_flag(TIF_SIGPENDING)) { -@@ -212,22 +202,15 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall) - local_irq_disable(); /* force interrupt disable */ - } - --static int pal_halt = 1; - static int can_do_pal_halt = 1; - - static int __init nohalt_setup(char * str) - { -- pal_halt = can_do_pal_halt = 0; -+ can_do_pal_halt = 0; - return 1; - } - __setup("nohalt", nohalt_setup); - --void --update_pal_halt_status(int status) --{ -- can_do_pal_halt = pal_halt && status; --} -- - /* - * We use this if we don't have any better idle routine.. - */ -@@ -236,6 +219,22 @@ default_idle (void) - { - local_irq_enable(); - while (!need_resched()) { -+#ifdef CONFIG_PERFMON -+ u64 psr = 0; -+ /* -+ * If requested, we stop the PMU to avoid -+ * measuring across the core idle loop. -+ * -+ * dcr.pp is not modified on purpose -+ * it is used when coming out of -+ * safe_halt() via interrupt -+ */ -+ if ((__get_cpu_var(pfm_syst_info) & PFM_ITA_CPUINFO_IDLE_EXCL)) { -+ psr = ia64_getreg(_IA64_REG_PSR); -+ if (psr & IA64_PSR_PP) -+ ia64_rsm(IA64_PSR_PP); -+ } -+#endif - if (can_do_pal_halt) { - local_irq_disable(); - if (!need_resched()) { -@@ -244,6 +243,12 @@ default_idle (void) - local_irq_enable(); - } else - cpu_relax(); -+#ifdef CONFIG_PERFMON -+ if ((__get_cpu_var(pfm_syst_info) & PFM_ITA_CPUINFO_IDLE_EXCL)) { -+ if (psr & IA64_PSR_PP) -+ ia64_ssm(IA64_PSR_PP); -+ } -+#endif - } - } - -@@ -344,22 +349,9 @@ cpu_idle (void) - void - ia64_save_extra (struct task_struct *task) - { --#ifdef CONFIG_PERFMON -- unsigned long info; --#endif -- - if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) - ia64_save_debug_regs(&task->thread.dbr[0]); - --#ifdef CONFIG_PERFMON -- if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) -- pfm_save_regs(task); -- -- info = 
__get_cpu_var(pfm_syst_info); -- if (info & PFM_CPUINFO_SYST_WIDE) -- pfm_syst_wide_update_task(task, info, 0); --#endif -- - #ifdef CONFIG_IA32_SUPPORT - if (IS_IA32_PROCESS(task_pt_regs(task))) - ia32_save_state(task); -@@ -369,22 +361,9 @@ ia64_save_extra (struct task_struct *task) - void - ia64_load_extra (struct task_struct *task) - { --#ifdef CONFIG_PERFMON -- unsigned long info; --#endif -- - if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) - ia64_load_debug_regs(&task->thread.dbr[0]); - --#ifdef CONFIG_PERFMON -- if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) -- pfm_load_regs(task); -- -- info = __get_cpu_var(pfm_syst_info); -- if (info & PFM_CPUINFO_SYST_WIDE) -- pfm_syst_wide_update_task(task, info, 1); --#endif -- - #ifdef CONFIG_IA32_SUPPORT - if (IS_IA32_PROCESS(task_pt_regs(task))) - ia32_load_state(task); -@@ -510,8 +489,7 @@ copy_thread (int nr, unsigned long clone_flags, - * call behavior where scratch registers are preserved across - * system calls (unless used by the system call itself). 
- */ --# define THREAD_FLAGS_TO_CLEAR (IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID \ -- | IA64_THREAD_PM_VALID) -+# define THREAD_FLAGS_TO_CLEAR (IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID) - # define THREAD_FLAGS_TO_SET 0 - p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR) - | THREAD_FLAGS_TO_SET); -@@ -533,10 +511,8 @@ copy_thread (int nr, unsigned long clone_flags, - } - #endif - --#ifdef CONFIG_PERFMON -- if (current->thread.pfm_context) -- pfm_inherit(p, child_ptregs); --#endif -+ pfm_copy_thread(p); -+ - return retval; - } - -@@ -745,15 +721,13 @@ exit_thread (void) - { - - ia64_drop_fpu(current); --#ifdef CONFIG_PERFMON -- /* if needed, stop monitoring and flush state to perfmon context */ -- if (current->thread.pfm_context) -- pfm_exit_thread(current); -+ -+ /* if needed, stop monitoring and flush state to perfmon context */ -+ pfm_exit_thread(); - - /* free debug register resources */ -- if (current->thread.flags & IA64_THREAD_DBG_VALID) -- pfm_release_debug_registers(current); --#endif -+ pfm_release_dbregs(current); -+ - if (IS_IA32_PROCESS(task_pt_regs(current))) - ia32_drop_ia64_partial_page_list(current); - } -diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c -index 2a9943b..bb1ca1e 100644 ---- a/arch/ia64/kernel/ptrace.c -+++ b/arch/ia64/kernel/ptrace.c -@@ -20,6 +20,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -30,9 +31,6 @@ - #include - #include - #include --#ifdef CONFIG_PERFMON --#include --#endif - - #include "entry.h" - -@@ -2124,7 +2122,6 @@ access_uarea(struct task_struct *child, unsigned long addr, - "address 0x%lx\n", addr); - return -1; - } --#ifdef CONFIG_PERFMON - /* - * Check if debug registers are used by perfmon. This - * test must be done once we know that we can do the -@@ -2142,9 +2139,8 @@ access_uarea(struct task_struct *child, unsigned long addr, - * IA64_THREAD_DBG_VALID. The registers are restored - * by the PMU context switch code. 
- */ -- if (pfm_use_debug_registers(child)) -+ if (pfm_use_dbregs(child)) - return -1; --#endif - - if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) { - child->thread.flags |= IA64_THREAD_DBG_VALID; -diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c -index de636b2..677fa68 100644 ---- a/arch/ia64/kernel/setup.c -+++ b/arch/ia64/kernel/setup.c -@@ -45,6 +45,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -1051,6 +1052,8 @@ cpu_init (void) - } - platform_cpu_init(); - pm_idle = default_idle; -+ -+ pfm_init_percpu(); - } - - void __init -diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c -index d8f05e5..3d7a739 100644 ---- a/arch/ia64/kernel/smpboot.c -+++ b/arch/ia64/kernel/smpboot.c -@@ -39,6 +39,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -381,10 +382,6 @@ smp_callin (void) - extern void ia64_init_itm(void); - extern volatile int time_keeper_id; - --#ifdef CONFIG_PERFMON -- extern void pfm_init_percpu(void); --#endif -- - cpuid = smp_processor_id(); - phys_id = hard_smp_processor_id(); - itc_master = time_keeper_id; -@@ -410,10 +407,6 @@ smp_callin (void) - - ia64_mca_cmc_vector_setup(); /* Setup vector on AP */ - --#ifdef CONFIG_PERFMON -- pfm_init_percpu(); --#endif -- - local_irq_enable(); - - if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) { -@@ -751,6 +744,7 @@ int __cpu_disable(void) - cpu_clear(cpu, cpu_online_map); - local_flush_tlb_all(); - cpu_clear(cpu, cpu_callin_map); -+ pfm_cpu_disable(); - return 0; - } - -diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c -index bcbb6d8..a0ed33a 100644 ---- a/arch/ia64/kernel/sys_ia64.c -+++ b/arch/ia64/kernel/sys_ia64.c -@@ -284,3 +284,11 @@ sys_pciconfig_write (unsigned long bus, unsigned long dfn, unsigned long off, un - } - - #endif /* CONFIG_PCI */ -+ -+#ifndef CONFIG_IA64_PERFMON_COMPAT -+asmlinkage long -+sys_perfmonctl (int fd, int cmd, void __user *arg, int count) -+{ -+ 
return -ENOSYS; -+} -+#endif -diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile -index 98771e2..077fd09 100644 ---- a/arch/ia64/lib/Makefile -+++ b/arch/ia64/lib/Makefile -@@ -13,7 +13,6 @@ lib-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \ - - obj-$(CONFIG_ITANIUM) += copy_page.o copy_user.o memcpy.o - obj-$(CONFIG_MCKINLEY) += copy_page_mck.o memcpy_mck.o --lib-$(CONFIG_PERFMON) += carta_random.o - - AFLAGS___divdi3.o = - AFLAGS___udivdi3.o = -DUNSIGNED -diff --git a/arch/ia64/oprofile/init.c b/arch/ia64/oprofile/init.c -index 125a602..892de6a 100644 ---- a/arch/ia64/oprofile/init.c -+++ b/arch/ia64/oprofile/init.c -@@ -12,8 +12,8 @@ - #include - #include - --extern int perfmon_init(struct oprofile_operations * ops); --extern void perfmon_exit(void); -+extern int op_perfmon_init(struct oprofile_operations * ops); -+extern void op_perfmon_exit(void); - extern void ia64_backtrace(struct pt_regs * const regs, unsigned int depth); - - int __init oprofile_arch_init(struct oprofile_operations * ops) -@@ -22,7 +22,7 @@ int __init oprofile_arch_init(struct oprofile_operations * ops) - - #ifdef CONFIG_PERFMON - /* perfmon_init() can fail, but we have no way to report it */ -- ret = perfmon_init(ops); -+ ret = op_perfmon_init(ops); - #endif - ops->backtrace = ia64_backtrace; - -@@ -33,6 +33,6 @@ int __init oprofile_arch_init(struct oprofile_operations * ops) - void oprofile_arch_exit(void) - { - #ifdef CONFIG_PERFMON -- perfmon_exit(); -+ op_perfmon_exit(); - #endif - } -diff --git a/arch/ia64/oprofile/perfmon.c b/arch/ia64/oprofile/perfmon.c -index bc41dd3..6fa9d17 100644 ---- a/arch/ia64/oprofile/perfmon.c -+++ b/arch/ia64/oprofile/perfmon.c -@@ -10,25 +10,30 @@ - #include - #include - #include --#include -+#include -+#include - #include - #include - - static int allow_ints; - - static int --perfmon_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, -- struct pt_regs *regs, unsigned long stamp) -+perfmon_handler(struct pfm_context *ctx, -+ 
unsigned long ip, u64 stamp, void *data) - { -- int event = arg->pmd_eventid; -+ struct pt_regs *regs; -+ struct pfm_ovfl_arg *arg; -+ -+ regs = data; -+ arg = &ctx->ovfl_arg; - -- arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; -+ arg->ovfl_ctrl = PFM_OVFL_CTRL_RESET; - - /* the owner of the oprofile event buffer may have exited - * without perfmon being shutdown (e.g. SIGSEGV) - */ - if (allow_ints) -- oprofile_add_sample(regs, event); -+ oprofile_add_sample(regs, arg->pmd_eventid); - return 0; - } - -@@ -45,17 +50,13 @@ static void perfmon_stop(void) - allow_ints = 0; - } - -- --#define OPROFILE_FMT_UUID { \ -- 0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69, 0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c } -- --static pfm_buffer_fmt_t oprofile_fmt = { -- .fmt_name = "oprofile_format", -- .fmt_uuid = OPROFILE_FMT_UUID, -- .fmt_handler = perfmon_handler, -+static struct pfm_smpl_fmt oprofile_fmt = { -+ .fmt_name = "OProfile", -+ .fmt_handler = perfmon_handler, -+ .fmt_flags = PFM_FMT_BUILTIN_FLAG, -+ .owner = THIS_MODULE - }; - -- - static char * get_cpu_type(void) - { - __u8 family = local_cpu_data->family; -@@ -75,9 +76,9 @@ static char * get_cpu_type(void) - - static int using_perfmon; - --int perfmon_init(struct oprofile_operations * ops) -+int __init op_perfmon_init(struct oprofile_operations * ops) - { -- int ret = pfm_register_buffer_fmt(&oprofile_fmt); -+ int ret = pfm_fmt_register(&oprofile_fmt); - if (ret) - return -ENODEV; - -@@ -90,10 +91,10 @@ int perfmon_init(struct oprofile_operations * ops) - } - - --void perfmon_exit(void) -+void __exit op_perfmon_exit(void) - { - if (!using_perfmon) - return; - -- pfm_unregister_buffer_fmt(oprofile_fmt.fmt_uuid); -+ pfm_fmt_unregister(&oprofile_fmt); - } -diff --git a/arch/ia64/perfmon/Kconfig b/arch/ia64/perfmon/Kconfig -new file mode 100644 -index 0000000..99c68bd ---- /dev/null -+++ b/arch/ia64/perfmon/Kconfig -@@ -0,0 +1,67 @@ -+menu "Hardware Performance Monitoring support" -+config PERFMON -+ bool "Perfmon2 
performance monitoring interface" -+ default n -+ help -+ Enables the perfmon2 interface to access the hardware -+ performance counters. See for -+ more details. -+ -+config PERFMON_DEBUG -+ bool "Perfmon debugging" -+ default n -+ depends on PERFMON -+ help -+ Enables perfmon debugging support -+ -+config PERFMON_DEBUG_FS -+ bool "Enable perfmon statistics reporting via debugfs" -+ default y -+ depends on PERFMON && DEBUG_FS -+ help -+ Enable collection and reporting of perfmon timing statistics under -+ debugfs. This is used for debugging and performance analysis of the -+ subsystem. The debugfs filesystem must be mounted. -+ -+config IA64_PERFMON_COMPAT -+ bool "Enable old perfmon-2 compatbility mode" -+ default n -+ depends on PERFMON -+ help -+ Enable this option to allow performance tools which used the old -+ perfmon-2 interface to continue to work. Old tools are those using -+ the obsolete commands and arguments. Check your programs and look -+ in include/asm-ia64/perfmon_compat.h for more information. -+ -+config IA64_PERFMON_GENERIC -+ tristate "Generic IA-64 PMU support" -+ depends on PERFMON -+ default n -+ help -+ Enables generic IA-64 PMU support. -+ The generic PMU is defined by the IA-64 architecture document. -+ This option should only be necessary when running with a PMU that -+ is not yet explicitely supported. Even then, there is no guarantee -+ that this support will work. -+ -+config IA64_PERFMON_ITANIUM -+ tristate "Itanium (Merced) Performance Monitoring support" -+ depends on PERFMON -+ default n -+ help -+ Enables Itanium (Merced) PMU support. -+ -+config IA64_PERFMON_MCKINLEY -+ tristate "Itanium 2 (McKinley) Performance Monitoring support" -+ depends on PERFMON -+ default n -+ help -+ Enables Itanium 2 (McKinley, Madison, Deerfield) PMU support. 
-+ -+config IA64_PERFMON_MONTECITO -+ tristate "Itanium 2 9000 (Montecito) Performance Monitoring support" -+ depends on PERFMON -+ default n -+ help -+ Enables support for Itanium 2 9000 (Montecito) PMU. -+endmenu -diff --git a/arch/ia64/perfmon/Makefile b/arch/ia64/perfmon/Makefile -new file mode 100644 -index 0000000..c9cdf9f ---- /dev/null -+++ b/arch/ia64/perfmon/Makefile -@@ -0,0 +1,11 @@ -+# -+# Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. -+# Contributed by Stephane Eranian -+# -+obj-$(CONFIG_PERFMON) += perfmon.o -+obj-$(CONFIG_IA64_PERFMON_COMPAT) += perfmon_default_smpl.o \ -+ perfmon_compat.o -+obj-$(CONFIG_IA64_PERFMON_GENERIC) += perfmon_generic.o -+obj-$(CONFIG_IA64_PERFMON_ITANIUM) += perfmon_itanium.o -+obj-$(CONFIG_IA64_PERFMON_MCKINLEY) += perfmon_mckinley.o -+obj-$(CONFIG_IA64_PERFMON_MONTECITO) += perfmon_montecito.o -diff --git a/arch/ia64/perfmon/perfmon.c b/arch/ia64/perfmon/perfmon.c -new file mode 100644 -index 0000000..3f59410 ---- /dev/null -+++ b/arch/ia64/perfmon/perfmon.c -@@ -0,0 +1,946 @@ -+/* -+ * This file implements the IA-64 specific -+ * support for the perfmon2 interface -+ * -+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+ -+struct pfm_arch_session { -+ u32 pfs_sys_use_dbr; /* syswide session uses dbr */ -+ u32 pfs_ptrace_use_dbr; /* a thread uses dbr via ptrace()*/ -+}; -+ -+DEFINE_PER_CPU(u32, pfm_syst_info); -+ -+static struct pfm_arch_session pfm_arch_sessions; -+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_arch_sessions_lock); -+ -+static inline void pfm_clear_psr_pp(void) -+{ -+ ia64_rsm(IA64_PSR_PP); -+} -+ -+static inline void pfm_set_psr_pp(void) -+{ -+ ia64_ssm(IA64_PSR_PP); -+} -+ -+static inline void pfm_clear_psr_up(void) -+{ -+ ia64_rsm(IA64_PSR_UP); -+} -+ -+static inline void pfm_set_psr_up(void) -+{ -+ ia64_ssm(IA64_PSR_UP); -+} -+ -+static inline void pfm_set_psr_l(u64 val) -+{ -+ ia64_setreg(_IA64_REG_PSR_L, val); -+} -+ -+static inline void pfm_restore_ibrs(u64 *ibrs, unsigned int nibrs) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < nibrs; i++) { -+ ia64_set_ibr(i, ibrs[i]); -+ ia64_dv_serialize_instruction(); -+ } -+ ia64_srlz_i(); -+} -+ -+static inline void pfm_restore_dbrs(u64 *dbrs, unsigned int ndbrs) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < ndbrs; i++) { -+ ia64_set_dbr(i, dbrs[i]); -+ ia64_dv_serialize_data(); -+ } -+ ia64_srlz_d(); -+} -+ -+irqreturn_t pmu_interrupt_handler(int irq, void *arg) -+{ -+ struct pt_regs *regs; -+ regs = get_irq_regs(); -+ irq_enter(); -+ pfm_interrupt_handler(instruction_pointer(regs), regs); -+ irq_exit(); -+ return IRQ_HANDLED; -+} -+static struct irqaction perfmon_irqaction = { -+ .handler = pmu_interrupt_handler, -+ .flags = IRQF_DISABLED, /* means keep interrupts masked */ -+ .name = "perfmon" -+}; -+ -+void pfm_arch_quiesce_pmu_percpu(void) -+{ -+ u64 dcr; -+ /* -+ * make sure no measurement is active -+ * (may inherit programmed PMCs from EFI). 
-+ */ -+ pfm_clear_psr_pp(); -+ pfm_clear_psr_up(); -+ -+ /* -+ * ensure dcr.pp is cleared -+ */ -+ dcr = ia64_getreg(_IA64_REG_CR_DCR); -+ ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP); -+ -+ /* -+ * we run with the PMU not frozen at all times -+ */ -+ ia64_set_pmc(0, 0); -+ ia64_srlz_d(); -+} -+ -+void pfm_arch_init_percpu(void) -+{ -+ pfm_arch_quiesce_pmu_percpu(); -+ /* -+ * program PMU interrupt vector -+ */ -+ ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR); -+ ia64_srlz_d(); -+} -+ -+int pfm_arch_context_create(struct pfm_context *ctx, u32 ctx_flags) -+{ -+ struct pfm_arch_context *ctx_arch; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ -+ ctx_arch->flags.use_dbr = 0; -+ ctx_arch->flags.insecure = (ctx_flags & PFM_ITA_FL_INSECURE) ? 1: 0; -+ -+ PFM_DBG("insecure=%d", ctx_arch->flags.insecure); -+ -+ return 0; -+} -+ -+/* -+ * Called from pfm_ctxsw(). Task is guaranteed to be current. -+ * Context is locked. Interrupts are masked. Monitoring may be active. -+ * PMU access is guaranteed. PMC and PMD registers are live in PMU. 
-+ * -+ * Return: -+ * non-zero : did not save PMDs (as part of stopping the PMU) -+ * 0 : saved PMDs (no need to save them in caller) -+ */ -+int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx) -+{ -+ struct pfm_arch_context *ctx_arch; -+ struct pfm_event_set *set; -+ u64 psr, tmp; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ set = ctx->active_set; -+ -+ /* -+ * save current PSR: needed because we modify it -+ */ -+ ia64_srlz_d(); -+ psr = ia64_getreg(_IA64_REG_PSR); -+ -+ /* -+ * stop monitoring: -+ * This is the last instruction which may generate an overflow -+ * -+ * we do not clear ipsr.up -+ */ -+ pfm_clear_psr_up(); -+ ia64_srlz_d(); -+ -+ /* -+ * extract overflow status bits -+ */ -+ tmp = ia64_get_pmc(0) & ~0xf; -+ -+ /* -+ * keep a copy of psr.up (for reload) -+ */ -+ ctx_arch->ctx_saved_psr_up = psr & IA64_PSR_UP; -+ -+ /* -+ * save overflow status bits -+ */ -+ set->povfl_pmds[0] = tmp; -+ -+ /* -+ * record how many pending overflows -+ * XXX: assume identity mapping for counters -+ */ -+ set->npend_ovfls = ia64_popcnt(tmp); -+ -+ /* -+ * make sure the PMU is unfrozen for the next task -+ */ -+ if (set->npend_ovfls) { -+ ia64_set_pmc(0, 0); -+ ia64_srlz_d(); -+ } -+ return 1; -+} -+ -+/* -+ * Called from pfm_ctxsw(). Task is guaranteed to be current. -+ * set cannot be NULL. Context is locked. Interrupts are masked. -+ * Caller has already restored all PMD and PMC registers. 
-+ * -+ * must reactivate monitoring -+ */ -+void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx) -+{ -+ struct pfm_arch_context *ctx_arch; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ -+ /* -+ * when monitoring is not explicitly started -+ * then psr_up = 0, in which case we do not -+ * need to restore -+ */ -+ if (likely(ctx_arch->ctx_saved_psr_up)) { -+ pfm_set_psr_up(); -+ ia64_srlz_d(); -+ } -+} -+ -+int pfm_arch_reserve_session(struct pfm_context *ctx, u32 cpu) -+{ -+ struct pfm_arch_context *ctx_arch; -+ int is_system; -+ int ret = 0; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ is_system = ctx->flags.system; -+ -+ spin_lock(&pfm_arch_sessions_lock); -+ -+ if (is_system && ctx_arch->flags.use_dbr) { -+ PFM_DBG("syswide context uses dbregs"); -+ -+ if (pfm_arch_sessions.pfs_ptrace_use_dbr) { -+ PFM_DBG("cannot reserve syswide context: " -+ "dbregs in use by ptrace"); -+ ret = -EBUSY; -+ } else { -+ pfm_arch_sessions.pfs_sys_use_dbr++; -+ } -+ } -+ spin_unlock(&pfm_arch_sessions_lock); -+ -+ return ret; -+} -+ -+void pfm_arch_release_session(struct pfm_context *ctx, u32 cpu) -+{ -+ struct pfm_arch_context *ctx_arch; -+ int is_system; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ is_system = ctx->flags.system; -+ -+ spin_lock(&pfm_arch_sessions_lock); -+ -+ if (is_system && ctx_arch->flags.use_dbr) -+ pfm_arch_sessions.pfs_sys_use_dbr--; -+ spin_unlock(&pfm_arch_sessions_lock); -+} -+ -+/* -+ * function called from pfm_load_context_*(). Task is not guaranteed to be -+ * current task. If not then other task is guaranteed stopped and off any CPU. -+ * context is locked and interrupts are masked. -+ * -+ * On PFM_LOAD_CONTEXT, the interface guarantees monitoring is stopped. 
-+ * -+ * For system-wide task is NULL -+ */ -+int pfm_arch_load_context(struct pfm_context *ctx) -+{ -+ struct pfm_arch_context *ctx_arch; -+ struct pt_regs *regs; -+ int ret = 0; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ -+ /* -+ * cannot load a context which is using range restrictions, -+ * into a thread that is being debugged. -+ * -+ * if one set out of several is using the debug registers, then -+ * we assume the context as whole is using them. -+ */ -+ if (ctx_arch->flags.use_dbr) { -+ if (ctx->flags.system) { -+ spin_lock(&pfm_arch_sessions_lock); -+ -+ if (pfm_arch_sessions.pfs_ptrace_use_dbr) { -+ PFM_DBG("cannot reserve syswide context: " -+ "dbregs in use by ptrace"); -+ ret = -EBUSY; -+ } else { -+ pfm_arch_sessions.pfs_sys_use_dbr++; -+ PFM_DBG("pfs_sys_use_dbr=%u", -+ pfm_arch_sessions.pfs_sys_use_dbr); -+ } -+ spin_unlock(&pfm_arch_sessions_lock); -+ -+ } else if (ctx->task->thread.flags & IA64_THREAD_DBG_VALID) { -+ PFM_DBG("load_pid [%d] thread is debugged, cannot " -+ "use range restrictions", ctx->task->pid); -+ ret = -EBUSY; -+ } -+ if (ret) -+ return ret; -+ } -+ -+ /* -+ * We need to intervene on context switch to toggle the -+ * psr.pp bit in system-wide. 
As such, we set the TIF -+ * flag so that pfm_arch_ctxswout_sys() and the -+ * pfm_arch_ctxswin_sys() functions get called -+ * from pfm_ctxsw_sys(); -+ */ -+ if (ctx->flags.system) { -+ set_thread_flag(TIF_PERFMON_CTXSW); -+ PFM_DBG("[%d] set TIF", current->pid); -+ return 0; -+ } -+ -+ regs = task_pt_regs(ctx->task); -+ -+ /* -+ * self-monitoring systematically allows user level control -+ */ -+ if (ctx->task != current) { -+ /* -+ * when not current, task is stopped, so this is safe -+ */ -+ ctx_arch->ctx_saved_psr_up = 0; -+ ia64_psr(regs)->up = ia64_psr(regs)->pp = 0; -+ } else -+ ctx_arch->flags.insecure = 1; -+ -+ /* -+ * allow user level control (start/stop/read pmd) if: -+ * - self-monitoring -+ * - requested at context creation (PFM_IA64_FL_INSECURE) -+ * -+ * There is not security hole with PFM_IA64_FL_INSECURE because -+ * when not self-monitored, the caller must have permissions to -+ * attached to the task. -+ */ -+ if (ctx_arch->flags.insecure) { -+ ia64_psr(regs)->sp = 0; -+ PFM_DBG("clearing psr.sp for [%d]", ctx->task->pid); -+ } -+ return 0; -+} -+ -+int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags) -+{ -+#define PFM_SETFL_BOTH_SWITCH (PFM_SETFL_OVFL_SWITCH|PFM_SETFL_TIME_SWITCH) -+#define PFM_ITA_SETFL_BOTH_INTR (PFM_ITA_SETFL_INTR_ONLY|\ -+ PFM_ITA_SETFL_EXCL_INTR) -+ -+/* exclude return value field */ -+#define PFM_SETFL_ALL_MASK (PFM_ITA_SETFL_BOTH_INTR \ -+ | PFM_SETFL_BOTH_SWITCH \ -+ | PFM_ITA_SETFL_IDLE_EXCL) -+ -+ if ((flags & ~PFM_SETFL_ALL_MASK)) { -+ PFM_DBG("invalid flags=0x%x", flags); -+ return -EINVAL; -+ } -+ -+ if ((flags & PFM_ITA_SETFL_BOTH_INTR) == PFM_ITA_SETFL_BOTH_INTR) { -+ PFM_DBG("both excl intr and ontr only are set"); -+ return -EINVAL; -+ } -+ -+ if ((flags & PFM_ITA_SETFL_IDLE_EXCL) && !ctx->flags.system) { -+ PFM_DBG("idle exclude flag only for system-wide context"); -+ return -EINVAL; -+ } -+ return 0; -+} -+ -+/* -+ * function called from pfm_unload_context_*(). Context is locked. 
-+ * interrupts are masked. task is not guaranteed to be current task. -+ * Access to PMU is not guaranteed. -+ * -+ * function must do whatever arch-specific action is required on unload -+ * of a context. -+ * -+ * called for both system-wide and per-thread. task is NULL for ssytem-wide -+ */ -+void pfm_arch_unload_context(struct pfm_context *ctx) -+{ -+ struct pfm_arch_context *ctx_arch; -+ struct pt_regs *regs; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ -+ if (ctx->flags.system) { -+ /* -+ * disable context switch hook -+ */ -+ clear_thread_flag(TIF_PERFMON_CTXSW); -+ -+ if (ctx_arch->flags.use_dbr) { -+ spin_lock(&pfm_arch_sessions_lock); -+ pfm_arch_sessions.pfs_sys_use_dbr--; -+ PFM_DBG("sys_use_dbr=%u", pfm_arch_sessions.pfs_sys_use_dbr); -+ spin_unlock(&pfm_arch_sessions_lock); -+ } -+ } else { -+ regs = task_pt_regs(ctx->task); -+ -+ /* -+ * cancel user level control for per-task context -+ */ -+ ia64_psr(regs)->sp = 1; -+ PFM_DBG("setting psr.sp for [%d]", ctx->task->pid); -+ } -+} -+ -+/* -+ * mask monitoring by setting the privilege level to 0 -+ * we cannot use psr.pp/psr.up for this, it is controlled by -+ * the user -+ */ -+void pfm_arch_mask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ struct pfm_arch_pmu_info *arch_info; -+ unsigned long mask; -+ unsigned int i; -+ -+ arch_info = pfm_pmu_info(); -+ /* -+ * as an optimization we look at the first 64 PMC -+ * registers only starting at PMC4. -+ */ -+ mask = arch_info->mask_pmcs[0] >> PFM_ITA_FCNTR; -+ for (i = PFM_ITA_FCNTR; mask; i++, mask >>= 1) { -+ if (likely(mask & 0x1)) -+ ia64_set_pmc(i, set->pmcs[i] & ~0xfUL); -+ } -+ /* -+ * make changes visisble -+ */ -+ ia64_srlz_d(); -+} -+ -+/* -+ * function called from pfm_switch_sets(), pfm_context_load_thread(), -+ * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets() -+ * context is locked. Interrupts are masked. set cannot be NULL. -+ * Access to the PMU is guaranteed. 
-+ * -+ * function must restore all PMD registers from set. -+ */ -+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ struct pfm_arch_context *ctx_arch; -+ unsigned long *mask; -+ u16 i, num; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ -+ if (ctx_arch->flags.insecure) { -+ num = ctx->regs.num_rw_pmd; -+ mask = ctx->regs.rw_pmds; -+ } else { -+ num = set->nused_pmds; -+ mask = set->used_pmds; -+ } -+ /* -+ * must restore all implemented read-write PMDS to avoid leaking -+ * information especially when PFM_IA64_FL_INSECURE is set. -+ * -+ * XXX: should check PFM_IA64_FL_INSECURE==0 and use used_pmd instead -+ */ -+ for (i = 0; num; i++) { -+ if (likely(test_bit(i, mask))) { -+ pfm_arch_write_pmd(ctx, i, set->pmds[i].value); -+ num--; -+ } -+ } -+ ia64_srlz_d(); -+} -+ -+/* -+ * function called from pfm_switch_sets(), pfm_context_load_thread(), -+ * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets() -+ * context is locked. Interrupts are masked. set cannot be NULL. -+ * Access to the PMU is guaranteed. -+ * -+ * function must restore all PMC registers from set if needed -+ */ -+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ struct pfm_arch_pmu_info *arch_info; -+ u64 mask2 = 0, val, plm; -+ unsigned long impl_mask, mask_pmcs; -+ unsigned int i; -+ -+ arch_info = pfm_pmu_info(); -+ /* -+ * as an optimization we only look at the first 64 -+ * PMC registers. In fact, we should never scan the -+ * entire impl_pmcs because ibr/dbr are implemented -+ * separately. -+ * -+ * always skip PMC0-PMC3. PMC0 taken care of when saving -+ * state. PMC1-PMC3 not used until we get counters in -+ * the 60 and above index range. -+ */ -+ impl_mask = ctx->regs.pmcs[0] >> PFM_ITA_FCNTR; -+ mask_pmcs = arch_info->mask_pmcs[0] >> PFM_ITA_FCNTR; -+ plm = ctx->state == PFM_CTX_MASKED ? 
~0xf : ~0x0; -+ -+ for (i = PFM_ITA_FCNTR; -+ impl_mask; -+ i++, impl_mask >>= 1, mask_pmcs >>= 1) { -+ if (likely(impl_mask & 0x1)) { -+ mask2 = mask_pmcs & 0x1 ? plm : ~0; -+ val = set->pmcs[i] & mask2; -+ ia64_set_pmc(i, val); -+ PFM_DBG_ovfl("pmc%u=0x%lx", i, val); -+ } -+ } -+ /* -+ * restore DBR/IBR -+ */ -+ if (set->priv_flags & PFM_ITA_SETFL_USE_DBR) { -+ pfm_restore_ibrs(set->pmcs+256, 8); -+ pfm_restore_dbrs(set->pmcs+264, 8); -+ } -+ ia64_srlz_d(); -+} -+ -+void pfm_arch_unmask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ u64 psr; -+ int is_system; -+ -+ is_system = ctx->flags.system; -+ -+ psr = ia64_getreg(_IA64_REG_PSR); -+ -+ /* -+ * monitoring is masked via the PMC.plm -+ * -+ * As we restore their value, we do not want each counter to -+ * restart right away. We stop monitoring using the PSR, -+ * restore the PMC (and PMD) and then re-establish the psr -+ * as it was. Note that there can be no pending overflow at -+ * this point, because monitoring is still MASKED. -+ * -+ * Because interrupts are masked we can avoid changing -+ * DCR.pp. -+ */ -+ if (is_system) -+ pfm_clear_psr_pp(); -+ else -+ pfm_clear_psr_up(); -+ -+ ia64_srlz_d(); -+ -+ pfm_arch_restore_pmcs(ctx, set); -+ -+ /* -+ * restore psr -+ * -+ * monitoring may start right now but interrupts -+ * are still masked -+ */ -+ pfm_set_psr_l(psr); -+ ia64_srlz_d(); -+} -+ -+/* -+ * Called from pfm_stop() -+ * -+ * For per-thread: -+ * task is not necessarily current. If not current task, then -+ * task is guaranteed stopped and off any cpu. Access to PMU -+ * is not guaranteed. Interrupts are masked. Context is locked. -+ * Set is the active set. -+ * -+ * must disable active monitoring. 
ctx cannot be NULL -+ */ -+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx) -+{ -+ struct pfm_arch_context *ctx_arch; -+ struct pt_regs *regs; -+ u64 dcr, psr; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ regs = task_pt_regs(task); -+ -+ if (!ctx->flags.system) { -+ /* -+ * in ZOMBIE state we always have task == current due to -+ * pfm_exit_thread() -+ */ -+ ia64_psr(regs)->up = 0; -+ ctx_arch->ctx_saved_psr_up = 0; -+ -+ /* -+ * in case of ZOMBIE state, there is no unload to clear -+ * insecure monitoring, so we do it in stop instead. -+ */ -+ if (ctx->state == PFM_CTX_ZOMBIE) -+ ia64_psr(regs)->sp = 1; -+ -+ if (task == current) { -+ pfm_clear_psr_up(); -+ ia64_srlz_d(); -+ } -+ } else if (ctx->flags.started) { /* do not stop twice */ -+ dcr = ia64_getreg(_IA64_REG_CR_DCR); -+ psr = ia64_getreg(_IA64_REG_PSR); -+ -+ ia64_psr(regs)->pp = 0; -+ ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP); -+ pfm_clear_psr_pp(); -+ ia64_srlz_d(); -+ -+ if (ctx->active_set->flags & PFM_ITA_SETFL_IDLE_EXCL) { -+ PFM_DBG("disabling idle exclude"); -+ __get_cpu_var(pfm_syst_info) &= ~PFM_ITA_CPUINFO_IDLE_EXCL; -+ } -+ } -+} -+ -+/* -+ * called from pfm_start() -+ * -+ * Interrupts are masked. Context is locked. Set is the active set. -+ * -+ * For per-thread: -+ * Task is not necessarily current. If not current task, then task -+ * is guaranteed stopped and off any cpu. No access to PMU is task -+ * is not current. -+ * -+ * For system-wide: -+ * task is always current -+ * -+ * must enable active monitoring. 
-+ */ -+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx) -+{ -+ struct pfm_arch_context *ctx_arch; -+ struct pt_regs *regs; -+ u64 dcr, dcr_pp, psr_pp; -+ u32 flags; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ regs = task_pt_regs(task); -+ flags = ctx->active_set->flags; -+ -+ /* -+ * per-thread mode -+ */ -+ if (!ctx->flags.system) { -+ -+ ia64_psr(regs)->up = 1; -+ -+ if (task == current) { -+ pfm_set_psr_up(); -+ ia64_srlz_d(); -+ } else { -+ /* -+ * activate monitoring at next ctxswin -+ */ -+ ctx_arch->ctx_saved_psr_up = IA64_PSR_UP; -+ } -+ return; -+ } -+ -+ /* -+ * system-wide mode -+ */ -+ dcr = ia64_getreg(_IA64_REG_CR_DCR); -+ if (flags & PFM_ITA_SETFL_INTR_ONLY) { -+ dcr_pp = 1; -+ psr_pp = 0; -+ } else if (flags & PFM_ITA_SETFL_EXCL_INTR) { -+ dcr_pp = 0; -+ psr_pp = 1; -+ } else { -+ dcr_pp = psr_pp = 1; -+ } -+ PFM_DBG("dcr_pp=%lu psr_pp=%lu", dcr_pp, psr_pp); -+ -+ /* -+ * update dcr_pp and psr_pp -+ */ -+ if (dcr_pp) -+ ia64_setreg(_IA64_REG_CR_DCR, dcr | IA64_DCR_PP); -+ else -+ ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP); -+ -+ if (psr_pp) { -+ pfm_set_psr_pp(); -+ ia64_psr(regs)->pp = 1; -+ } else { -+ pfm_clear_psr_pp(); -+ ia64_psr(regs)->pp = 0; -+ } -+ ia64_srlz_d(); -+ -+ if (ctx->active_set->flags & PFM_ITA_SETFL_IDLE_EXCL) { -+ PFM_DBG("enable idle exclude"); -+ __get_cpu_var(pfm_syst_info) |= PFM_ITA_CPUINFO_IDLE_EXCL; -+ } -+} -+ -+/* -+ * Only call this function when a process is trying to -+ * write the debug registers (reading is always allowed) -+ * called from arch/ia64/kernel/ptrace.c:access_uarea() -+ */ -+int __pfm_use_dbregs(struct task_struct *task) -+{ -+ struct pfm_arch_context *ctx_arch; -+ struct pfm_context *ctx; -+ unsigned long flags; -+ int ret = 0; -+ -+ PFM_DBG("called for [%d]", task->pid); -+ -+ ctx = task->pfm_context; -+ -+ /* -+ * do it only once -+ */ -+ if (task->thread.flags & IA64_THREAD_DBG_VALID) { -+ PFM_DBG("IA64_THREAD_DBG_VALID already set"); -+ return 0; -+ } -+ if (ctx) { -+ 
spin_lock_irqsave(&ctx->lock, flags); -+ ctx_arch = pfm_ctx_arch(ctx); -+ -+ if (ctx_arch->flags.use_dbr == 1) { -+ PFM_DBG("PMU using dbregs already, no ptrace access"); -+ ret = -1; -+ } -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ if (ret) -+ return ret; -+ } -+ -+ spin_lock(&pfm_arch_sessions_lock); -+ -+ /* -+ * We cannot allow setting breakpoints when system wide monitoring -+ * sessions are using the debug registers. -+ */ -+ if (!pfm_arch_sessions.pfs_sys_use_dbr) -+ pfm_arch_sessions.pfs_ptrace_use_dbr++; -+ else -+ ret = -1; -+ -+ PFM_DBG("ptrace_use_dbr=%u sys_use_dbr=%u by [%d] ret = %d", -+ pfm_arch_sessions.pfs_ptrace_use_dbr, -+ pfm_arch_sessions.pfs_sys_use_dbr, -+ task->pid, ret); -+ -+ spin_unlock(&pfm_arch_sessions_lock); -+ if (ret) -+ return ret; -+#ifndef CONFIG_SMP -+ /* -+ * in UP, we need to check whether the current -+ * owner of the PMU is not using the debug registers -+ * for monitoring. Because we are using a lazy -+ * save on ctxswout, we must force a save in this -+ * case because the debug registers are being -+ * modified by another task. We save the current -+ * PMD registers, and clear ownership. In ctxswin, -+ * full state will be reloaded. -+ * -+ * Note: we overwrite task. -+ */ -+ task = __get_cpu_var(pmu_owner); -+ ctx = __get_cpu_var(pmu_ctx); -+ -+ if (task == NULL) -+ return 0; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ -+ if (ctx_arch->flags.use_dbr) -+ pfm_save_pmds_release(ctx); -+#endif -+ return 0; -+} -+ -+/* -+ * This function is called for every task that exits with the -+ * IA64_THREAD_DBG_VALID set. This indicates a task which was -+ * able to use the debug registers for debugging purposes via -+ * ptrace(). 
Therefore we know it was not using them for -+ * perfmormance monitoring, so we only decrement the number -+ * of "ptraced" debug register users to keep the count up to date -+ */ -+int __pfm_release_dbregs(struct task_struct *task) -+{ -+ int ret; -+ -+ spin_lock(&pfm_arch_sessions_lock); -+ -+ if (pfm_arch_sessions.pfs_ptrace_use_dbr == 0) { -+ PFM_ERR("invalid release for [%d] ptrace_use_dbr=0", task->pid); -+ ret = -1; -+ } else { -+ pfm_arch_sessions.pfs_ptrace_use_dbr--; -+ ret = 0; -+ } -+ spin_unlock(&pfm_arch_sessions_lock); -+ -+ return ret; -+} -+ -+int pfm_ia64_mark_dbregs_used(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ struct pfm_arch_context *ctx_arch; -+ struct task_struct *task; -+ struct thread_struct *thread; -+ int ret = 0, state; -+ int i, can_access_pmu = 0; -+ int is_loaded, is_system; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ state = ctx->state; -+ task = ctx->task; -+ is_loaded = state == PFM_CTX_LOADED || state == PFM_CTX_MASKED; -+ is_system = ctx->flags.system; -+ can_access_pmu = __get_cpu_var(pmu_owner) == task || is_system; -+ -+ if (is_loaded == 0) -+ goto done; -+ -+ if (is_system == 0) { -+ thread = &(task->thread); -+ -+ /* -+ * cannot use debug registers for montioring if they are -+ * already used for debugging -+ */ -+ if (thread->flags & IA64_THREAD_DBG_VALID) { -+ PFM_DBG("debug registers already in use for [%d]", -+ task->pid); -+ return -EBUSY; -+ } -+ } -+ -+ /* -+ * check for debug registers in system wide mode -+ */ -+ spin_lock(&pfm_arch_sessions_lock); -+ -+ if (is_system) { -+ if (pfm_arch_sessions.pfs_ptrace_use_dbr) -+ ret = -EBUSY; -+ else -+ pfm_arch_sessions.pfs_sys_use_dbr++; -+ } -+ -+ spin_unlock(&pfm_arch_sessions_lock); -+ -+ if (ret != 0) -+ return ret; -+ -+ /* -+ * clear hardware registers to make sure we don't -+ * pick up stale state. 
-+ */ -+ if (can_access_pmu) { -+ PFM_DBG("clearing ibrs, dbrs"); -+ for (i = 0; i < 8; i++) { -+ ia64_set_ibr(i, 0); -+ ia64_dv_serialize_instruction(); -+ } -+ ia64_srlz_i(); -+ for (i = 0; i < 8; i++) { -+ ia64_set_dbr(i, 0); -+ ia64_dv_serialize_data(); -+ } -+ ia64_srlz_d(); -+ } -+done: -+ /* -+ * debug registers are now in use -+ */ -+ ctx_arch->flags.use_dbr = 1; -+ set->priv_flags |= PFM_ITA_SETFL_USE_DBR; -+ PFM_DBG("set%u use_dbr=1", set->id); -+ return 0; -+} -+EXPORT_SYMBOL(pfm_ia64_mark_dbregs_used); -+ -+char *pfm_arch_get_pmu_module_name(void) -+{ -+ switch (local_cpu_data->family) { -+ case 0x07: -+ return "perfmon_itanium"; -+ case 0x1f: -+ return "perfmon_mckinley"; -+ case 0x20: -+ return "perfmon_montecito"; -+ default: -+ return "perfmon_generic"; -+ } -+ return NULL; -+} -+ -+/* -+ * global arch-specific intialization, called only once -+ */ -+int __init pfm_arch_init(void) -+{ -+ int ret; -+ -+ spin_lock_init(&pfm_arch_sessions_lock); -+ -+#ifdef CONFIG_IA64_PERFMON_COMPAT -+ ret = pfm_ia64_compat_init(); -+ if (ret) -+ return ret; -+#endif -+ register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction); -+ -+ -+ return 0; -+} -diff --git a/arch/ia64/perfmon/perfmon_compat.c b/arch/ia64/perfmon/perfmon_compat.c -new file mode 100644 -index 0000000..2fd3d3c ---- /dev/null -+++ b/arch/ia64/perfmon/perfmon_compat.c -@@ -0,0 +1,1210 @@ -+/* -+ * This file implements the IA-64 specific -+ * support for the perfmon2 interface -+ * -+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+asmlinkage long sys_pfm_stop(int fd); -+asmlinkage long sys_pfm_start(int fd, struct pfarg_start __user *st); -+asmlinkage long sys_pfm_unload_context(int fd); -+asmlinkage long sys_pfm_restart(int fd); -+asmlinkage long sys_pfm_load_context(int fd, struct pfarg_load __user *ld); -+ -+ssize_t pfm_sysfs_res_show(char *buf, size_t sz, int what); -+ -+extern ssize_t __pfm_read(struct pfm_context *ctx, -+ union pfarg_msg *msg_buf, -+ int non_block); -+/* -+ * function providing some help for backward compatiblity with old IA-64 -+ * applications. In the old model, certain attributes of a counter were -+ * passed via the PMC, now they are passed via the PMD. -+ */ -+static int pfm_compat_update_pmd(struct pfm_context *ctx, u16 set_id, u16 cnum, -+ u32 rflags, -+ unsigned long *smpl_pmds, -+ unsigned long *reset_pmds, -+ u64 eventid) -+{ -+ struct pfm_event_set *set; -+ int is_counting; -+ unsigned long *impl_pmds; -+ u32 flags = 0; -+ u16 max_pmd; -+ -+ impl_pmds = ctx->regs.pmds; -+ max_pmd = ctx->regs.max_pmd; -+ -+ /* -+ * given that we do not maintain PMC ->PMD dependencies -+ * we cannot figure out what to do in case PMCxx != PMDxx -+ */ -+ if (cnum > max_pmd) -+ return 0; -+ -+ /* -+ * assumes PMCxx controls PMDxx which is always true for counters -+ * on Itanium PMUs. -+ */ -+ is_counting = pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_C64; -+ set = pfm_find_set(ctx, set_id, 0); -+ -+ /* -+ * for v2.0, we only allowed counting PMD to generate -+ * user-level notifications. Same thing with randomization. 
-+ */ -+ if (is_counting) { -+ if (rflags & PFM_REGFL_OVFL_NOTIFY) -+ flags |= PFM_REGFL_OVFL_NOTIFY; -+ if (rflags & PFM_REGFL_RANDOM) -+ flags |= PFM_REGFL_RANDOM; -+ /* -+ * verify validity of smpl_pmds -+ */ -+ if (unlikely(bitmap_subset(smpl_pmds, -+ impl_pmds, max_pmd) == 0)) { -+ PFM_DBG("invalid smpl_pmds=0x%llx for pmd%u", -+ (unsigned long long)smpl_pmds[0], cnum); -+ return -EINVAL; -+ } -+ /* -+ * verify validity of reset_pmds -+ */ -+ if (unlikely(bitmap_subset(reset_pmds, -+ impl_pmds, max_pmd) == 0)) { -+ PFM_DBG("invalid reset_pmds=0x%lx for pmd%u", -+ reset_pmds[0], cnum); -+ return -EINVAL; -+ } -+ /* -+ * ensures that a PFM_READ_PMDS succeeds with a -+ * corresponding PFM_WRITE_PMDS -+ */ -+ __set_bit(cnum, set->used_pmds); -+ -+ } else if (rflags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) { -+ PFM_DBG("cannot set ovfl_notify or random on pmd%u", cnum); -+ return -EINVAL; -+ } -+ -+ set->pmds[cnum].flags = flags; -+ -+ if (is_counting) { -+ bitmap_copy(set->pmds[cnum].reset_pmds, -+ reset_pmds, -+ max_pmd); -+ -+ bitmap_copy(set->pmds[cnum].smpl_pmds, -+ smpl_pmds, -+ max_pmd); -+ -+ set->pmds[cnum].eventid = eventid; -+ -+ /* -+ * update ovfl_notify -+ */ -+ if (rflags & PFM_REGFL_OVFL_NOTIFY) -+ __set_bit(cnum, set->ovfl_notify); -+ else -+ __clear_bit(cnum, set->ovfl_notify); -+ -+ } -+ PFM_DBG("pmd%u flags=0x%x eventid=0x%lx r_pmds=0x%lx s_pmds=0x%lx", -+ cnum, flags, -+ eventid, -+ reset_pmds[0], -+ smpl_pmds[0]); -+ -+ return 0; -+} -+ -+ -+int __pfm_write_ibrs_old(struct pfm_context *ctx, void *arg, int count) -+{ -+ struct pfarg_dbreg *req = arg; -+ struct pfarg_pmc pmc; -+ int i, ret = 0; -+ -+ memset(&pmc, 0, sizeof(pmc)); -+ -+ for (i = 0; i < count; i++, req++) { -+ pmc.reg_num = 256+req->dbreg_num; -+ pmc.reg_value = req->dbreg_value; -+ pmc.reg_flags = 0; -+ pmc.reg_set = req->dbreg_set; -+ -+ ret = __pfm_write_pmcs(ctx, &pmc, 1); -+ -+ req->dbreg_flags &= ~PFM_REG_RETFL_MASK; -+ req->dbreg_flags |= pmc.reg_flags; -+ -+ if (ret) -+ 
return ret; -+ } -+ return 0; -+} -+ -+static long pfm_write_ibrs_old(int fd, void __user *ureq, int count) -+{ -+ struct pfm_context *ctx; -+ struct task_struct *task; -+ struct file *filp; -+ struct pfarg_dbreg *req = NULL; -+ void *fptr, *resume; -+ unsigned long flags; -+ size_t sz; -+ int ret, fput_needed; -+ -+ if (count < 1 || count >= PFM_MAX_ARG_COUNT(req)) -+ return -EINVAL; -+ -+ sz = count*sizeof(*req); -+ -+ filp = fget_light(fd, &fput_needed); -+ if (unlikely(filp == NULL)) { -+ PFM_DBG("invalid fd %d", fd); -+ return -EBADF; -+ } -+ -+ ctx = filp->private_data; -+ ret = -EBADF; -+ -+ if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) { -+ PFM_DBG("fd %d not related to perfmon", fd); -+ goto error; -+ } -+ -+ ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr); -+ if (ret) -+ goto error; -+ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ task = ctx->task; -+ -+ ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume); -+ if (ret == 0) -+ ret = __pfm_write_ibrs_old(ctx, req, count); -+ -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ -+ if (resume) -+ pfm_resume_task(task, resume); -+ -+ if (copy_to_user(ureq, req, sz)) -+ ret = -EFAULT; -+ -+ kfree(fptr); -+error: -+ fput_light(filp, fput_needed); -+ return ret; -+} -+ -+int __pfm_write_dbrs_old(struct pfm_context *ctx, void *arg, int count) -+{ -+ struct pfarg_dbreg *req = arg; -+ struct pfarg_pmc pmc; -+ int i, ret = 0; -+ -+ memset(&pmc, 0, sizeof(pmc)); -+ -+ for (i = 0; i < count; i++, req++) { -+ pmc.reg_num = 264+req->dbreg_num; -+ pmc.reg_value = req->dbreg_value; -+ pmc.reg_flags = 0; -+ pmc.reg_set = req->dbreg_set; -+ -+ ret = __pfm_write_pmcs(ctx, &pmc, 1); -+ -+ req->dbreg_flags &= ~PFM_REG_RETFL_MASK; -+ req->dbreg_flags |= pmc.reg_flags; -+ if (ret) -+ return ret; -+ } -+ return 0; -+} -+ -+static long pfm_write_dbrs_old(int fd, void __user *ureq, int count) -+{ -+ struct pfm_context *ctx; -+ struct task_struct *task; -+ struct file *filp; -+ struct pfarg_dbreg *req = 
NULL; -+ void *fptr, *resume; -+ unsigned long flags; -+ size_t sz; -+ int ret, fput_needed; -+ -+ if (count < 1 || count >= PFM_MAX_ARG_COUNT(req)) -+ return -EINVAL; -+ -+ sz = count*sizeof(*req); -+ -+ filp = fget_light(fd, &fput_needed); -+ if (unlikely(filp == NULL)) { -+ PFM_DBG("invalid fd %d", fd); -+ return -EBADF; -+ } -+ -+ ctx = filp->private_data; -+ ret = -EBADF; -+ -+ if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) { -+ PFM_DBG("fd %d not related to perfmon", fd); -+ goto error; -+ } -+ -+ ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr); -+ if (ret) -+ goto error; -+ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ task = ctx->task; -+ -+ ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume); -+ if (ret == 0) -+ ret = __pfm_write_dbrs_old(ctx, req, count); -+ -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ -+ if (resume) -+ pfm_resume_task(task, resume); -+ -+ if (copy_to_user(ureq, req, sz)) -+ ret = -EFAULT; -+ -+ kfree(fptr); -+error: -+ fput_light(filp, fput_needed); -+ return ret; -+} -+ -+int __pfm_write_pmcs_old(struct pfm_context *ctx, struct pfarg_reg *req_old, -+ int count) -+{ -+ struct pfarg_pmc req; -+ unsigned int i; -+ int ret, error_code; -+ -+ memset(&req, 0, sizeof(req)); -+ -+ for (i = 0; i < count; i++, req_old++) { -+ req.reg_num = req_old->reg_num; -+ req.reg_set = req_old->reg_set; -+ req.reg_flags = 0; -+ req.reg_value = req_old->reg_value; -+ -+ ret = __pfm_write_pmcs(ctx, (void *)&req, 1); -+ req_old->reg_flags &= ~PFM_REG_RETFL_MASK; -+ req_old->reg_flags |= req.reg_flags; -+ -+ if (ret) -+ return ret; -+ -+ ret = pfm_compat_update_pmd(ctx, req_old->reg_set, -+ req_old->reg_num, -+ (u32)req_old->reg_flags, -+ req_old->reg_smpl_pmds, -+ req_old->reg_reset_pmds, -+ req_old->reg_smpl_eventid); -+ -+ error_code = ret ? 
PFM_REG_RETFL_EINVAL : 0; -+ req_old->reg_flags &= ~PFM_REG_RETFL_MASK; -+ req_old->reg_flags |= error_code; -+ -+ if (ret) -+ return ret; -+ } -+ return 0; -+} -+ -+static long pfm_write_pmcs_old(int fd, void __user *ureq, int count) -+{ -+ struct pfm_context *ctx; -+ struct task_struct *task; -+ struct file *filp; -+ struct pfarg_reg *req = NULL; -+ void *fptr, *resume; -+ unsigned long flags; -+ size_t sz; -+ int ret, fput_needed; -+ -+ if (count < 1 || count >= PFM_MAX_ARG_COUNT(req)) -+ return -EINVAL; -+ -+ sz = count*sizeof(*req); -+ -+ filp = fget_light(fd, &fput_needed); -+ if (unlikely(filp == NULL)) { -+ PFM_DBG("invalid fd %d", fd); -+ return -EBADF; -+ } -+ -+ ctx = filp->private_data; -+ ret = -EBADF; -+ -+ if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) { -+ PFM_DBG("fd %d not related to perfmon", fd); -+ goto error; -+ } -+ -+ ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr); -+ if (ret) -+ goto error; -+ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ task = ctx->task; -+ -+ ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume); -+ if (ret == 0) -+ ret = __pfm_write_pmcs_old(ctx, req, count); -+ -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ -+ if (resume) -+ pfm_resume_task(task, resume); -+ -+ if (copy_to_user(ureq, req, sz)) -+ ret = -EFAULT; -+ -+ kfree(fptr); -+ -+error: -+ fput_light(filp, fput_needed); -+ return ret; -+} -+ -+int __pfm_write_pmds_old(struct pfm_context *ctx, struct pfarg_reg *req_old, -+ int count) -+{ -+ struct pfarg_pmd req; -+ int i, ret; -+ -+ memset(&req, 0, sizeof(req)); -+ -+ for (i = 0; i < count; i++, req_old++) { -+ req.reg_num = req_old->reg_num; -+ req.reg_set = req_old->reg_set; -+ req.reg_value = req_old->reg_value; -+ /* flags passed with pmcs in v2.0 */ -+ -+ req.reg_long_reset = req_old->reg_long_reset; -+ req.reg_short_reset = req_old->reg_short_reset; -+ req.reg_random_mask = req_old->reg_random_mask; -+ /* -+ * reg_random_seed is ignored since v2.3 -+ */ -+ -+ /* -+ * skip 
last_reset_val not used for writing -+ * skip smpl_pmds, reset_pmds, eventid, ovfl_swtch_cnt -+ * as set in pfm_write_pmcs_old. -+ * -+ * ovfl_switch_cnt ignored, not implemented in v2.0 -+ */ -+ ret = __pfm_write_pmds(ctx, (void *)&req, 1, 1); -+ -+ req_old->reg_flags &= ~PFM_REG_RETFL_MASK; -+ req_old->reg_flags |= req.reg_flags; -+ -+ if (ret) -+ return ret; -+ } -+ return 0; -+} -+ -+static long pfm_write_pmds_old(int fd, void __user *ureq, int count) -+{ -+ struct pfm_context *ctx; -+ struct task_struct *task; -+ struct file *filp; -+ struct pfarg_reg *req = NULL; -+ void *fptr, *resume; -+ unsigned long flags; -+ size_t sz; -+ int ret, fput_needed; -+ -+ if (count < 1 || count >= PFM_MAX_ARG_COUNT(req)) -+ return -EINVAL; -+ -+ sz = count*sizeof(*req); -+ -+ filp = fget_light(fd, &fput_needed); -+ if (unlikely(filp == NULL)) { -+ PFM_DBG("invalid fd %d", fd); -+ return -EBADF; -+ } -+ -+ ctx = filp->private_data; -+ ret = -EBADF; -+ -+ if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) { -+ PFM_DBG("fd %d not related to perfmon", fd); -+ goto error; -+ } -+ -+ ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr); -+ if (ret) -+ goto error; -+ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ task = ctx->task; -+ -+ ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume); -+ if (ret == 0) -+ ret = __pfm_write_pmds_old(ctx, req, count); -+ -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ -+ if (copy_to_user(ureq, req, sz)) -+ ret = -EFAULT; -+ -+ if (resume) -+ pfm_resume_task(task, resume); -+ -+ kfree(fptr); -+error: -+ fput_light(filp, fput_needed); -+ return ret; -+} -+ -+int __pfm_read_pmds_old(struct pfm_context *ctx, struct pfarg_reg *req_old, -+ int count) -+{ -+ struct pfarg_pmd req; -+ int i, ret; -+ -+ memset(&req, 0, sizeof(req)); -+ -+ for (i = 0; i < count; i++, req_old++) { -+ req.reg_num = req_old->reg_num; -+ req.reg_set = req_old->reg_set; -+ -+ /* skip value not used for reading */ -+ req.reg_flags = req_old->reg_flags; -+ -+ /* 
skip short/long_reset not used for reading */ -+ /* skip last_reset_val not used for reading */ -+ /* skip ovfl_switch_cnt not used for reading */ -+ -+ ret = __pfm_read_pmds(ctx, (void *)&req, 1); -+ -+ req_old->reg_flags &= ~PFM_REG_RETFL_MASK; -+ req_old->reg_flags |= req.reg_flags; -+ if (ret) -+ return ret; -+ -+ /* update fields */ -+ req_old->reg_value = req.reg_value; -+ -+ req_old->reg_last_reset_val = req.reg_last_reset_val; -+ req_old->reg_ovfl_switch_cnt = req.reg_ovfl_switch_cnt; -+ } -+ return 0; -+} -+ -+static long pfm_read_pmds_old(int fd, void __user *ureq, int count) -+{ -+ struct pfm_context *ctx; -+ struct task_struct *task; -+ struct file *filp; -+ struct pfarg_reg *req = NULL; -+ void *fptr, *resume; -+ unsigned long flags; -+ size_t sz; -+ int ret, fput_needed; -+ -+ if (count < 1 || count >= PFM_MAX_ARG_COUNT(req)) -+ return -EINVAL; -+ -+ sz = count*sizeof(*req); -+ -+ filp = fget_light(fd, &fput_needed); -+ if (unlikely(filp == NULL)) { -+ PFM_DBG("invalid fd %d", fd); -+ return -EBADF; -+ } -+ -+ ctx = filp->private_data; -+ ret = -EBADF; -+ -+ if (unlikely(!ctx || filp->f_op != &pfm_file_ops)) { -+ PFM_DBG("fd %d not related to perfmon", fd); -+ goto error; -+ } -+ -+ ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr); -+ if (ret) -+ goto error; -+ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ task = ctx->task; -+ -+ ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume); -+ if (ret == 0) -+ ret = __pfm_read_pmds_old(ctx, req, count); -+ -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ -+ if (resume) -+ pfm_resume_task(task, resume); -+ -+ if (copy_to_user(ureq, req, sz)) -+ ret = -EFAULT; -+ -+ kfree(fptr); -+error: -+ fput_light(filp, fput_needed); -+ return ret; -+} -+ -+/* -+ * OBSOLETE: use /proc/perfmon_map instead -+ */ -+static long pfm_get_default_pmcs_old(int fd, void __user *ureq, int count) -+{ -+ struct pfarg_reg *req = NULL; -+ void *fptr; -+ size_t sz; -+ int ret, i; -+ unsigned int cnum; -+ -+ if 
(count < 1) -+ return -EINVAL; -+ -+ /* -+ * ensure the pfm_pmu_conf does not disappear while -+ * we use it -+ */ -+ ret = pfm_pmu_conf_get(1); -+ if (ret) -+ return ret; -+ -+ sz = count*sizeof(*ureq); -+ -+ ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr); -+ if (ret) -+ goto error; -+ -+ -+ for (i = 0; i < count; i++, req++) { -+ cnum = req->reg_num; -+ -+ if (i >= PFM_MAX_PMCS || -+ (pfm_pmu_conf->pmc_desc[cnum].type & PFM_REG_I) == 0) { -+ req->reg_flags = PFM_REG_RETFL_EINVAL; -+ break; -+ } -+ req->reg_value = pfm_pmu_conf->pmc_desc[cnum].dfl_val; -+ req->reg_flags = 0; -+ -+ PFM_DBG("pmc[%u]=0x%lx", cnum, req->reg_value); -+ } -+ -+ if (copy_to_user(ureq, req, sz)) -+ ret = -EFAULT; -+ -+ kfree(fptr); -+error: -+ pfm_pmu_conf_put(); -+ -+ return ret; -+} -+ -+/* -+ * allocate a sampling buffer and remaps it into the user address space of -+ * the task. This is only in compatibility mode -+ * -+ * function called ONLY on current task -+ */ -+int pfm_smpl_buf_alloc_compat(struct pfm_context *ctx, size_t rsize, -+ struct file *filp) -+{ -+ struct mm_struct *mm = current->mm; -+ struct vm_area_struct *vma = NULL; -+ struct pfm_arch_context *ctx_arch; -+ size_t size; -+ int ret; -+ extern struct vm_operations_struct pfm_buf_map_vm_ops; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ -+ /* -+ * allocate buffer + map desc -+ */ -+ ret = pfm_smpl_buf_alloc(ctx, rsize); -+ if (ret) -+ return ret; -+ -+ size = ctx->smpl_size; -+ -+ -+ /* allocate vma */ -+ vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL); -+ if (!vma) { -+ PFM_DBG("Cannot allocate vma"); -+ goto error_kmem; -+ } -+ memset(vma, 0, sizeof(*vma)); -+ -+ /* -+ * partially initialize the vma for the sampling buffer -+ */ -+ vma->vm_mm = mm; -+ vma->vm_flags = VM_READ | VM_MAYREAD | VM_RESERVED; -+ vma->vm_page_prot = PAGE_READONLY; -+ vma->vm_ops = &pfm_buf_map_vm_ops; -+ vma->vm_file = filp; -+ vma->vm_private_data = ctx; -+ vma->vm_pgoff = 0; -+ -+ /* -+ * simulate effect of mmap() -+ */ -+ 
get_file(filp); -+ -+ /* -+ * Let's do the difficult operations next. -+ * -+ * now we atomically find some area in the address space and -+ * remap the buffer into it. -+ */ -+ down_write(¤t->mm->mmap_sem); -+ -+ /* find some free area in address space, must have mmap sem held */ -+ vma->vm_start = get_unmapped_area(NULL, 0, size, 0, -+ MAP_PRIVATE|MAP_ANONYMOUS); -+ if (vma->vm_start == 0) { -+ PFM_DBG("cannot find unmapped area of size %zu", size); -+ up_write(¤t->mm->mmap_sem); -+ goto error; -+ } -+ vma->vm_end = vma->vm_start + size; -+ -+ PFM_DBG("aligned_size=%zu mapped @0x%lx", size, vma->vm_start); -+ /* -+ * now insert the vma in the vm list for the process, must be -+ * done with mmap lock held -+ */ -+ insert_vm_struct(mm, vma); -+ -+ mm->total_vm += size >> PAGE_SHIFT; -+ -+ up_write(¤t->mm->mmap_sem); -+ -+ /* -+ * IMPORTANT: we do not issue the fput() -+ * because we want to increase the ref count -+ * on the descriptor to simulate what mmap() -+ * would do -+ */ -+ -+ /* -+ * used to propagate vaddr to syscall stub -+ */ -+ ctx_arch->ctx_smpl_vaddr = (void *)vma->vm_start; -+ -+ return 0; -+error: -+ kmem_cache_free(vm_area_cachep, vma); -+error_kmem: -+ pfm_smpl_buf_space_release(ctx, ctx->smpl_size); -+ vfree(ctx->smpl_addr); -+ return -ENOMEM; -+} -+ -+#define PFM_DEFAULT_SMPL_UUID { \ -+ 0x4d, 0x72, 0xbe, 0xc0, 0x06, 0x64, 0x41, 0x43, 0x82,\ -+ 0xb4, 0xd3, 0xfd, 0x27, 0x24, 0x3c, 0x97} -+ -+static pfm_uuid_t old_default_uuid = PFM_DEFAULT_SMPL_UUID; -+static pfm_uuid_t null_uuid; -+ -+/* -+ * function invoked in case, pfm_context_create fails -+ * at the last operation, copy_to_user. It needs to -+ * undo memory allocations and free the file descriptor -+ */ -+static void pfm_undo_create_context_fd(int fd, struct pfm_context *ctx) -+{ -+ struct files_struct *files = current->files; -+ struct file *file; -+ int fput_needed; -+ -+ file = fget_light(fd, &fput_needed); -+ /* -+ * there is no fd_uninstall(), so we do it -+ * here. 
put_unused_fd() does not remove the -+ * effect of fd_install(). -+ */ -+ -+ spin_lock(&files->file_lock); -+ files->fd_array[fd] = NULL; -+ spin_unlock(&files->file_lock); -+ -+ fput_light(file, fput_needed); -+ -+ /* -+ * decrement ref count and kill file -+ */ -+ put_filp(file); -+ -+ put_unused_fd(fd); -+ -+ pfm_free_context(ctx); -+} -+ -+static int pfm_get_smpl_arg_old(pfm_uuid_t uuid, void __user *fmt_uarg, -+ size_t usize, void **arg, -+ struct pfm_smpl_fmt **fmt) -+{ -+ struct pfm_smpl_fmt *f; -+ void *addr = NULL; -+ size_t sz; -+ int ret; -+ -+ if (!memcmp(uuid, null_uuid, sizeof(pfm_uuid_t))) -+ return 0; -+ -+ if (memcmp(uuid, old_default_uuid, sizeof(pfm_uuid_t))) { -+ PFM_DBG("compatibility mode supports only default sampling format"); -+ return -EINVAL; -+ } -+ /* -+ * find fmt and increase refcount -+ */ -+ f = pfm_smpl_fmt_get("default-old"); -+ if (f == NULL) { -+ PFM_DBG("default-old buffer format not found"); -+ return -EINVAL; -+ } -+ -+ /* -+ * expected format argument size -+ */ -+ sz = f->fmt_arg_size; -+ -+ /* -+ * check user size matches expected size -+ * usize = -1 is for IA-64 backward compatibility -+ */ -+ ret = -EINVAL; -+ if (sz != usize && usize != -1) { -+ PFM_DBG("invalid arg size %zu, format expects %zu", -+ usize, sz); -+ goto error; -+ } -+ -+ ret = -ENOMEM; -+ addr = kmalloc(sz, GFP_KERNEL); -+ if (addr == NULL) -+ goto error; -+ -+ ret = -EFAULT; -+ if (copy_from_user(addr, fmt_uarg, sz)) -+ goto error; -+ -+ *arg = addr; -+ *fmt = f; -+ return 0; -+ -+error: -+ kfree(addr); -+ pfm_smpl_fmt_put(f); -+ return ret; -+} -+ -+static long pfm_create_context_old(int fd, void __user *ureq, int count) -+{ -+ struct pfm_context *new_ctx; -+ struct pfm_arch_context *ctx_arch; -+ struct pfm_smpl_fmt *fmt = NULL; -+ struct pfarg_context req_old; -+ void __user *usmpl_arg; -+ void *smpl_arg = NULL; -+ struct pfarg_ctx req; -+ int ret; -+ -+ if (count != 1) -+ return -EINVAL; -+ -+ if (copy_from_user(&req_old, ureq, sizeof(req_old))) -+ 
return -EFAULT; -+ -+ memset(&req, 0, sizeof(req)); -+ -+ /* -+ * sampling format args are following pfarg_context -+ */ -+ usmpl_arg = ureq+sizeof(req_old); -+ -+ ret = pfm_get_smpl_arg_old(req_old.ctx_smpl_buf_id, usmpl_arg, -1, -+ &smpl_arg, &fmt); -+ if (ret) -+ return ret; -+ -+ req.ctx_flags = req_old.ctx_flags; -+ -+ /* -+ * returns file descriptor if >=0, or error code */ -+ ret = __pfm_create_context(&req, fmt, smpl_arg, PFM_COMPAT, &new_ctx); -+ if (ret >= 0) { -+ ctx_arch = pfm_ctx_arch(new_ctx); -+ req_old.ctx_fd = ret; -+ req_old.ctx_smpl_vaddr = ctx_arch->ctx_smpl_vaddr; -+ } -+ -+ if (copy_to_user(ureq, &req_old, sizeof(req_old))) { -+ pfm_undo_create_context_fd(req_old.ctx_fd, new_ctx); -+ ret = -EFAULT; -+ } -+ -+ kfree(smpl_arg); -+ -+ return ret; -+} -+ -+/* -+ * obsolete call: use /proc/perfmon -+ */ -+static long pfm_get_features_old(int fd, void __user *arg, int count) -+{ -+ struct pfarg_features req; -+ int ret = 0; -+ -+ if (count != 1) -+ return -EINVAL; -+ -+ memset(&req, 0, sizeof(req)); -+ -+ req.ft_version = PFM_VERSION; -+ -+ if (copy_to_user(arg, &req, sizeof(req))) -+ ret = -EFAULT; -+ -+ return ret; -+} -+ -+static long pfm_debug_old(int fd, void __user *arg, int count) -+{ -+ int m; -+ -+ if (count != 1) -+ return -EINVAL; -+ -+ if (get_user(m, (int __user *)arg)) -+ return -EFAULT; -+ -+ -+ pfm_controls.debug = m == 0 ? 0 : 1; -+ -+ PFM_INFO("debugging %s (timing reset)", -+ pfm_controls.debug ? 
"on" : "off"); -+ -+ if (m == 0) -+ for_each_online_cpu(m) { -+ memset(&per_cpu(pfm_stats, m), 0, -+ sizeof(struct pfm_stats)); -+ } -+ return 0; -+} -+ -+static long pfm_unload_context_old(int fd, void __user *arg, int count) -+{ -+ if (count) -+ return -EINVAL; -+ -+ return sys_pfm_unload_context(fd); -+} -+ -+static long pfm_restart_old(int fd, void __user *arg, int count) -+{ -+ if (count) -+ return -EINVAL; -+ -+ return sys_pfm_restart(fd); -+} -+ -+static long pfm_stop_old(int fd, void __user *arg, int count) -+{ -+ if (count) -+ return -EINVAL; -+ -+ return sys_pfm_stop(fd); -+} -+ -+static long pfm_start_old(int fd, void __user *arg, int count) -+{ -+ if (count > 1) -+ return -EINVAL; -+ -+ return sys_pfm_start(fd, arg); -+} -+ -+static long pfm_load_context_old(int fd, void __user *ureq, int count) -+{ -+ if (count != 1) -+ return -EINVAL; -+ -+ return sys_pfm_load_context(fd, ureq); -+} -+ -+/* -+ * perfmon command descriptions -+ */ -+struct pfm_cmd_desc { -+ long (*cmd_func)(int fd, void __user *arg, int count); -+}; -+ -+/* -+ * functions MUST be listed in the increasing order of -+ * their index (see permfon.h) -+ */ -+#define PFM_CMD(name) \ -+ { .cmd_func = name, \ -+ } -+#define PFM_CMD_NONE \ -+ { .cmd_func = NULL \ -+ } -+ -+static struct pfm_cmd_desc pfm_cmd_tab[] = { -+/* 0 */PFM_CMD_NONE, -+/* 1 */PFM_CMD(pfm_write_pmcs_old), -+/* 2 */PFM_CMD(pfm_write_pmds_old), -+/* 3 */PFM_CMD(pfm_read_pmds_old), -+/* 4 */PFM_CMD(pfm_stop_old), -+/* 5 */PFM_CMD(pfm_start_old), -+/* 6 */PFM_CMD_NONE, -+/* 7 */PFM_CMD_NONE, -+/* 8 */PFM_CMD(pfm_create_context_old), -+/* 9 */PFM_CMD_NONE, -+/* 10 */PFM_CMD(pfm_restart_old), -+/* 11 */PFM_CMD_NONE, -+/* 12 */PFM_CMD(pfm_get_features_old), -+/* 13 */PFM_CMD(pfm_debug_old), -+/* 14 */PFM_CMD_NONE, -+/* 15 */PFM_CMD(pfm_get_default_pmcs_old), -+/* 16 */PFM_CMD(pfm_load_context_old), -+/* 17 */PFM_CMD(pfm_unload_context_old), -+/* 18 */PFM_CMD_NONE, -+/* 19 */PFM_CMD_NONE, -+/* 20 */PFM_CMD_NONE, -+/* 21 
*/PFM_CMD_NONE, -+/* 22 */PFM_CMD_NONE, -+/* 23 */PFM_CMD_NONE, -+/* 24 */PFM_CMD_NONE, -+/* 25 */PFM_CMD_NONE, -+/* 26 */PFM_CMD_NONE, -+/* 27 */PFM_CMD_NONE, -+/* 28 */PFM_CMD_NONE, -+/* 29 */PFM_CMD_NONE, -+/* 30 */PFM_CMD_NONE, -+/* 31 */PFM_CMD_NONE, -+/* 32 */PFM_CMD(pfm_write_ibrs_old), -+/* 33 */PFM_CMD(pfm_write_dbrs_old), -+}; -+#define PFM_CMD_COUNT ARRAY_SIZE(pfm_cmd_tab) -+ -+/* -+ * system-call entry point (must return long) -+ */ -+asmlinkage long sys_perfmonctl(int fd, int cmd, void __user *arg, int count) -+{ -+ if (perfmon_disabled) -+ return -ENOSYS; -+ -+ if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT -+ || pfm_cmd_tab[cmd].cmd_func == NULL)) { -+ PFM_DBG("invalid cmd=%d", cmd); -+ return -EINVAL; -+ } -+ return (long)pfm_cmd_tab[cmd].cmd_func(fd, arg, count); -+} -+ -+/* -+ * Called from pfm_read() for a perfmon v2.0 context. -+ * -+ * compatibility mode pfm_read() routine. We need a separate -+ * routine because the definition of the message has changed. -+ * The pfm_msg and pfarg_msg structures are different. -+ * -+ * return: sizeof(pfm_msg_t) on success, -errno otherwise -+ */ -+ssize_t pfm_arch_compat_read(struct pfm_context *ctx, -+ char __user *buf, -+ int non_block, -+ size_t size) -+{ -+ union pfarg_msg msg_buf; -+ pfm_msg_t old_msg_buf; -+ pfm_ovfl_msg_t *o_msg; -+ struct pfarg_ovfl_msg *n_msg; -+ int ret; -+ -+ PFM_DBG("msg=%p size=%zu", buf, size); -+ -+ /* -+ * cannot extract partial messages. -+ * check even when there is no message -+ * -+ * cannot extract more than one message per call. Bytes -+ * above sizeof(msg) are ignored. 
-+ */ -+ if (size < sizeof(old_msg_buf)) { -+ PFM_DBG("message is too small size=%zu must be >=%zu)", -+ size, -+ sizeof(old_msg_buf)); -+ return -EINVAL; -+ } -+ -+ ret = __pfm_read(ctx, &msg_buf, non_block); -+ if (ret < 1) -+ return ret; -+ -+ /* -+ * force return value to old message size -+ */ -+ ret = sizeof(old_msg_buf); -+ -+ o_msg = &old_msg_buf.pfm_ovfl_msg; -+ n_msg = &msg_buf.pfm_ovfl_msg; -+ -+ switch (msg_buf.type) { -+ case PFM_MSG_OVFL: -+ o_msg->msg_type = PFM_MSG_OVFL; -+ o_msg->msg_ctx_fd = 0; -+ o_msg->msg_active_set = n_msg->msg_active_set; -+ o_msg->msg_tstamp = 0; -+ -+ o_msg->msg_ovfl_pmds[0] = n_msg->msg_ovfl_pmds[0]; -+ o_msg->msg_ovfl_pmds[1] = n_msg->msg_ovfl_pmds[1]; -+ o_msg->msg_ovfl_pmds[2] = n_msg->msg_ovfl_pmds[2]; -+ o_msg->msg_ovfl_pmds[3] = n_msg->msg_ovfl_pmds[3]; -+ break; -+ case PFM_MSG_END: -+ o_msg->msg_type = PFM_MSG_END; -+ o_msg->msg_ctx_fd = 0; -+ o_msg->msg_tstamp = 0; -+ break; -+ default: -+ PFM_DBG("unknown msg type=%d", msg_buf.type); -+ } -+ if (copy_to_user(buf, &old_msg_buf, sizeof(old_msg_buf))) -+ ret = -EFAULT; -+ PFM_DBG_ovfl("ret=%d", ret); -+ return ret; -+} -+ -+/* -+ * legacy /proc/perfmon simplified interface (we only maintain the -+ * global information (no more per-cpu stats, use -+ * /sys/devices/system/cpu/cpuXX/perfmon -+ */ -+static struct proc_dir_entry *perfmon_proc; -+ -+static void *pfm_proc_start(struct seq_file *m, loff_t *pos) -+{ -+ if (*pos == 0) -+ return (void *)1; -+ -+ return NULL; -+} -+ -+static void *pfm_proc_next(struct seq_file *m, void *v, loff_t *pos) -+{ -+ ++*pos; -+ return pfm_proc_start(m, pos); -+} -+ -+static void pfm_proc_stop(struct seq_file *m, void *v) -+{ -+} -+ -+/* -+ * this is a simplified version of the legacy /proc/perfmon. 
-+ * We have retained ONLY the key information that tools are actually -+ * using -+ */ -+static void pfm_proc_show_header(struct seq_file *m) -+{ -+ char buf[128]; -+ -+ pfm_sysfs_res_show(buf, sizeof(buf), 3); -+ -+ seq_printf(m, "perfmon version : %u.%u\n", -+ PFM_VERSION_MAJ, PFM_VERSION_MIN); -+ -+ seq_printf(m, "model : %s", buf); -+} -+ -+static int pfm_proc_show(struct seq_file *m, void *v) -+{ -+ pfm_proc_show_header(m); -+ return 0; -+} -+ -+struct seq_operations pfm_proc_seq_ops = { -+ .start = pfm_proc_start, -+ .next = pfm_proc_next, -+ .stop = pfm_proc_stop, -+ .show = pfm_proc_show -+}; -+ -+static int pfm_proc_open(struct inode *inode, struct file *file) -+{ -+ return seq_open(file, &pfm_proc_seq_ops); -+} -+ -+ -+static struct file_operations pfm_proc_fops = { -+ .open = pfm_proc_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = seq_release, -+}; -+ -+/* -+ * called from pfm_arch_init(), global initialization, called once -+ */ -+int __init pfm_ia64_compat_init(void) -+{ -+ /* -+ * create /proc/perfmon -+ */ -+ perfmon_proc = create_proc_entry("perfmon", S_IRUGO, NULL); -+ if (perfmon_proc == NULL) { -+ PFM_ERR("cannot create /proc entry, perfmon disabled"); -+ return -1; -+ } -+ perfmon_proc->proc_fops = &pfm_proc_fops; -+ return 0; -+} -diff --git a/arch/ia64/perfmon/perfmon_default_smpl.c b/arch/ia64/perfmon/perfmon_default_smpl.c -new file mode 100644 -index 0000000..b408a13 ---- /dev/null -+++ b/arch/ia64/perfmon/perfmon_default_smpl.c -@@ -0,0 +1,273 @@ -+/* -+ * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This file implements the old default sampling buffer format -+ * for the Linux/ia64 perfmon-2 subsystem. This is for backward -+ * compatibility only. 
use the new default format in perfmon/ -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef MODULE -+#define FMT_FLAGS 0 -+#else -+#define FMT_FLAGS PFM_FMTFL_IS_BUILTIN -+#endif -+ -+#include -+#include -+ -+MODULE_AUTHOR("Stephane Eranian "); -+MODULE_DESCRIPTION("perfmon old default sampling format"); -+MODULE_LICENSE("GPL"); -+ -+static int pfm_default_fmt_validate(u32 flags, u16 npmds, void *data) -+{ -+ struct pfm_default_smpl_arg *arg = data; -+ size_t min_buf_size; -+ -+ if (data == NULL) { -+ PFM_DBG("no argument passed"); -+ return -EINVAL; -+ } -+ -+ /* -+ * compute min buf size. 
All PMD are manipulated as 64bit entities -+ */ -+ min_buf_size = sizeof(struct pfm_default_smpl_hdr) -+ + (sizeof(struct pfm_default_smpl_entry) + (npmds*sizeof(u64))); -+ -+ PFM_DBG("validate flags=0x%x npmds=%u min_buf_size=%lu " -+ "buf_size=%lu CPU%d", flags, npmds, min_buf_size, -+ arg->buf_size, smp_processor_id()); -+ -+ /* -+ * must hold at least the buffer header + one minimally sized entry -+ */ -+ if (arg->buf_size < min_buf_size) -+ return -EINVAL; -+ -+ return 0; -+} -+ -+static int pfm_default_fmt_get_size(unsigned int flags, void *data, -+ size_t *size) -+{ -+ struct pfm_default_smpl_arg *arg = data; -+ -+ /* -+ * size has been validated in default_validate -+ */ -+ *size = arg->buf_size; -+ -+ return 0; -+} -+ -+static int pfm_default_fmt_init(struct pfm_context *ctx, void *buf, -+ u32 flags, u16 npmds, void *data) -+{ -+ struct pfm_default_smpl_hdr *hdr; -+ struct pfm_default_smpl_arg *arg = data; -+ -+ hdr = buf; -+ -+ hdr->hdr_version = PFM_DEFAULT_SMPL_VERSION; -+ hdr->hdr_buf_size = arg->buf_size; -+ hdr->hdr_cur_offs = sizeof(*hdr); -+ hdr->hdr_overflows = 0; -+ hdr->hdr_count = 0; -+ -+ PFM_DBG("buffer=%p buf_size=%lu hdr_size=%lu " -+ "hdr_version=%u cur_offs=%lu", -+ buf, -+ hdr->hdr_buf_size, -+ sizeof(*hdr), -+ hdr->hdr_version, -+ hdr->hdr_cur_offs); -+ -+ return 0; -+} -+ -+static int pfm_default_fmt_handler(struct pfm_context *ctx, -+ unsigned long ip, u64 tstamp, void *data) -+{ -+ struct pfm_default_smpl_hdr *hdr; -+ struct pfm_default_smpl_entry *ent; -+ void *cur, *last, *buf; -+ u64 *e; -+ size_t entry_size; -+ u16 npmds, i, ovfl_pmd; -+ struct pfm_ovfl_arg *arg; -+ -+ hdr = ctx->smpl_addr; -+ arg = &ctx->ovfl_arg; -+ -+ buf = hdr; -+ cur = buf+hdr->hdr_cur_offs; -+ last = buf+hdr->hdr_buf_size; -+ ovfl_pmd = arg->ovfl_pmd; -+ -+ /* -+ * precheck for sanity -+ */ -+ if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) -+ goto full; -+ -+ npmds = arg->num_smpl_pmds; -+ -+ ent = cur; -+ -+ prefetch(arg->smpl_pmds_values); -+ -+ 
entry_size = sizeof(*ent) + (npmds << 3); -+ -+ /* position for first pmd */ -+ e = (unsigned long *)(ent+1); -+ -+ hdr->hdr_count++; -+ -+ PFM_DBG_ovfl("count=%lu cur=%p last=%p free_bytes=%lu " -+ "ovfl_pmd=%d npmds=%u", -+ hdr->hdr_count, -+ cur, last, -+ last-cur, -+ ovfl_pmd, -+ npmds); -+ -+ /* -+ * current = task running at the time of the overflow. -+ * -+ * per-task mode: -+ * - this is ususally the task being monitored. -+ * Under certain conditions, it might be a different task -+ * -+ * system-wide: -+ * - this is not necessarily the task controlling the session -+ */ -+ ent->pid = current->pid; -+ ent->ovfl_pmd = ovfl_pmd; -+ ent->last_reset_val = arg->pmd_last_reset; -+ -+ /* -+ * where did the fault happen (includes slot number) -+ */ -+ ent->ip = ip; -+ -+ ent->tstamp = tstamp; -+ ent->cpu = smp_processor_id(); -+ ent->set = arg->active_set; -+ ent->tgid = current->tgid; -+ -+ /* -+ * selectively store PMDs in increasing index number -+ */ -+ if (npmds) { -+ u64 *val = arg->smpl_pmds_values; -+ for (i = 0; i < npmds; i++) -+ *e++ = *val++; -+ } -+ -+ /* -+ * update position for next entry -+ */ -+ hdr->hdr_cur_offs += entry_size; -+ cur += entry_size; -+ -+ /* -+ * post check to avoid losing the last sample -+ */ -+ if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) -+ goto full; -+ -+ /* -+ * reset before returning from interrupt handler -+ */ -+ arg->ovfl_ctrl = PFM_OVFL_CTRL_RESET; -+ return 0; -+full: -+ PFM_DBG_ovfl("smpl buffer full free=%lu, count=%lu", -+ last-cur, hdr->hdr_count); -+ -+ /* -+ * increment number of buffer overflow. -+ * important to detect duplicate set of samples. -+ */ -+ hdr->hdr_overflows++; -+ -+ /* -+ * request notification and masking of monitoring. 
-+ * Notification is still subject to the overflowed -+ */ -+ arg->ovfl_ctrl = PFM_OVFL_CTRL_NOTIFY | PFM_OVFL_CTRL_MASK; -+ -+ return -ENOBUFS; /* we are full, sorry */ -+} -+ -+static int pfm_default_fmt_restart(int is_active, u32 *ovfl_ctrl, void *buf) -+{ -+ struct pfm_default_smpl_hdr *hdr; -+ -+ hdr = buf; -+ -+ hdr->hdr_count = 0; -+ hdr->hdr_cur_offs = sizeof(*hdr); -+ -+ *ovfl_ctrl = PFM_OVFL_CTRL_RESET; -+ -+ return 0; -+} -+ -+static int pfm_default_fmt_exit(void *buf) -+{ -+ return 0; -+} -+ -+static struct pfm_smpl_fmt default_fmt = { -+ .fmt_name = "default-old", -+ .fmt_version = 0x10000, -+ .fmt_arg_size = sizeof(struct pfm_default_smpl_arg), -+ .fmt_validate = pfm_default_fmt_validate, -+ .fmt_getsize = pfm_default_fmt_get_size, -+ .fmt_init = pfm_default_fmt_init, -+ .fmt_handler = pfm_default_fmt_handler, -+ .fmt_restart = pfm_default_fmt_restart, -+ .fmt_exit = pfm_default_fmt_exit, -+ .fmt_flags = FMT_FLAGS, -+ .owner = THIS_MODULE -+}; -+ -+static int pfm_default_fmt_init_module(void) -+{ -+ int ret; -+ -+ return pfm_fmt_register(&default_fmt); -+ return ret; -+} -+ -+static void pfm_default_fmt_cleanup_module(void) -+{ -+ pfm_fmt_unregister(&default_fmt); -+} -+ -+module_init(pfm_default_fmt_init_module); -+module_exit(pfm_default_fmt_cleanup_module); -diff --git a/arch/ia64/perfmon/perfmon_generic.c b/arch/ia64/perfmon/perfmon_generic.c -new file mode 100644 -index 0000000..47b1870 ---- /dev/null -+++ b/arch/ia64/perfmon/perfmon_generic.c -@@ -0,0 +1,148 @@ -+/* -+ * This file contains the generic PMU register description tables -+ * and pmc checker used by perfmon.c. -+ * -+ * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. -+ * contributed by Stephane Eranian -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. 
-+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+ -+MODULE_AUTHOR("Stephane Eranian "); -+MODULE_DESCRIPTION("Generic IA-64 PMU description tables"); -+MODULE_LICENSE("GPL"); -+ -+#define RDEP(x) (1UL << (x)) -+ -+#define PFM_IA64GEN_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7)) -+#define PFM_IA64GEN_RSVD (0xffffffffffff0080UL) -+#define PFM_IA64GEN_NO64 (1UL<<5) -+ -+/* forward declaration */ -+static struct pfm_pmu_config pfm_ia64gen_pmu_conf; -+ -+static struct pfm_arch_pmu_info pfm_ia64gen_pmu_info = { -+ .mask_pmcs = {PFM_IA64GEN_MASK_PMCS,}, -+}; -+ -+static struct pfm_regmap_desc pfm_ia64gen_pmc_desc[] = { -+/* pmc0 */ PMX_NA, -+/* pmc1 */ PMX_NA, -+/* pmc2 */ PMX_NA, -+/* pmc3 */ PMX_NA, -+/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 4), -+/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 5), -+/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 6), -+/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7", 0x0, PFM_IA64GEN_RSVD, PFM_IA64GEN_NO64, 7) -+}; -+#define PFM_IA64GEN_NUM_PMCS ARRAY_SIZE(pfm_ia64gen_pmc_desc) -+ -+static struct pfm_regmap_desc pfm_ia64gen_pmd_desc[] = { -+/* pmd0 */ PMX_NA, -+/* pmd1 */ PMX_NA, -+/* pmd2 */ PMX_NA, -+/* pmd3 */ PMX_NA, -+/* pmd4 */ PMD_DP(PFM_REG_C, "PMD4", 4, 1ull << 4), -+/* pmd5 */ PMD_DP(PFM_REG_C, "PMD5", 5, 1ull << 5), -+/* pmd6 */ PMD_DP(PFM_REG_C, "PMD6", 6, 1ull << 6), -+/* pmd7 */ PMD_DP(PFM_REG_C, "PMD7", 7, 1ull << 7) -+}; -+#define PFM_IA64GEN_NUM_PMDS 
ARRAY_SIZE(pfm_ia64gen_pmd_desc) -+ -+static int pfm_ia64gen_pmc_check(struct pfm_context *ctx, -+ struct pfm_event_set *set, -+ struct pfarg_pmc *req) -+{ -+#define PFM_IA64GEN_PMC_PM_POS6 (1UL<<6) -+ u64 tmpval; -+ int is_system; -+ -+ is_system = ctx->flags.system; -+ tmpval = req->reg_value; -+ -+ switch (req->reg_num) { -+ case 4: -+ case 5: -+ case 6: -+ case 7: -+ /* set pmc.oi for 64-bit emulation */ -+ tmpval |= 1UL << 5; -+ -+ if (is_system) -+ tmpval |= PFM_IA64GEN_PMC_PM_POS6; -+ else -+ tmpval &= ~PFM_IA64GEN_PMC_PM_POS6; -+ break; -+ -+ } -+ req->reg_value = tmpval; -+ -+ return 0; -+} -+ -+/* -+ * matches anything -+ */ -+static int pfm_ia64gen_probe_pmu(void) -+{ -+ u64 pm_buffer[16]; -+ pal_perf_mon_info_u_t pm_info; -+ -+ /* -+ * call PAL_PERFMON_INFO to retrieve counter width which -+ * is implementation specific -+ */ -+ if (ia64_pal_perf_mon_info(pm_buffer, &pm_info)) -+ return -1; -+ -+ pfm_ia64gen_pmu_conf.counter_width = pm_info.pal_perf_mon_info_s.width; -+ -+ return 0; -+} -+ -+/* -+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors! 
-+ */ -+static struct pfm_pmu_config pfm_ia64gen_pmu_conf = { -+ .pmu_name = "Generic IA-64", -+ .counter_width = 0, /* computed from PAL_PERFMON_INFO */ -+ .pmd_desc = pfm_ia64gen_pmd_desc, -+ .pmc_desc = pfm_ia64gen_pmc_desc, -+ .probe_pmu = pfm_ia64gen_probe_pmu, -+ .num_pmc_entries = PFM_IA64GEN_NUM_PMCS, -+ .num_pmd_entries = PFM_IA64GEN_NUM_PMDS, -+ .pmc_write_check = pfm_ia64gen_pmc_check, -+ .version = "1.0", -+ .flags = PFM_PMU_BUILTIN_FLAG, -+ .owner = THIS_MODULE, -+ .pmu_info = &pfm_ia64gen_pmu_info -+ /* no read/write checkers */ -+}; -+ -+static int __init pfm_gen_pmu_init_module(void) -+{ -+ return pfm_pmu_register(&pfm_ia64gen_pmu_conf); -+} -+ -+static void __exit pfm_gen_pmu_cleanup_module(void) -+{ -+ pfm_pmu_unregister(&pfm_ia64gen_pmu_conf); -+} -+ -+module_init(pfm_gen_pmu_init_module); -+module_exit(pfm_gen_pmu_cleanup_module); -diff --git a/arch/ia64/perfmon/perfmon_itanium.c b/arch/ia64/perfmon/perfmon_itanium.c -new file mode 100644 -index 0000000..094b31b ---- /dev/null -+++ b/arch/ia64/perfmon/perfmon_itanium.c -@@ -0,0 +1,232 @@ -+/* -+ * This file contains the Itanium PMU register description tables -+ * and pmc checker used by perfmon.c. -+ * -+ * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+ -+MODULE_AUTHOR("Stephane Eranian "); -+MODULE_DESCRIPTION("Itanium (Merced) PMU description tables"); -+MODULE_LICENSE("GPL"); -+ -+#define RDEP(x) (1ULL << (x)) -+ -+#define PFM_ITA_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7)|RDEP(10)|RDEP(11)|\ -+ RDEP(12)) -+ -+#define PFM_ITA_NO64 (1ULL<<5) -+ -+static struct pfm_arch_pmu_info pfm_ita_pmu_info = { -+ .mask_pmcs = {PFM_ITA_MASK_PMCS,}, -+}; -+/* reserved bits are 1 in the mask */ -+#define PFM_ITA_RSVD 0xfffffffffc8000a0UL -+/* -+ * For debug registers, writing xBR(y) means we use also xBR(y+1). Hence using -+ * PMC256+y means we use PMC256+y+1. Yet, we do not have dependency information -+ * but this is fine because they are handled separately in the IA-64 specific -+ * code. -+ */ -+static struct pfm_regmap_desc pfm_ita_pmc_desc[] = { -+/* pmc0 */ PMX_NA, -+/* pmc1 */ PMX_NA, -+/* pmc2 */ PMX_NA, -+/* pmc3 */ PMX_NA, -+/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 4), -+/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 5), -+/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 6), -+/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7" , 0x20, PFM_ITA_RSVD, PFM_ITA_NO64, 7), -+/* pmc8 */ PMC_D(PFM_REG_W , "PMC8" , 0xfffffffe3ffffff8UL, 0xfff00000001c0000UL, 0, 8), -+/* pmc9 */ PMC_D(PFM_REG_W , "PMC9" , 0xfffffffe3ffffff8UL, 0xfff00000001c0000UL, 0, 9), -+/* pmc10 */ PMC_D(PFM_REG_W , "PMC10", 0x0, 0xfffffffff3f0ff30UL, 0, 10), -+/* pmc11 */ PMC_D(PFM_REG_W , "PMC11", 0x10000000UL, 0xffffffffecf0ff30UL, 0, 11), -+/* pmc12 */ PMC_D(PFM_REG_W , "PMC12", 0x0, 0xffffffffffff0030UL, 0, 12), -+/* pmc13 */ PMC_D(PFM_REG_W , "PMC13", 0x3ffff00000001UL, 0xfffffffffffffffeUL, 0, 13), -+/* pmc14 */ PMX_NA, -+/* pmc15 */ 
PMX_NA, -+/* pmc16 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc24 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc32 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc40 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc48 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc56 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc64 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc72 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc80 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc88 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc96 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc104 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc112 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc120 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc128 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc136 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc144 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc152 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc160 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc168 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc176 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc184 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc192 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc200 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc208 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc216 */ PMX_NA, 
PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc224 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc232 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc240 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc248 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc256 */ PMC_D(PFM_REG_W , "IBR0", 0x0, 0, 0, 0), -+/* pmc257 */ PMC_D(PFM_REG_W , "IBR1", 0x0, 0x8000000000000000UL, 0, 1), -+/* pmc258 */ PMC_D(PFM_REG_W , "IBR2", 0x0, 0, 0, 2), -+/* pmc259 */ PMC_D(PFM_REG_W , "IBR3", 0x0, 0x8000000000000000UL, 0, 3), -+/* pmc260 */ PMC_D(PFM_REG_W , "IBR4", 0x0, 0, 0, 4), -+/* pmc261 */ PMC_D(PFM_REG_W , "IBR5", 0x0, 0x8000000000000000UL, 0, 5), -+/* pmc262 */ PMC_D(PFM_REG_W , "IBR6", 0x0, 0, 0, 6), -+/* pmc263 */ PMC_D(PFM_REG_W , "IBR7", 0x0, 0x8000000000000000UL, 0, 7), -+/* pmc264 */ PMC_D(PFM_REG_W , "DBR0", 0x0, 0, 0, 0), -+/* pmc265 */ PMC_D(PFM_REG_W , "DBR1", 0x0, 0xc000000000000000UL, 0, 1), -+/* pmc266 */ PMC_D(PFM_REG_W , "DBR2", 0x0, 0, 0, 2), -+/* pmc267 */ PMC_D(PFM_REG_W , "DBR3", 0x0, 0xc000000000000000UL, 0, 3), -+/* pmc268 */ PMC_D(PFM_REG_W , "DBR4", 0x0, 0, 0, 4), -+/* pmc269 */ PMC_D(PFM_REG_W , "DBR5", 0x0, 0xc000000000000000UL, 0, 5), -+/* pmc270 */ PMC_D(PFM_REG_W , "DBR6", 0x0, 0, 0, 6), -+/* pmc271 */ PMC_D(PFM_REG_W , "DBR7", 0x0, 0xc000000000000000UL, 0, 7) -+}; -+#define PFM_ITA_NUM_PMCS ARRAY_SIZE(pfm_ita_pmc_desc) -+ -+static struct pfm_regmap_desc pfm_ita_pmd_desc[] = { -+/* pmd0 */ PMD_DP(PFM_REG_I , "PMD0", 0, 1ull << 10), -+/* pmd1 */ PMD_DP(PFM_REG_I , "PMD1", 1, 1ull << 10), -+/* pmd2 */ PMD_DP(PFM_REG_I , "PMD2", 2, 1ull << 11), -+/* pmd3 */ PMD_DP(PFM_REG_I , "PMD3", 3, 1ull << 11), -+/* pmd4 */ PMD_DP(PFM_REG_C , "PMD4", 4, 1ull << 4), -+/* pmd5 */ PMD_DP(PFM_REG_C , "PMD5", 5, 1ull << 5), -+/* pmd6 */ PMD_DP(PFM_REG_C , "PMD6", 6, 1ull << 6), -+/* pmd7 */ PMD_DP(PFM_REG_C , "PMD7", 7, 1ull << 7), -+/* pmd8 */ 
PMD_DP(PFM_REG_I , "PMD8", 8, 1ull << 12), -+/* pmd9 */ PMD_DP(PFM_REG_I , "PMD9", 9, 1ull << 12), -+/* pmd10 */ PMD_DP(PFM_REG_I , "PMD10", 10, 1ull << 12), -+/* pmd11 */ PMD_DP(PFM_REG_I , "PMD11", 11, 1ull << 12), -+/* pmd12 */ PMD_DP(PFM_REG_I , "PMD12", 12, 1ull << 12), -+/* pmd13 */ PMD_DP(PFM_REG_I , "PMD13", 13, 1ull << 12), -+/* pmd14 */ PMD_DP(PFM_REG_I , "PMD14", 14, 1ull << 12), -+/* pmd15 */ PMD_DP(PFM_REG_I , "PMD15", 15, 1ull << 12), -+/* pmd16 */ PMD_DP(PFM_REG_I , "PMD16", 16, 1ull << 12), -+/* pmd17 */ PMD_DP(PFM_REG_I , "PMD17", 17, 1ull << 11) -+}; -+#define PFM_ITA_NUM_PMDS ARRAY_SIZE(pfm_ita_pmd_desc) -+ -+static int pfm_ita_pmc_check(struct pfm_context *ctx, -+ struct pfm_event_set *set, -+ struct pfarg_pmc *req) -+{ -+#define PFM_ITA_PMC_PM_POS6 (1UL<<6) -+ struct pfm_arch_context *ctx_arch; -+ u64 tmpval; -+ u16 cnum; -+ int ret = 0, is_system; -+ -+ tmpval = req->reg_value; -+ cnum = req->reg_num; -+ ctx_arch = pfm_ctx_arch(ctx); -+ is_system = ctx->flags.system; -+ -+ switch (cnum) { -+ case 4: -+ case 5: -+ case 6: -+ case 7: -+ case 10: -+ case 11: -+ case 12: -+ if (is_system) -+ tmpval |= PFM_ITA_PMC_PM_POS6; -+ else -+ tmpval &= ~PFM_ITA_PMC_PM_POS6; -+ break; -+ } -+ -+ /* -+ * we must clear the (instruction) debug registers if pmc13.ta bit is -+ * cleared before they are written (fl_using_dbreg==0) to avoid -+ * picking up stale information. -+ */ -+ if (cnum == 13 && ((tmpval & 0x1) == 0) -+ && ctx_arch->flags.use_dbr == 0) { -+ PFM_DBG("pmc13 has pmc13.ta cleared, clearing ibr"); -+ ret = pfm_ia64_mark_dbregs_used(ctx, set); -+ if (ret) -+ return ret; -+ } -+ -+ /* -+ * we must clear the (data) debug registers if pmc11.pt bit is cleared -+ * before they are written (fl_using_dbreg==0) to avoid picking up -+ * stale information. 
-+ */ -+ if (cnum == 11 && ((tmpval >> 28) & 0x1) == 0 -+ && ctx_arch->flags.use_dbr == 0) { -+ PFM_DBG("pmc11 has pmc11.pt cleared, clearing dbr"); -+ ret = pfm_ia64_mark_dbregs_used(ctx, set); -+ if (ret) -+ return ret; -+ } -+ -+ req->reg_value = tmpval; -+ -+ return 0; -+} -+ -+static int pfm_ita_probe_pmu(void) -+{ -+ return local_cpu_data->family == 0x7 && !ia64_platform_is("hpsim") -+ ? 0 : -1; -+} -+ -+/* -+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors! -+ */ -+static struct pfm_pmu_config pfm_ita_pmu_conf = { -+ .pmu_name = "Itanium", -+ .counter_width = 32, -+ .pmd_desc = pfm_ita_pmd_desc, -+ .pmc_desc = pfm_ita_pmc_desc, -+ .pmc_write_check = pfm_ita_pmc_check, -+ .num_pmc_entries = PFM_ITA_NUM_PMCS, -+ .num_pmd_entries = PFM_ITA_NUM_PMDS, -+ .probe_pmu = pfm_ita_probe_pmu, -+ .version = "1.0", -+ .flags = PFM_PMU_BUILTIN_FLAG, -+ .owner = THIS_MODULE, -+ .pmu_info = &pfm_ita_pmu_info -+}; -+ -+static int __init pfm_ita_pmu_init_module(void) -+{ -+ return pfm_pmu_register(&pfm_ita_pmu_conf); -+} -+ -+static void __exit pfm_ita_pmu_cleanup_module(void) -+{ -+ pfm_pmu_unregister(&pfm_ita_pmu_conf); -+} -+ -+module_init(pfm_ita_pmu_init_module); -+module_exit(pfm_ita_pmu_cleanup_module); -+ -diff --git a/arch/ia64/perfmon/perfmon_mckinley.c b/arch/ia64/perfmon/perfmon_mckinley.c -new file mode 100644 -index 0000000..dc59092 ---- /dev/null -+++ b/arch/ia64/perfmon/perfmon_mckinley.c -@@ -0,0 +1,290 @@ -+/* -+ * This file contains the McKinley PMU register description tables -+ * and pmc checker used by perfmon.c. -+ * -+ * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. 
-+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+ -+MODULE_AUTHOR("Stephane Eranian "); -+MODULE_DESCRIPTION("Itanium 2 (McKinley) PMU description tables"); -+MODULE_LICENSE("GPL"); -+ -+#define RDEP(x) (1UL << (x)) -+ -+#define PFM_MCK_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7)|RDEP(10)|RDEP(11)|\ -+ RDEP(12)) -+ -+#define PFM_MCK_NO64 (1UL<<5) -+ -+static struct pfm_arch_pmu_info pfm_mck_pmu_info = { -+ .mask_pmcs = {PFM_MCK_MASK_PMCS,}, -+}; -+ -+/* reserved bits are 1 in the mask */ -+#define PFM_ITA2_RSVD 0xfffffffffc8000a0UL -+ -+/* -+ * For debug registers, writing xBR(y) means we use also xBR(y+1). Hence using -+ * PMC256+y means we use PMC256+y+1. Yet, we do not have dependency information -+ * but this is fine because they are handled separately in the IA-64 specific -+ * code. 
-+ */ -+static struct pfm_regmap_desc pfm_mck_pmc_desc[] = { -+/* pmc0 */ PMX_NA, -+/* pmc1 */ PMX_NA, -+/* pmc2 */ PMX_NA, -+/* pmc3 */ PMX_NA, -+/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4" , 0x800020UL, 0xfffffffffc8000a0, PFM_MCK_NO64, 4), -+/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5" , 0x20UL, PFM_ITA2_RSVD, PFM_MCK_NO64, 5), -+/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6" , 0x20UL, PFM_ITA2_RSVD, PFM_MCK_NO64, 6), -+/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7" , 0x20UL, PFM_ITA2_RSVD, PFM_MCK_NO64, 7), -+/* pmc8 */ PMC_D(PFM_REG_W , "PMC8" , 0xffffffff3fffffffUL, 0xc0000004UL, 0, 8), -+/* pmc9 */ PMC_D(PFM_REG_W , "PMC9" , 0xffffffff3ffffffcUL, 0xc0000004UL, 0, 9), -+/* pmc10 */ PMC_D(PFM_REG_W , "PMC10", 0x0, 0xffffffffffff0000UL, 0, 10), -+/* pmc11 */ PMC_D(PFM_REG_W , "PMC11", 0x0, 0xfffffffffcf0fe30UL, 0, 11), -+/* pmc12 */ PMC_D(PFM_REG_W , "PMC12", 0x0, 0xffffffffffff0000UL, 0, 12), -+/* pmc13 */ PMC_D(PFM_REG_W , "PMC13", 0x2078fefefefeUL, 0xfffe1fffe7e7e7e7UL, 0, 13), -+/* pmc14 */ PMC_D(PFM_REG_W , "PMC14", 0x0db60db60db60db6UL, 0xffffffffffffdb6dUL, 0, 14), -+/* pmc15 */ PMC_D(PFM_REG_W , "PMC15", 0xfffffff0UL, 0xfffffffffffffff0UL, 0, 15), -+/* pmc16 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc24 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc32 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc40 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc48 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc56 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc64 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc72 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc80 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc88 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc96 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, 
PMX_NA, -+/* pmc104 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc112 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc120 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc128 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc136 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc144 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc152 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc160 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc168 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc176 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc184 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc192 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc200 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc208 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc216 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc224 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc232 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc240 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc248 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc256 */ PMC_D(PFM_REG_W , "IBR0", 0x0, 0, 0, 0), -+/* pmc257 */ PMC_D(PFM_REG_W , "IBR1", 0x0, 0x8000000000000000UL, 0, 1), -+/* pmc258 */ PMC_D(PFM_REG_W , "IBR2", 0x0, 0, 0, 2), -+/* pmc259 */ PMC_D(PFM_REG_W , "IBR3", 0x0, 0x8000000000000000UL, 0, 3), -+/* pmc260 */ PMC_D(PFM_REG_W , "IBR4", 0x0, 0, 0, 4), -+/* pmc261 */ PMC_D(PFM_REG_W , "IBR5", 0x0, 0x8000000000000000UL, 0, 5), -+/* pmc262 */ PMC_D(PFM_REG_W , "IBR6", 0x0, 0, 0, 6), -+/* pmc263 */ PMC_D(PFM_REG_W , "IBR7", 
0x0, 0x8000000000000000UL, 0, 7), -+/* pmc264 */ PMC_D(PFM_REG_W , "DBR0", 0x0, 0, 0, 0), -+/* pmc265 */ PMC_D(PFM_REG_W , "DBR1", 0x0, 0xc000000000000000UL, 0, 1), -+/* pmc266 */ PMC_D(PFM_REG_W , "DBR2", 0x0, 0, 0, 2), -+/* pmc267 */ PMC_D(PFM_REG_W , "DBR3", 0x0, 0xc000000000000000UL, 0, 3), -+/* pmc268 */ PMC_D(PFM_REG_W , "DBR4", 0x0, 0, 0, 4), -+/* pmc269 */ PMC_D(PFM_REG_W , "DBR5", 0x0, 0xc000000000000000UL, 0, 5), -+/* pmc270 */ PMC_D(PFM_REG_W , "DBR6", 0x0, 0, 0, 6), -+/* pmc271 */ PMC_D(PFM_REG_W , "DBR7", 0x0, 0xc000000000000000UL, 0, 7) -+}; -+#define PFM_MCK_NUM_PMCS ARRAY_SIZE(pfm_mck_pmc_desc) -+ -+static struct pfm_regmap_desc pfm_mck_pmd_desc[] = { -+/* pmd0 */ PMD_DP(PFM_REG_I, "PMD0", 0, 1ull << 10), -+/* pmd1 */ PMD_DP(PFM_REG_I, "PMD1", 1, 1ull << 10), -+/* pmd2 */ PMD_DP(PFM_REG_I, "PMD2", 2, 1ull << 11), -+/* pmd3 */ PMD_DP(PFM_REG_I, "PMD3", 3, 1ull << 11), -+/* pmd4 */ PMD_DP(PFM_REG_C, "PMD4", 4, 1ull << 4), -+/* pmd5 */ PMD_DP(PFM_REG_C, "PMD5", 5, 1ull << 5), -+/* pmd6 */ PMD_DP(PFM_REG_C, "PMD6", 6, 1ull << 6), -+/* pmd7 */ PMD_DP(PFM_REG_C, "PMD7", 7, 1ull << 7), -+/* pmd8 */ PMD_DP(PFM_REG_I, "PMD8", 8, 1ull << 12), -+/* pmd9 */ PMD_DP(PFM_REG_I, "PMD9", 9, 1ull << 12), -+/* pmd10 */ PMD_DP(PFM_REG_I, "PMD10", 10, 1ull << 12), -+/* pmd11 */ PMD_DP(PFM_REG_I, "PMD11", 11, 1ull << 12), -+/* pmd12 */ PMD_DP(PFM_REG_I, "PMD12", 12, 1ull << 12), -+/* pmd13 */ PMD_DP(PFM_REG_I, "PMD13", 13, 1ull << 12), -+/* pmd14 */ PMD_DP(PFM_REG_I, "PMD14", 14, 1ull << 12), -+/* pmd15 */ PMD_DP(PFM_REG_I, "PMD15", 15, 1ull << 12), -+/* pmd16 */ PMD_DP(PFM_REG_I, "PMD16", 16, 1ull << 12), -+/* pmd17 */ PMD_DP(PFM_REG_I, "PMD17", 17, 1ull << 11) -+}; -+#define PFM_MCK_NUM_PMDS ARRAY_SIZE(pfm_mck_pmd_desc) -+ -+static int pfm_mck_pmc_check(struct pfm_context *ctx, -+ struct pfm_event_set *set, -+ struct pfarg_pmc *req) -+{ -+ struct pfm_arch_context *ctx_arch; -+ u64 val8 = 0, val14 = 0, val13 = 0; -+ u64 tmpval; -+ u16 cnum; -+ int ret = 0, check_case1 = 
0; -+ int is_system; -+ -+ tmpval = req->reg_value; -+ cnum = req->reg_num; -+ ctx_arch = pfm_ctx_arch(ctx); -+ is_system = ctx->flags.system; -+ -+#define PFM_MCK_PMC_PM_POS6 (1UL<<6) -+#define PFM_MCK_PMC_PM_POS4 (1UL<<4) -+ -+ switch (cnum) { -+ case 4: -+ case 5: -+ case 6: -+ case 7: -+ case 11: -+ case 12: -+ if (is_system) -+ tmpval |= PFM_MCK_PMC_PM_POS6; -+ else -+ tmpval &= ~PFM_MCK_PMC_PM_POS6; -+ break; -+ -+ case 8: -+ val8 = tmpval; -+ val13 = set->pmcs[13]; -+ val14 = set->pmcs[14]; -+ check_case1 = 1; -+ break; -+ -+ case 10: -+ if (is_system) -+ tmpval |= PFM_MCK_PMC_PM_POS4; -+ else -+ tmpval &= ~PFM_MCK_PMC_PM_POS4; -+ break; -+ -+ case 13: -+ val8 = set->pmcs[8]; -+ val13 = tmpval; -+ val14 = set->pmcs[14]; -+ check_case1 = 1; -+ break; -+ -+ case 14: -+ val8 = set->pmcs[8]; -+ val13 = set->pmcs[13]; -+ val14 = tmpval; -+ check_case1 = 1; -+ break; -+ } -+ -+ /* -+ * check illegal configuration which can produce inconsistencies -+ * in tagging i-side events in L1D and L2 caches -+ */ -+ if (check_case1) { -+ ret = (((val13 >> 45) & 0xf) == 0 && ((val8 & 0x1) == 0)) -+ && ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0) -+ || (((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0)); -+ -+ if (ret) { -+ PFM_DBG("perfmon: invalid config pmc8=0x%lx " -+ "pmc13=0x%lx pmc14=0x%lx", -+ val8, val13, val14); -+ return -EINVAL; -+ } -+ } -+ -+ /* -+ * check if configuration implicitely activates the use of -+ * the debug registers. If true, then we ensure that this is -+ * possible and that we do not pick up stale value in the HW -+ * registers. -+ * -+ * We postpone the checks of pmc13 and pmc14 to avoid side effects -+ * in case of errors -+ */ -+ -+ /* -+ * pmc13 is "active" if: -+ * one of the pmc13.cfg_dbrpXX field is different from 0x3 -+ * AND -+ * at the corresponding pmc13.ena_dbrpXX is set. 
-+ */ -+ if (cnum == 13 && (tmpval & 0x1e00000000000UL) -+ && (tmpval & 0x18181818UL) != 0x18181818UL -+ && ctx_arch->flags.use_dbr == 0) { -+ PFM_DBG("pmc13=0x%lx active", tmpval); -+ ret = pfm_ia64_mark_dbregs_used(ctx, set); -+ if (ret) -+ return ret; -+ } -+ -+ /* -+ * if any pmc14.ibrpX bit is enabled we must clear the ibrs -+ */ -+ if (cnum == 14 && ((tmpval & 0x2222UL) != 0x2222UL) -+ && ctx_arch->flags.use_dbr == 0) { -+ PFM_DBG("pmc14=0x%lx active", tmpval); -+ ret = pfm_ia64_mark_dbregs_used(ctx, set); -+ if (ret) -+ return ret; -+ } -+ -+ req->reg_value = tmpval; -+ -+ return 0; -+} -+ -+static int pfm_mck_probe_pmu(void) -+{ -+ return local_cpu_data->family == 0x1f ? 0 : -1; -+} -+ -+/* -+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors! -+ */ -+static struct pfm_pmu_config pfm_mck_pmu_conf = { -+ .pmu_name = "Itanium 2", -+ .counter_width = 47, -+ .pmd_desc = pfm_mck_pmd_desc, -+ .pmc_desc = pfm_mck_pmc_desc, -+ .pmc_write_check = pfm_mck_pmc_check, -+ .num_pmc_entries = PFM_MCK_NUM_PMCS, -+ .num_pmd_entries = PFM_MCK_NUM_PMDS, -+ .probe_pmu = pfm_mck_probe_pmu, -+ .version = "1.0", -+ .flags = PFM_PMU_BUILTIN_FLAG, -+ .owner = THIS_MODULE, -+ .pmu_info = &pfm_mck_pmu_info, -+}; -+ -+static int __init pfm_mck_pmu_init_module(void) -+{ -+ return pfm_pmu_register(&pfm_mck_pmu_conf); -+} -+ -+static void __exit pfm_mck_pmu_cleanup_module(void) -+{ -+ pfm_pmu_unregister(&pfm_mck_pmu_conf); -+} -+ -+module_init(pfm_mck_pmu_init_module); -+module_exit(pfm_mck_pmu_cleanup_module); -diff --git a/arch/ia64/perfmon/perfmon_montecito.c b/arch/ia64/perfmon/perfmon_montecito.c -new file mode 100644 -index 0000000..3f76f73 ---- /dev/null -+++ b/arch/ia64/perfmon/perfmon_montecito.c -@@ -0,0 +1,412 @@ -+/* -+ * This file contains the McKinley PMU register description tables -+ * and pmc checker used by perfmon.c. -+ * -+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. 
-+ * Contributed Stephane Eranian -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+ -+MODULE_AUTHOR("Stephane Eranian "); -+MODULE_DESCRIPTION("Dual-Core Itanium 2 (Montecito) PMU description table"); -+MODULE_LICENSE("GPL"); -+ -+#define RDEP(x) (1UL << (x)) -+ -+#define PFM_MONT_MASK_PMCS (RDEP(4)|RDEP(5)|RDEP(6)|RDEP(7)|\ -+ RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|\ -+ RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|\ -+ RDEP(37)|RDEP(39)|RDEP(40)|RDEP(42)) -+ -+#define PFM_MONT_NO64 (1UL<<5) -+ -+static struct pfm_arch_pmu_info pfm_mont_pmu_info = { -+ .mask_pmcs = {PFM_MONT_MASK_PMCS,}, -+}; -+ -+#define PFM_MONT_RSVD 0xffffffff838000a0UL -+/* -+ * -+ * For debug registers, writing xBR(y) means we use also xBR(y+1). Hence using -+ * PMC256+y means we use PMC256+y+1. Yet, we do not have dependency information -+ * but this is fine because they are handled separately in the IA-64 specific -+ * code. 
-+ * -+ * For PMC4-PMC15, PMC40: we force pmc.ism=2 (IA-64 mode only) -+ */ -+static struct pfm_regmap_desc pfm_mont_pmc_desc[] = { -+/* pmc0 */ PMX_NA, -+/* pmc1 */ PMX_NA, -+/* pmc2 */ PMX_NA, -+/* pmc3 */ PMX_NA, -+/* pmc4 */ PMC_D(PFM_REG_W64, "PMC4" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 4), -+/* pmc5 */ PMC_D(PFM_REG_W64, "PMC5" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 5), -+/* pmc6 */ PMC_D(PFM_REG_W64, "PMC6" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 6), -+/* pmc7 */ PMC_D(PFM_REG_W64, "PMC7" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 7), -+/* pmc8 */ PMC_D(PFM_REG_W64, "PMC8" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 8), -+/* pmc9 */ PMC_D(PFM_REG_W64, "PMC9" , 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 9), -+/* pmc10 */ PMC_D(PFM_REG_W64, "PMC10", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 10), -+/* pmc11 */ PMC_D(PFM_REG_W64, "PMC11", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 11), -+/* pmc12 */ PMC_D(PFM_REG_W64, "PMC12", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 12), -+/* pmc13 */ PMC_D(PFM_REG_W64, "PMC13", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 13), -+/* pmc14 */ PMC_D(PFM_REG_W64, "PMC14", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 14), -+/* pmc15 */ PMC_D(PFM_REG_W64, "PMC15", 0x2000020UL, PFM_MONT_RSVD, PFM_MONT_NO64, 15), -+/* pmc16 */ PMX_NA, -+/* pmc17 */ PMX_NA, -+/* pmc18 */ PMX_NA, -+/* pmc19 */ PMX_NA, -+/* pmc20 */ PMX_NA, -+/* pmc21 */ PMX_NA, -+/* pmc22 */ PMX_NA, -+/* pmc23 */ PMX_NA, -+/* pmc24 */ PMX_NA, -+/* pmc25 */ PMX_NA, -+/* pmc26 */ PMX_NA, -+/* pmc27 */ PMX_NA, -+/* pmc28 */ PMX_NA, -+/* pmc29 */ PMX_NA, -+/* pmc30 */ PMX_NA, -+/* pmc31 */ PMX_NA, -+/* pmc32 */ PMC_D(PFM_REG_W , "PMC32", 0x30f01ffffffffffUL, 0xfcf0fe0000000000UL, 0, 32), -+/* pmc33 */ PMC_D(PFM_REG_W , "PMC33", 0x0, 0xfffffe0000000000UL, 0, 33), -+/* pmc34 */ PMC_D(PFM_REG_W , "PMC34", 0xf01ffffffffffUL, 0xfff0fe0000000000UL, 0, 34), -+/* pmc35 */ PMC_D(PFM_REG_W , "PMC35", 0x0, 0x1ffffffffffUL, 0, 35), -+/* pmc36 */ PMC_D(PFM_REG_W , 
"PMC36", 0xfffffff0UL, 0xfffffffffffffff0UL, 0, 36), -+/* pmc37 */ PMC_D(PFM_REG_W , "PMC37", 0x0, 0xffffffffffffc000UL, 0, 37), -+/* pmc38 */ PMC_D(PFM_REG_W , "PMC38", 0xdb6UL, 0xffffffffffffdb6dUL, 0, 38), -+/* pmc39 */ PMC_D(PFM_REG_W , "PMC39", 0x0, 0xffffffffffff0030UL, 0, 39), -+/* pmc40 */ PMC_D(PFM_REG_W , "PMC40", 0x2000000UL, 0xfffffffffff0fe30UL, 0, 40), -+/* pmc41 */ PMC_D(PFM_REG_W , "PMC41", 0x00002078fefefefeUL, 0xfffe1fffe7e7e7e7UL, 0, 41), -+/* pmc42 */ PMC_D(PFM_REG_W , "PMC42", 0x0, 0xfff800b0UL, 0, 42), -+/* pmc43 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc48 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc56 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc64 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc72 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc80 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc88 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc96 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc104 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc112 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc120 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc128 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc136 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc144 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc152 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc160 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc168 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc176 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc184 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, 
-+/* pmc192 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc200 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc208 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc216 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc224 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc232 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc240 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc248 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc256 */ PMC_D(PFM_REG_W, "IBR0", 0x0, 0, 0, 0), -+/* pmc257 */ PMC_D(PFM_REG_W, "IBR1", 0x0, 0x8000000000000000UL, 0, 1), -+/* pmc258 */ PMC_D(PFM_REG_W, "IBR2", 0x0, 0, 0, 2), -+/* pmc259 */ PMC_D(PFM_REG_W, "IBR3", 0x0, 0x8000000000000000UL, 0, 3), -+/* pmc260 */ PMC_D(PFM_REG_W, "IBR4", 0x0, 0, 0, 4), -+/* pmc261 */ PMC_D(PFM_REG_W, "IBR5", 0x0, 0x8000000000000000UL, 0, 5), -+/* pmc262 */ PMC_D(PFM_REG_W, "IBR6", 0x0, 0, 0, 6), -+/* pmc263 */ PMC_D(PFM_REG_W, "IBR7", 0x0, 0x8000000000000000UL, 0, 7), -+/* pmc264 */ PMC_D(PFM_REG_W, "DBR0", 0x0, 0, 0, 0), -+/* pmc265 */ PMC_D(PFM_REG_W, "DBR1", 0x0, 0xc000000000000000UL, 0, 1), -+/* pmc266 */ PMC_D(PFM_REG_W, "DBR2", 0x0, 0, 0, 2), -+/* pmc267 */ PMC_D(PFM_REG_W, "DBR3", 0x0, 0xc000000000000000UL, 0, 3), -+/* pmc268 */ PMC_D(PFM_REG_W, "DBR4", 0x0, 0, 0, 4), -+/* pmc269 */ PMC_D(PFM_REG_W, "DBR5", 0x0, 0xc000000000000000UL, 0, 5), -+/* pmc270 */ PMC_D(PFM_REG_W, "DBR6", 0x0, 0, 0, 6), -+/* pmc271 */ PMC_D(PFM_REG_W, "DBR7", 0x0, 0xc000000000000000UL, 0, 7) -+}; -+#define PFM_MONT_NUM_PMCS ARRAY_SIZE(pfm_mont_pmc_desc) -+ -+static struct pfm_regmap_desc pfm_mont_pmd_desc[] = { -+/* pmd0 */ PMX_NA, -+/* pmd1 */ PMX_NA, -+/* pmd2 */ PMX_NA, -+/* pmd3 */ PMX_NA, -+/* pmd4 */ PMD_DP(PFM_REG_C, "PMD4", 4, 1ull << 4), -+/* pmd5 */ PMD_DP(PFM_REG_C, "PMD5", 5, 1ull << 5), -+/* pmd6 */ 
PMD_DP(PFM_REG_C, "PMD6", 6, 1ull << 6), -+/* pmd7 */ PMD_DP(PFM_REG_C, "PMD7", 7, 1ull << 7), -+/* pmd8 */ PMD_DP(PFM_REG_C, "PMD8", 8, 1ull << 8), -+/* pmd9 */ PMD_DP(PFM_REG_C, "PMD9", 9, 1ull << 9), -+/* pmd10 */ PMD_DP(PFM_REG_C, "PMD10", 10, 1ull << 10), -+/* pmd11 */ PMD_DP(PFM_REG_C, "PMD11", 11, 1ull << 11), -+/* pmd12 */ PMD_DP(PFM_REG_C, "PMD12", 12, 1ull << 12), -+/* pmd13 */ PMD_DP(PFM_REG_C, "PMD13", 13, 1ull << 13), -+/* pmd14 */ PMD_DP(PFM_REG_C, "PMD14", 14, 1ull << 14), -+/* pmd15 */ PMD_DP(PFM_REG_C, "PMD15", 15, 1ull << 15), -+/* pmd16 */ PMX_NA, -+/* pmd17 */ PMX_NA, -+/* pmd18 */ PMX_NA, -+/* pmd19 */ PMX_NA, -+/* pmd20 */ PMX_NA, -+/* pmd21 */ PMX_NA, -+/* pmd22 */ PMX_NA, -+/* pmd23 */ PMX_NA, -+/* pmd24 */ PMX_NA, -+/* pmd25 */ PMX_NA, -+/* pmd26 */ PMX_NA, -+/* pmd27 */ PMX_NA, -+/* pmd28 */ PMX_NA, -+/* pmd29 */ PMX_NA, -+/* pmd30 */ PMX_NA, -+/* pmd31 */ PMX_NA, -+/* pmd32 */ PMD_DP(PFM_REG_I, "PMD32", 32, 1ull << 40), -+/* pmd33 */ PMD_DP(PFM_REG_I, "PMD33", 33, 1ull << 40), -+/* pmd34 */ PMD_DP(PFM_REG_I, "PMD34", 34, 1ull << 37), -+/* pmd35 */ PMD_DP(PFM_REG_I, "PMD35", 35, 1ull << 37), -+/* pmd36 */ PMD_DP(PFM_REG_I, "PMD36", 36, 1ull << 40), -+/* pmd37 */ PMX_NA, -+/* pmd38 */ PMD_DP(PFM_REG_I, "PMD38", 38, (1ull<<39)|(1ull<<42)), -+/* pmd39 */ PMD_DP(PFM_REG_I, "PMD39", 39, (1ull<<39)|(1ull<<42)), -+/* pmd40 */ PMX_NA, -+/* pmd41 */ PMX_NA, -+/* pmd42 */ PMX_NA, -+/* pmd43 */ PMX_NA, -+/* pmd44 */ PMX_NA, -+/* pmd45 */ PMX_NA, -+/* pmd46 */ PMX_NA, -+/* pmd47 */ PMX_NA, -+/* pmd48 */ PMD_DP(PFM_REG_I, "PMD48", 48, (1ull<<39)|(1ull<<42)), -+/* pmd49 */ PMD_DP(PFM_REG_I, "PMD49", 49, (1ull<<39)|(1ull<<42)), -+/* pmd50 */ PMD_DP(PFM_REG_I, "PMD50", 50, (1ull<<39)|(1ull<<42)), -+/* pmd51 */ PMD_DP(PFM_REG_I, "PMD51", 51, (1ull<<39)|(1ull<<42)), -+/* pmd52 */ PMD_DP(PFM_REG_I, "PMD52", 52, (1ull<<39)|(1ull<<42)), -+/* pmd53 */ PMD_DP(PFM_REG_I, "PMD53", 53, (1ull<<39)|(1ull<<42)), -+/* pmd54 */ PMD_DP(PFM_REG_I, "PMD54", 54, 
(1ull<<39)|(1ull<<42)), -+/* pmd55 */ PMD_DP(PFM_REG_I, "PMD55", 55, (1ull<<39)|(1ull<<42)), -+/* pmd56 */ PMD_DP(PFM_REG_I, "PMD56", 56, (1ull<<39)|(1ull<<42)), -+/* pmd57 */ PMD_DP(PFM_REG_I, "PMD57", 57, (1ull<<39)|(1ull<<42)), -+/* pmd58 */ PMD_DP(PFM_REG_I, "PMD58", 58, (1ull<<39)|(1ull<<42)), -+/* pmd59 */ PMD_DP(PFM_REG_I, "PMD59", 59, (1ull<<39)|(1ull<<42)), -+/* pmd60 */ PMD_DP(PFM_REG_I, "PMD60", 60, (1ull<<39)|(1ull<<42)), -+/* pmd61 */ PMD_DP(PFM_REG_I, "PMD61", 61, (1ull<<39)|(1ull<<42)), -+/* pmd62 */ PMD_DP(PFM_REG_I, "PMD62", 62, (1ull<<39)|(1ull<<42)), -+/* pmd63 */ PMD_DP(PFM_REG_I, "PMD63", 63, (1ull<<39)|(1ull<<42)) -+}; -+#define PFM_MONT_NUM_PMDS ARRAY_SIZE(pfm_mont_pmd_desc) -+ -+static int pfm_mont_has_ht; -+ -+static int pfm_mont_pmc_check(struct pfm_context *ctx, -+ struct pfm_event_set *set, -+ struct pfarg_pmc *req) -+{ -+ struct pfm_arch_context *ctx_arch; -+ u64 val32 = 0, val38 = 0, val41 = 0; -+ u64 tmpval; -+ u16 cnum; -+ int ret = 0, check_case1 = 0; -+ int is_system; -+ -+ tmpval = req->reg_value; -+ cnum = req->reg_num; -+ ctx_arch = pfm_ctx_arch(ctx); -+ is_system = ctx->flags.system; -+ -+#define PFM_MONT_PMC_PM_POS6 (1UL<<6) -+#define PFM_MONT_PMC_PM_POS4 (1UL<<4) -+ -+ switch (cnum) { -+ case 4: -+ case 5: -+ case 6: -+ case 7: -+ case 8: -+ case 9: -+ if (is_system) -+ tmpval |= PFM_MONT_PMC_PM_POS6; -+ else -+ tmpval &= ~PFM_MONT_PMC_PM_POS6; -+ break; -+ case 10: -+ case 11: -+ case 12: -+ case 13: -+ case 14: -+ case 15: -+ if ((req->reg_flags & PFM_REGFL_NO_EMUL64) == 0) { -+ if (pfm_mont_has_ht) { -+ PFM_INFO("perfmon: Errata 121 PMD10/PMD15 cannot be used to overflow" -+ "when threads on on"); -+ return -EINVAL; -+ } -+ } -+ if (is_system) -+ tmpval |= PFM_MONT_PMC_PM_POS6; -+ else -+ tmpval &= ~PFM_MONT_PMC_PM_POS6; -+ break; -+ case 39: -+ case 40: -+ case 42: -+ if (pfm_mont_has_ht && ((req->reg_value >> 8) & 0x7) == 4) { -+ PFM_INFO("perfmon: Errata 120: IP-EAR not available when threads are on"); -+ return 
-EINVAL; -+ } -+ if (is_system) -+ tmpval |= PFM_MONT_PMC_PM_POS6; -+ else -+ tmpval &= ~PFM_MONT_PMC_PM_POS6; -+ break; -+ -+ case 32: -+ val32 = tmpval; -+ val38 = set->pmcs[38]; -+ val41 = set->pmcs[41]; -+ check_case1 = 1; -+ break; -+ -+ case 37: -+ if (is_system) -+ tmpval |= PFM_MONT_PMC_PM_POS4; -+ else -+ tmpval &= ~PFM_MONT_PMC_PM_POS4; -+ break; -+ -+ case 38: -+ val38 = tmpval; -+ val32 = set->pmcs[32]; -+ val41 = set->pmcs[41]; -+ check_case1 = 1; -+ break; -+ case 41: -+ val41 = tmpval; -+ val32 = set->pmcs[32]; -+ val38 = set->pmcs[38]; -+ check_case1 = 1; -+ break; -+ } -+ -+ if (check_case1) { -+ ret = (((val41 >> 45) & 0xf) == 0 && ((val32>>57) & 0x1) == 0) -+ && ((((val38>>1) & 0x3) == 0x2 || ((val38>>1) & 0x3) == 0) -+ || (((val38>>4) & 0x3) == 0x2 || ((val38>>4) & 0x3) == 0)); -+ if (ret) { -+ PFM_DBG("perfmon: invalid config pmc38=0x%lx " -+ "pmc41=0x%lx pmc32=0x%lx", -+ val38, val41, val32); -+ return -EINVAL; -+ } -+ } -+ -+ /* -+ * check if configuration implicitely activates the use of the -+ * debug registers. If true, then we ensure that this is possible -+ * and that we do not pick up stale value in the HW registers. -+ */ -+ -+ /* -+ * -+ * pmc41 is "active" if: -+ * one of the pmc41.cfgdtagXX field is different from 0x3 -+ * AND -+ * the corsesponding pmc41.en_dbrpXX is set. 
-+ * AND -+ * ctx_fl_use_dbr (dbr not yet used) -+ */ -+ if (cnum == 41 -+ && (tmpval & 0x1e00000000000) -+ && (tmpval & 0x18181818) != 0x18181818 -+ && ctx_arch->flags.use_dbr == 0) { -+ PFM_DBG("pmc41=0x%lx active, clearing dbr", tmpval); -+ ret = pfm_ia64_mark_dbregs_used(ctx, set); -+ if (ret) -+ return ret; -+ } -+ /* -+ * we must clear the (instruction) debug registers if: -+ * pmc38.ig_ibrpX is 0 (enabled) -+ * and -+ * fl_use_dbr == 0 (dbr not yet used) -+ */ -+ if (cnum == 38 && ((tmpval & 0x492) != 0x492) -+ && ctx_arch->flags.use_dbr == 0) { -+ PFM_DBG("pmc38=0x%lx active pmc38, clearing ibr", tmpval); -+ ret = pfm_ia64_mark_dbregs_used(ctx, set); -+ if (ret) -+ return ret; -+ -+ } -+ req->reg_value = tmpval; -+ return 0; -+} -+ -+static void pfm_handle_errata(void) -+{ -+ pfm_mont_has_ht = 1; -+ -+ PFM_INFO("activating workaround for errata 120 " -+ "(Disable IP-EAR when threads are on)"); -+ -+ PFM_INFO("activating workaround for Errata 121 " -+ "(PMC10-PMC15 cannot be used to overflow" -+ " when threads are on"); -+} -+static int pfm_mont_probe_pmu(void) -+{ -+ if (local_cpu_data->family != 0x20) -+ return -1; -+ -+ /* -+ * the 2 errata must be activated when -+ * threads are/can be enabled -+ */ -+ if (is_multithreading_enabled()) -+ pfm_handle_errata(); -+ -+ return 0; -+} -+ -+/* -+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors! 
-+ */ -+static struct pfm_pmu_config pfm_mont_pmu_conf = { -+ .pmu_name = "Montecito", -+ .counter_width = 47, -+ .pmd_desc = pfm_mont_pmd_desc, -+ .pmc_desc = pfm_mont_pmc_desc, -+ .num_pmc_entries = PFM_MONT_NUM_PMCS, -+ .num_pmd_entries = PFM_MONT_NUM_PMDS, -+ .pmc_write_check = pfm_mont_pmc_check, -+ .probe_pmu = pfm_mont_probe_pmu, -+ .version = "1.0", -+ .pmu_info = &pfm_mont_pmu_info, -+ .flags = PFM_PMU_BUILTIN_FLAG, -+ .owner = THIS_MODULE -+}; -+ -+static int __init pfm_mont_pmu_init_module(void) -+{ -+ return pfm_pmu_register(&pfm_mont_pmu_conf); -+} -+ -+static void __exit pfm_mont_pmu_cleanup_module(void) -+{ -+ pfm_pmu_unregister(&pfm_mont_pmu_conf); -+} -+ -+module_init(pfm_mont_pmu_init_module); -+module_exit(pfm_mont_pmu_cleanup_module); -diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig -index 1e06d23..b87f445 100644 ---- a/arch/mips/Kconfig -+++ b/arch/mips/Kconfig -@@ -1857,6 +1857,8 @@ config SECCOMP - - If unsure, say Y. Only embedded should say N here. - -+source "arch/mips/perfmon/Kconfig" -+ - endmenu - - config RWSEM_GENERIC_SPINLOCK -diff --git a/arch/mips/Makefile b/arch/mips/Makefile -index 9aab51c..712acf7 100644 ---- a/arch/mips/Makefile -+++ b/arch/mips/Makefile -@@ -154,6 +154,12 @@ endif - endif - - # -+# Perfmon support -+# -+ -+core-$(CONFIG_PERFMON) += arch/mips/perfmon/ -+ -+# - # Firmware support - # - libs-$(CONFIG_ARC) += arch/mips/fw/arc/ -diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c -index 22fc19b..4467361 100644 ---- a/arch/mips/kernel/process.c -+++ b/arch/mips/kernel/process.c -@@ -27,6 +27,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -94,6 +95,7 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp) - - void exit_thread(void) - { -+ pfm_exit_thread(); - } - - void flush_thread(void) -@@ -162,6 +164,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, - if (clone_flags & CLONE_SETTLS) - ti->tp_value = regs->regs[7]; - 
-+ pfm_copy_thread(p); -+ - return 0; - } - -diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S -index 5e75a31..e96ddd6 100644 ---- a/arch/mips/kernel/scall32-o32.S -+++ b/arch/mips/kernel/scall32-o32.S -@@ -653,6 +653,18 @@ einval: li v0, -EINVAL - sys sys_dup3 3 - sys sys_pipe2 2 - sys sys_inotify_init1 1 -+ sys sys_pfm_create_context 4 /* 4330 */ -+ sys sys_pfm_write_pmcs 3 -+ sys sys_pfm_write_pmds 4 -+ sys sys_pfm_read_pmds 3 -+ sys sys_pfm_load_context 2 -+ sys sys_pfm_start 2 /* 4335 */ -+ sys sys_pfm_stop 1 -+ sys sys_pfm_restart 1 -+ sys sys_pfm_create_evtsets 3 -+ sys sys_pfm_getinfo_evtsets 3 -+ sys sys_pfm_delete_evtsets 3 /* 4340 */ -+ sys sys_pfm_unload_context 1 - .endm - - /* We pre-compute the number of _instruction_ bytes needed to -diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S -index 3d58204..adb2ba9 100644 ---- a/arch/mips/kernel/scall64-64.S -+++ b/arch/mips/kernel/scall64-64.S -@@ -487,4 +487,16 @@ sys_call_table: - PTR sys_dup3 - PTR sys_pipe2 - PTR sys_inotify_init1 -+ PTR sys_pfm_create_context -+ PTR sys_pfm_write_pmcs /* 5290 */ -+ PTR sys_pfm_write_pmds -+ PTR sys_pfm_read_pmds -+ PTR sys_pfm_load_context -+ PTR sys_pfm_start -+ PTR sys_pfm_stop /* 5295 */ -+ PTR sys_pfm_restart -+ PTR sys_pfm_create_evtsets -+ PTR sys_pfm_getinfo_evtsets -+ PTR sys_pfm_delete_evtsets -+ PTR sys_pfm_unload_context /* 5300 */ - .size sys_call_table,.-sys_call_table -diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S -index da7f1b6..6d12095 100644 ---- a/arch/mips/kernel/scall64-n32.S -+++ b/arch/mips/kernel/scall64-n32.S -@@ -400,12 +400,12 @@ EXPORT(sysn32_call_table) - PTR sys_ioprio_set - PTR sys_ioprio_get - PTR compat_sys_utimensat -- PTR compat_sys_signalfd /* 5280 */ -+ PTR compat_sys_signalfd /* 6280 */ - PTR sys_ni_syscall - PTR sys_eventfd - PTR sys_fallocate - PTR sys_timerfd_create -- PTR sys_timerfd_gettime /* 5285 */ -+ PTR sys_timerfd_gettime /* 6285 */ - PTR 
sys_timerfd_settime - PTR sys_signalfd4 - PTR sys_eventfd2 -@@ -413,4 +413,16 @@ EXPORT(sysn32_call_table) - PTR sys_dup3 /* 5290 */ - PTR sys_pipe2 - PTR sys_inotify_init1 -+ PTR sys_pfm_create_context -+ PTR sys_pfm_write_pmcs -+ PTR sys_pfm_write_pmds /* 6295 */ -+ PTR sys_pfm_read_pmds -+ PTR sys_pfm_load_context -+ PTR sys_pfm_start -+ PTR sys_pfm_stop -+ PTR sys_pfm_restart /* 6300 */ -+ PTR sys_pfm_create_evtsets -+ PTR sys_pfm_getinfo_evtsets -+ PTR sys_pfm_delete_evtsets -+ PTR sys_pfm_unload_context - .size sysn32_call_table,.-sysn32_call_table -diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S -index d7cd1aa..e77f55a 100644 ---- a/arch/mips/kernel/scall64-o32.S -+++ b/arch/mips/kernel/scall64-o32.S -@@ -535,4 +535,16 @@ sys_call_table: - PTR sys_dup3 - PTR sys_pipe2 - PTR sys_inotify_init1 -+ PTR sys_pfm_create_context /* 4330 */ -+ PTR sys_pfm_write_pmcs -+ PTR sys_pfm_write_pmds -+ PTR sys_pfm_read_pmds -+ PTR sys_pfm_load_context -+ PTR sys_pfm_start /* 4335 */ -+ PTR sys_pfm_stop -+ PTR sys_pfm_restart -+ PTR sys_pfm_create_evtsets -+ PTR sys_pfm_getinfo_evtsets -+ PTR sys_pfm_delete_evtsets /* 4340 */ -+ PTR sys_pfm_unload_context - .size sys_call_table,.-sys_call_table -diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c -index a4e106c..6a7e60c 100644 ---- a/arch/mips/kernel/signal.c -+++ b/arch/mips/kernel/signal.c -@@ -20,6 +20,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -694,8 +695,11 @@ static void do_signal(struct pt_regs *regs) - * - triggered by the TIF_WORK_MASK flags - */ - asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused, -- __u32 thread_info_flags) -+ __u32 thread_info_flags) - { -+ if (thread_info_flags & _TIF_PERFMON_WORK) -+ pfm_handle_work(regs); -+ - /* deal with pending signal delivery */ - if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) - do_signal(regs); -diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c 
-index 1f467d5..163dfe4 100644 ---- a/arch/mips/kernel/time.c -+++ b/arch/mips/kernel/time.c -@@ -49,10 +49,11 @@ int update_persistent_clock(struct timespec now) - return rtc_mips_set_mmss(now.tv_sec); - } - --static int null_perf_irq(void) -+int null_perf_irq(void) - { - return 0; - } -+EXPORT_SYMBOL(null_perf_irq); - - int (*perf_irq)(void) = null_perf_irq; - -diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c -index b602ac6..9cbd75f 100644 ---- a/arch/mips/kernel/traps.c -+++ b/arch/mips/kernel/traps.c -@@ -92,17 +92,15 @@ static void show_raw_backtrace(unsigned long reg29) - #ifdef CONFIG_KALLSYMS - printk("\n"); - #endif -- while (!kstack_end(sp)) { -- unsigned long __user *p = -- (unsigned long __user *)(unsigned long)sp++; -- if (__get_user(addr, p)) { -- printk(" (Bad stack address)"); -- break; -+#define IS_KVA01(a) ((((unsigned long)a) & 0xc0000000) == 0x80000000) -+ if (IS_KVA01(sp)) { -+ while (!kstack_end(sp)) { -+ addr = *sp++; -+ if (__kernel_text_address(addr)) -+ print_ip_sym(addr); - } -- if (__kernel_text_address(addr)) -- print_ip_sym(addr); -+ printk("\n"); - } -- printk("\n"); - } - - #ifdef CONFIG_KALLSYMS -diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c -index 0b97d47..d8f36b5 100644 ---- a/arch/mips/mti-malta/malta-time.c -+++ b/arch/mips/mti-malta/malta-time.c -@@ -27,6 +27,7 @@ - #include - #include - #include -+#include - - #include - #include -diff --git a/arch/mips/perfmon/Kconfig b/arch/mips/perfmon/Kconfig -new file mode 100644 -index 0000000..b426eea ---- /dev/null -+++ b/arch/mips/perfmon/Kconfig -@@ -0,0 +1,61 @@ -+menu "Hardware Performance Monitoring support" -+config PERFMON -+ bool "Perfmon2 performance monitoring interface" -+ default n -+ help -+ Enables the perfmon2 interface to access the hardware -+ performance counters. See for -+ more details. 
-+ -+config PERFMON_DEBUG -+ bool "Perfmon debugging" -+ default n -+ depends on PERFMON -+ help -+ Enables perfmon debugging support -+ -+config PERFMON_DEBUG_FS -+ bool "Enable perfmon statistics reporting via debugfs" -+ default y -+ depends on PERFMON && DEBUG_FS -+ help -+ Enable collection and reporting of perfmon timing statistics under -+ debugfs. This is used for debugging and performance analysis of the -+ subsystem. The debugfs filesystem must be mounted. -+ -+config PERFMON_FLUSH -+ bool "Flush sampling buffer when modified" -+ depends on PERFMON -+ default n -+ help -+ On some MIPS models, cache aliasing may cause invalid -+ data to be read from the perfmon sampling buffer. Use this option -+ to flush the buffer when it is modified to ensure valid data is -+ visible at the user level. -+ -+config PERFMON_ALIGN -+ bool "Align sampling buffer to avoid cache aliasing" -+ depends on PERFMON -+ default n -+ help -+ On some MIPS models, cache aliasing may cause invalid -+ data to be read from the perfmon sampling buffer. By forcing a bigger -+ page alignment (4-page), one can guarantee the buffer virtual address -+ will conflict in the cache with the user level mapping of the buffer -+ thereby ensuring a consistent view by user programs. 
-+ -+config PERFMON_DEBUG -+ bool "Perfmon debugging" -+ depends on PERFMON -+ default n -+ depends on PERFMON -+ help -+ Enables perfmon debugging support -+ -+config PERFMON_MIPS64 -+ tristate "Support for MIPS64 hardware performance counters" -+ depends on PERFMON -+ default n -+ help -+ Enables support for the MIPS64 hardware performance counters" -+endmenu -diff --git a/arch/mips/perfmon/Makefile b/arch/mips/perfmon/Makefile -new file mode 100644 -index 0000000..153b83f ---- /dev/null -+++ b/arch/mips/perfmon/Makefile -@@ -0,0 +1,2 @@ -+obj-$(CONFIG_PERFMON) += perfmon.o -+obj-$(CONFIG_PERFMON_MIPS64) += perfmon_mips64.o -diff --git a/arch/mips/perfmon/perfmon.c b/arch/mips/perfmon/perfmon.c -new file mode 100644 -index 0000000..6615a77 ---- /dev/null -+++ b/arch/mips/perfmon/perfmon.c -@@ -0,0 +1,313 @@ -+/* -+ * This file implements the MIPS64 specific -+ * support for the perfmon2 interface -+ * -+ * Copyright (c) 2005 Philip J. Mucci -+ * -+ * based on versions for other architectures: -+ * Copyright (c) 2005 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+ -+/* -+ * collect pending overflowed PMDs. Called from pfm_ctxsw() -+ * and from PMU interrupt handler. Must fill in set->povfl_pmds[] -+ * and set->npend_ovfls. 
Interrupts are masked -+ */ -+static void __pfm_get_ovfl_pmds(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ u64 new_val, wmask; -+ u64 *used_mask, *intr_pmds; -+ u64 mask[PFM_PMD_BV]; -+ unsigned int i, max; -+ -+ max = ctx->regs.max_intr_pmd; -+ intr_pmds = ctx->regs.intr_pmds; -+ used_mask = set->used_pmds; -+ -+ wmask = 1ULL << pfm_pmu_conf->counter_width; -+ -+ bitmap_and(cast_ulp(mask), -+ cast_ulp(intr_pmds), -+ cast_ulp(used_mask), -+ max); -+ -+ /* -+ * check all PMD that can generate interrupts -+ * (that includes counters) -+ */ -+ for (i = 0; i < max; i++) { -+ if (test_bit(i, mask)) { -+ new_val = pfm_arch_read_pmd(ctx, i); -+ -+ PFM_DBG_ovfl("pmd%u new_val=0x%llx bit=%d\n", -+ i, (unsigned long long)new_val, -+ (new_val&wmask) ? 1 : 0); -+ -+ if (new_val & wmask) { -+ __set_bit(i, set->povfl_pmds); -+ set->npend_ovfls++; -+ } -+ } -+ } -+} -+ -+static void pfm_stop_active(struct task_struct *task, struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ unsigned int i, max; -+ -+ max = ctx->regs.max_pmc; -+ -+ /* -+ * clear enable bits, assume all pmcs are enable pmcs -+ */ -+ for (i = 0; i < max; i++) { -+ if (test_bit(i, set->used_pmcs)) -+ pfm_arch_write_pmc(ctx, i, 0); -+ } -+ -+ if (set->npend_ovfls) -+ return; -+ -+ __pfm_get_ovfl_pmds(ctx, set); -+} -+ -+/* -+ * Called from pfm_ctxsw(). Task is guaranteed to be current. -+ * Context is locked. Interrupts are masked. Monitoring is active. -+ * PMU access is guaranteed. PMC and PMD registers are live in PMU. -+ * -+ * for per-thread: -+ * must stop monitoring for the task -+ * -+ * Return: -+ * non-zero : did not save PMDs (as part of stopping the PMU) -+ * 0 : saved PMDs (no need to save them in caller) -+ */ -+int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx) -+{ -+ /* -+ * disable lazy restore of PMC registers. 
-+ */ -+ ctx->active_set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS; -+ -+ /* -+ * if masked, monitoring is stopped, thus there is no -+ * need to stop the PMU again and there is no need to -+ * check for pending overflows. This is not just an -+ * optimization, this is also for correctness as you -+ * may end up detecting overflows twice. -+ */ -+ if (ctx->state == PFM_CTX_MASKED) -+ return 1; -+ -+ pfm_stop_active(task, ctx, ctx->active_set); -+ -+ return 1; -+} -+ -+/* -+ * Called from pfm_stop() and pfm_ctxsw() -+ * Interrupts are masked. Context is locked. Set is the active set. -+ * -+ * For per-thread: -+ * task is not necessarily current. If not current task, then -+ * task is guaranteed stopped and off any cpu. Access to PMU -+ * is not guaranteed. Interrupts are masked. Context is locked. -+ * Set is the active set. -+ * -+ * For system-wide: -+ * task is current -+ * -+ * must disable active monitoring. ctx cannot be NULL -+ */ -+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx) -+{ -+ /* -+ * no need to go through stop_save() -+ * if we are already stopped -+ */ -+ if (!ctx->flags.started || ctx->state == PFM_CTX_MASKED) -+ return; -+ -+ /* -+ * stop live registers and collect pending overflow -+ */ -+ if (task == current) -+ pfm_stop_active(task, ctx, ctx->active_set); -+} -+ -+/* -+ * called from pfm_start() or pfm_ctxsw() when idle task and -+ * EXCL_IDLE is on. -+ * -+ * Interrupts are masked. Context is locked. Set is the active set. -+ * -+ * For per-trhead: -+ * Task is not necessarily current. If not current task, then task -+ * is guaranteed stopped and off any cpu. Access to PMU is not guaranteed. -+ * -+ * For system-wide: -+ * task is always current -+ * -+ * must enable active monitoring. 
-+ */ -+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx) -+{ -+ struct pfm_event_set *set; -+ unsigned int i, max_pmc; -+ -+ if (task != current) -+ return; -+ -+ set = ctx->active_set; -+ max_pmc = ctx->regs.max_pmc; -+ -+ for (i = 0; i < max_pmc; i++) { -+ if (test_bit(i, set->used_pmcs)) -+ pfm_arch_write_pmc(ctx, i, set->pmcs[i]); -+ } -+} -+ -+/* -+ * function called from pfm_switch_sets(), pfm_context_load_thread(), -+ * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets() -+ * context is locked. Interrupts are masked. set cannot be NULL. -+ * Access to the PMU is guaranteed. -+ * -+ * function must restore all PMD registers from set. -+ */ -+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ u64 ovfl_mask, val; -+ u64 *impl_pmds; -+ unsigned int i; -+ unsigned int max_pmd; -+ -+ max_pmd = ctx->regs.max_pmd; -+ ovfl_mask = pfm_pmu_conf->ovfl_mask; -+ impl_pmds = ctx->regs.pmds; -+ -+ /* -+ * must restore all pmds to avoid leaking -+ * information to user. -+ */ -+ for (i = 0; i < max_pmd; i++) { -+ -+ if (test_bit(i, impl_pmds) == 0) -+ continue; -+ -+ val = set->pmds[i].value; -+ -+ /* -+ * set upper bits for counter to ensure -+ * overflow will trigger -+ */ -+ val &= ovfl_mask; -+ -+ pfm_arch_write_pmd(ctx, i, val); -+ } -+} -+ -+/* -+ * function called from pfm_switch_sets(), pfm_context_load_thread(), -+ * pfm_context_load_sys(), pfm_ctxsw(). -+ * Context is locked. Interrupts are masked. set cannot be NULL. -+ * Access to the PMU is guaranteed. -+ * -+ * function must restore all PMC registers from set, if needed. 
-+ */ -+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ u64 *impl_pmcs; -+ unsigned int i, max_pmc; -+ -+ max_pmc = ctx->regs.max_pmc; -+ impl_pmcs = ctx->regs.pmcs; -+ -+ /* -+ * - by default no PMCS measures anything -+ * - on ctxswout, all used PMCs are disabled (cccr enable bit cleared) -+ * hence when masked we do not need to restore anything -+ */ -+ if (ctx->state == PFM_CTX_MASKED || ctx->flags.started == 0) -+ return; -+ -+ /* -+ * restore all pmcs -+ */ -+ for (i = 0; i < max_pmc; i++) -+ if (test_bit(i, impl_pmcs)) -+ pfm_arch_write_pmc(ctx, i, set->pmcs[i]); -+} -+ -+char *pfm_arch_get_pmu_module_name(void) -+{ -+ switch (cpu_data->cputype) { -+#ifndef CONFIG_SMP -+ case CPU_34K: -+#if defined(CPU_74K) -+ case CPU_74K: -+#endif -+#endif -+ case CPU_SB1: -+ case CPU_SB1A: -+ case CPU_R12000: -+ case CPU_25KF: -+ case CPU_24K: -+ case CPU_20KC: -+ case CPU_5KC: -+ return "perfmon_mips64"; -+ default: -+ return NULL; -+ } -+ return NULL; -+} -+ -+int perfmon_perf_irq(void) -+{ -+ /* BLATANTLY STOLEN FROM OPROFILE, then modified */ -+ struct pt_regs *regs; -+ unsigned int counters = pfm_pmu_conf->regs_all.max_pmc; -+ unsigned int control; -+ unsigned int counter; -+ -+ regs = get_irq_regs(); -+ switch (counters) { -+#define HANDLE_COUNTER(n) \ -+ case n + 1: \ -+ control = read_c0_perfctrl ## n(); \ -+ counter = read_c0_perfcntr ## n(); \ -+ if ((control & MIPS64_PMC_INT_ENABLE_MASK) && \ -+ (counter & MIPS64_PMD_INTERRUPT)) { \ -+ pfm_interrupt_handler(instruction_pointer(regs),\ -+ regs); \ -+ return(1); \ -+ } -+ HANDLE_COUNTER(3) -+ HANDLE_COUNTER(2) -+ HANDLE_COUNTER(1) -+ HANDLE_COUNTER(0) -+ } -+ -+ return 0; -+} -+EXPORT_SYMBOL(perfmon_perf_irq); -diff --git a/arch/mips/perfmon/perfmon_mips64.c b/arch/mips/perfmon/perfmon_mips64.c -new file mode 100644 -index 0000000..78cb43d ---- /dev/null -+++ b/arch/mips/perfmon/perfmon_mips64.c -@@ -0,0 +1,218 @@ -+/* -+ * This file contains the MIPS64 and decendent PMU 
register description tables -+ * and pmc checker used by perfmon.c. -+ * -+ * Copyright (c) 2005 Philip Mucci -+ * -+ * Based on perfmon_p6.c: -+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+ -+MODULE_AUTHOR("Philip Mucci "); -+MODULE_DESCRIPTION("MIPS64 PMU description tables"); -+MODULE_LICENSE("GPL"); -+ -+/* -+ * reserved: -+ * - bit 63-9 -+ * RSVD: reserved bits must be 1 -+ */ -+#define PFM_MIPS64_PMC_RSVD 0xfffffffffffff810ULL -+#define PFM_MIPS64_PMC_VAL (1ULL<<4) -+ -+extern int null_perf_irq(struct pt_regs *regs); -+extern int (*perf_irq)(struct pt_regs *regs); -+extern int perfmon_perf_irq(struct pt_regs *regs); -+ -+static struct pfm_arch_pmu_info pfm_mips64_pmu_info; -+ -+static struct pfm_regmap_desc pfm_mips64_pmc_desc[] = { -+/* pmc0 */ PMC_D(PFM_REG_I64, "CP0_25_0", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 0), -+/* pmc1 */ PMC_D(PFM_REG_I64, "CP0_25_1", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 1), -+/* pmc2 */ PMC_D(PFM_REG_I64, "CP0_25_2", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 2), -+/* pmc3 */ PMC_D(PFM_REG_I64, "CP0_25_3", PFM_MIPS64_PMC_VAL, PFM_MIPS64_PMC_RSVD, 0, 3) -+}; -+#define PFM_MIPS64_NUM_PMCS ARRAY_SIZE(pfm_mips64_pmc_desc) -+ -+static struct pfm_regmap_desc 
pfm_mips64_pmd_desc[] = { -+/* pmd0 */ PMD_D(PFM_REG_C, "CP0_25_0", 0), -+/* pmd1 */ PMD_D(PFM_REG_C, "CP0_25_1", 1), -+/* pmd2 */ PMD_D(PFM_REG_C, "CP0_25_2", 2), -+/* pmd3 */ PMD_D(PFM_REG_C, "CP0_25_3", 3) -+}; -+#define PFM_MIPS64_NUM_PMDS ARRAY_SIZE(pfm_mips64_pmd_desc) -+ -+static int pfm_mips64_probe_pmu(void) -+{ -+ struct cpuinfo_mips *c = ¤t_cpu_data; -+ -+ switch (c->cputype) { -+#ifndef CONFIG_SMP -+ case CPU_34K: -+#if defined(CPU_74K) -+ case CPU_74K: -+#endif -+#endif -+ case CPU_SB1: -+ case CPU_SB1A: -+ case CPU_R12000: -+ case CPU_25KF: -+ case CPU_24K: -+ case CPU_20KC: -+ case CPU_5KC: -+ return 0; -+ break; -+ default: -+ PFM_INFO("Unknown cputype 0x%x", c->cputype); -+ } -+ return -1; -+} -+ -+/* -+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors! -+ */ -+static struct pfm_pmu_config pfm_mips64_pmu_conf = { -+ .pmu_name = "MIPS", /* placeholder */ -+ .counter_width = 31, -+ .pmd_desc = pfm_mips64_pmd_desc, -+ .pmc_desc = pfm_mips64_pmc_desc, -+ .num_pmc_entries = PFM_MIPS64_NUM_PMCS, -+ .num_pmd_entries = PFM_MIPS64_NUM_PMDS, -+ .probe_pmu = pfm_mips64_probe_pmu, -+ .flags = PFM_PMU_BUILTIN_FLAG, -+ .owner = THIS_MODULE, -+ .pmu_info = &pfm_mips64_pmu_info -+}; -+ -+static inline int n_counters(void) -+{ -+ if (!(read_c0_config1() & MIPS64_CONFIG_PMC_MASK)) -+ return 0; -+ if (!(read_c0_perfctrl0() & MIPS64_PMC_CTR_MASK)) -+ return 1; -+ if (!(read_c0_perfctrl1() & MIPS64_PMC_CTR_MASK)) -+ return 2; -+ if (!(read_c0_perfctrl2() & MIPS64_PMC_CTR_MASK)) -+ return 3; -+ return 4; -+} -+ -+static int __init pfm_mips64_pmu_init_module(void) -+{ -+ struct cpuinfo_mips *c = ¤t_cpu_data; -+ int i, ret, num; -+ u64 temp_mask; -+ -+ switch (c->cputype) { -+ case CPU_5KC: -+ pfm_mips64_pmu_conf.pmu_name = "MIPS5KC"; -+ break; -+ case CPU_R12000: -+ pfm_mips64_pmu_conf.pmu_name = "MIPSR12000"; -+ break; -+ case CPU_20KC: -+ pfm_mips64_pmu_conf.pmu_name = "MIPS20KC"; -+ break; -+ case CPU_24K: -+ pfm_mips64_pmu_conf.pmu_name = "MIPS24K"; 
-+ break; -+ case CPU_25KF: -+ pfm_mips64_pmu_conf.pmu_name = "MIPS25KF"; -+ break; -+ case CPU_SB1: -+ pfm_mips64_pmu_conf.pmu_name = "SB1"; -+ break; -+ case CPU_SB1A: -+ pfm_mips64_pmu_conf.pmu_name = "SB1A"; -+ break; -+#ifndef CONFIG_SMP -+ case CPU_34K: -+ pfm_mips64_pmu_conf.pmu_name = "MIPS34K"; -+ break; -+#if defined(CPU_74K) -+ case CPU_74K: -+ pfm_mips64_pmu_conf.pmu_name = "MIPS74K"; -+ break; -+#endif -+#endif -+ default: -+ PFM_INFO("Unknown cputype 0x%x", c->cputype); -+ return -1; -+ } -+ -+ /* The R14k and older performance counters have to */ -+ /* be hard-coded, as there is no support for auto-detection */ -+ if ((c->cputype == CPU_R12000) || (c->cputype == CPU_R14000)) -+ num = 4; -+ else if (c->cputype == CPU_R10000) -+ num = 2; -+ else -+ num = n_counters(); -+ -+ if (num == 0) { -+ PFM_INFO("cputype 0x%x has no counters", c->cputype); -+ return -1; -+ } -+ /* mark remaining counters unavailable */ -+ for (i = num; i < PFM_MIPS64_NUM_PMCS; i++) -+ pfm_mips64_pmc_desc[i].type = PFM_REG_NA; -+ -+ for (i = num; i < PFM_MIPS64_NUM_PMDS; i++) -+ pfm_mips64_pmd_desc[i].type = PFM_REG_NA; -+ -+ /* set the PMC_RSVD mask */ -+ switch (c->cputype) { -+ case CPU_5KC: -+ case CPU_R10000: -+ case CPU_20KC: -+ /* 4-bits for event */ -+ temp_mask = 0xfffffffffffffe10ULL; -+ break; -+ case CPU_R12000: -+ case CPU_R14000: -+ /* 5-bits for event */ -+ temp_mask = 0xfffffffffffffc10ULL; -+ break; -+ default: -+ /* 6-bits for event */ -+ temp_mask = 0xfffffffffffff810ULL; -+ } -+ for (i = 0; i < PFM_MIPS64_NUM_PMCS; i++) -+ pfm_mips64_pmc_desc[i].rsvd_msk = temp_mask; -+ -+ pfm_mips64_pmu_conf.num_pmc_entries = num; -+ pfm_mips64_pmu_conf.num_pmd_entries = num; -+ -+ pfm_mips64_pmu_info.pmu_style = c->cputype; -+ -+ ret = pfm_pmu_register(&pfm_mips64_pmu_conf); -+ if (ret == 0) -+ perf_irq = perfmon_perf_irq; -+ return ret; -+} -+ -+static void __exit pfm_mips64_pmu_cleanup_module(void) -+{ -+ pfm_pmu_unregister(&pfm_mips64_pmu_conf); -+ perf_irq = 
null_perf_irq; -+} -+ -+module_init(pfm_mips64_pmu_init_module); -+module_exit(pfm_mips64_pmu_cleanup_module); -diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig -index 587da5e..a411389 100644 ---- a/arch/powerpc/Kconfig -+++ b/arch/powerpc/Kconfig -@@ -230,6 +230,8 @@ source "init/Kconfig" - source "arch/powerpc/sysdev/Kconfig" - source "arch/powerpc/platforms/Kconfig" - -+source "arch/powerpc/perfmon/Kconfig" -+ - menu "Kernel options" - - config HIGHMEM -diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile -index c6be19e..7ea20cb 100644 ---- a/arch/powerpc/Makefile -+++ b/arch/powerpc/Makefile -@@ -146,6 +146,7 @@ core-y += arch/powerpc/kernel/ \ - arch/powerpc/platforms/ - core-$(CONFIG_MATH_EMULATION) += arch/powerpc/math-emu/ - core-$(CONFIG_XMON) += arch/powerpc/xmon/ -+core-$(CONFIG_PERFMON) += arch/powerpc/perfmon/ - core-$(CONFIG_KVM) += arch/powerpc/kvm/ - - drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ -diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild -index 5ab7d7f..88cb533 100644 ---- a/arch/powerpc/include/asm/Kbuild -+++ b/arch/powerpc/include/asm/Kbuild -@@ -21,6 +21,7 @@ header-y += resource.h - header-y += sigcontext.h - header-y += statfs.h - header-y += ps3fb.h -+header-y += perfmon.h - - unifdef-y += bootx.h - unifdef-y += byteorder.h -diff --git a/arch/powerpc/include/asm/cell-pmu.h b/arch/powerpc/include/asm/cell-pmu.h -index 8066eed..981db26 100644 ---- a/arch/powerpc/include/asm/cell-pmu.h -+++ b/arch/powerpc/include/asm/cell-pmu.h -@@ -61,6 +61,11 @@ - - /* Macros for the pm_status register. 
*/ - #define CBE_PM_CTR_OVERFLOW_INTR(ctr) (1 << (31 - ((ctr) & 7))) -+#define CBE_PM_OVERFLOW_CTRS(pm_status) (((pm_status) >> 24) & 0xff) -+#define CBE_PM_ALL_OVERFLOW_INTR 0xff000000 -+#define CBE_PM_INTERVAL_INTR 0x00800000 -+#define CBE_PM_TRACE_BUFFER_FULL_INTR 0x00400000 -+#define CBE_PM_TRACE_BUFFER_UNDERFLOW_INTR 0x00200000 - - enum pm_reg_name { - group_control, -diff --git a/arch/powerpc/include/asm/cell-regs.h b/arch/powerpc/include/asm/cell-regs.h -index fd6fd00..580786d 100644 ---- a/arch/powerpc/include/asm/cell-regs.h -+++ b/arch/powerpc/include/asm/cell-regs.h -@@ -117,8 +117,9 @@ struct cbe_pmd_regs { - u8 pad_0x0c1c_0x0c20 [4]; /* 0x0c1c */ - #define CBE_PMD_FIR_MODE_M8 0x00800 - u64 fir_enable_mask; /* 0x0c20 */ -- -- u8 pad_0x0c28_0x0ca8 [0x0ca8 - 0x0c28]; /* 0x0c28 */ -+ u8 pad_0x0c28_0x0c98 [0x0c98 - 0x0c28]; /* 0x0c28 */ -+ u64 on_ramp_trace; /* 0x0c98 */ -+ u64 pad_0x0ca0; /* 0x0ca0 */ - u64 ras_esc_0; /* 0x0ca8 */ - u8 pad_0x0cb0_0x1000 [0x1000 - 0x0cb0]; /* 0x0cb0 */ - }; -@@ -218,7 +219,11 @@ extern struct cbe_iic_regs __iomem *cbe_get_cpu_iic_regs(int cpu); - - - struct cbe_mic_tm_regs { -- u8 pad_0x0000_0x0040[0x0040 - 0x0000]; /* 0x0000 */ -+ u8 pad_0x0000_0x0010[0x0010 - 0x0000]; /* 0x0000 */ -+ -+ u64 MBL_debug; /* 0x0010 */ -+ -+ u8 pad_0x0018_0x0040[0x0040 - 0x0018]; /* 0x0018 */ - - u64 mic_ctl_cnfg2; /* 0x0040 */ - #define CBE_MIC_ENABLE_AUX_TRC 0x8000000000000000LL -@@ -303,6 +308,25 @@ struct cbe_mic_tm_regs { - extern struct cbe_mic_tm_regs __iomem *cbe_get_mic_tm_regs(struct device_node *np); - extern struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu); - -+/* -+ * -+ * PPE Privileged MMIO Registers definition. 
(offset 0x500000 - 0x500fff) -+ * -+ */ -+struct cbe_ppe_priv_regs { -+ u8 pad_0x0000_0x0858[0x0858 - 0x0000]; /* 0x0000 */ -+ -+ u64 L2_debug1; /* 0x0858 */ -+ -+ u8 pad_0x0860_0x0958[0x0958 - 0x0860]; /* 0x0860 */ -+ -+ u64 ciu_dr1; /* 0x0958 */ -+ -+ u8 pad_0x0960_0x1000[0x1000 - 0x0960]; /* 0x0960 */ -+}; -+ -+extern struct cbe_ppe_priv_regs __iomem *cbe_get_cpu_ppe_priv_regs(int cpu); -+ - /* some utility functions to deal with SMT */ - extern u32 cbe_get_hw_thread_id(int cpu); - extern u32 cbe_cpu_to_node(int cpu); -diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h -index 6493a39..ba9ead4 100644 ---- a/arch/powerpc/include/asm/paca.h -+++ b/arch/powerpc/include/asm/paca.h -@@ -97,6 +97,10 @@ struct paca_struct { - u8 soft_enabled; /* irq soft-enable flag */ - u8 hard_enabled; /* set if irqs are enabled in MSR */ - u8 io_sync; /* writel() needs spin_unlock sync */ -+#ifdef CONFIG_PERFMON -+ u8 pmu_except_pending; /* PMU exception occurred while soft -+ * disabled */ -+#endif - - /* Stuff for accurate time accounting */ - u64 user_time; /* accumulated usermode TB ticks */ -diff --git a/arch/powerpc/include/asm/perfmon.h b/arch/powerpc/include/asm/perfmon.h -new file mode 100644 -index 0000000..da0ae3b ---- /dev/null -+++ b/arch/powerpc/include/asm/perfmon.h -@@ -0,0 +1,33 @@ -+/* -+ * Copyright (c) 2007 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This file contains powerpc specific definitions for the perfmon -+ * interface. -+ * -+ * This file MUST never be included directly. Use linux/perfmon.h. -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. 
-+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#ifndef _ASM_POWERPC_PERFMON_H_ -+#define _ASM_POWERPC_PERFMON_H_ -+ -+/* -+ * arch-specific user visible interface definitions -+ */ -+#define PFM_ARCH_MAX_PMCS (256+64) /* 256 HW 64 SW */ -+#define PFM_ARCH_MAX_PMDS (256+64) /* 256 HW 64 SW */ -+ -+#endif /* _ASM_POWERPC_PERFMON_H_ */ -diff --git a/arch/powerpc/include/asm/perfmon_kern.h b/arch/powerpc/include/asm/perfmon_kern.h -new file mode 100644 -index 0000000..65ec984 ---- /dev/null -+++ b/arch/powerpc/include/asm/perfmon_kern.h -@@ -0,0 +1,390 @@ -+/* -+ * Copyright (c) 2005 David Gibson, IBM Corporation. -+ * -+ * Based on other versions: -+ * Copyright (c) 2005 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This file contains powerpc specific definitions for the perfmon -+ * interface. -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#ifndef _ASM_POWERPC_PERFMON_KERN_H_ -+#define _ASM_POWERPC_PERFMON_KERN_H_ -+ -+#ifdef __KERNEL__ -+ -+#ifdef CONFIG_PERFMON -+ -+#include -+#include -+ -+#define HID0_PMC5_6_GR_MODE (1UL << (63 - 40)) -+ -+enum powerpc_pmu_type { -+ PFM_POWERPC_PMU_NONE, -+ PFM_POWERPC_PMU_604, -+ PFM_POWERPC_PMU_604e, -+ PFM_POWERPC_PMU_750, /* XXX: Minor event set diffs between IBM and Moto. */ -+ PFM_POWERPC_PMU_7400, -+ PFM_POWERPC_PMU_7450, -+ PFM_POWERPC_PMU_POWER4, -+ PFM_POWERPC_PMU_POWER5, -+ PFM_POWERPC_PMU_POWER5p, -+ PFM_POWERPC_PMU_POWER6, -+ PFM_POWERPC_PMU_CELL, -+}; -+ -+struct pfm_arch_pmu_info { -+ enum powerpc_pmu_type pmu_style; -+ -+ void (*write_pmc)(unsigned int cnum, u64 value); -+ void (*write_pmd)(unsigned int cnum, u64 value); -+ -+ u64 (*read_pmd)(unsigned int cnum); -+ -+ void (*enable_counters)(struct pfm_context *ctx, -+ struct pfm_event_set *set); -+ void (*disable_counters)(struct pfm_context *ctx, -+ struct pfm_event_set *set); -+ -+ void (*irq_handler)(struct pt_regs *regs, struct pfm_context *ctx); -+ void (*get_ovfl_pmds)(struct pfm_context *ctx, -+ struct pfm_event_set *set); -+ -+ /* The following routines are optional. 
*/ -+ void (*restore_pmcs)(struct pfm_context *ctx, -+ struct pfm_event_set *set); -+ void (*restore_pmds)(struct pfm_context *ctx, -+ struct pfm_event_set *set); -+ -+ int (*ctxswout_thread)(struct task_struct *task, -+ struct pfm_context *ctx, -+ struct pfm_event_set *set); -+ void (*ctxswin_thread)(struct task_struct *task, -+ struct pfm_context *ctx, -+ struct pfm_event_set *set); -+ int (*load_context)(struct pfm_context *ctx); -+ void (*unload_context)(struct pfm_context *ctx); -+ int (*acquire_pmu)(u64 *unavail_pmcs, u64 *unavail_pmds); -+ void (*release_pmu)(void); -+ void *platform_info; -+ void (*resend_irq)(struct pfm_context *ctx); -+}; -+ -+#ifdef CONFIG_PPC32 -+#define PFM_ARCH_PMD_STK_ARG 6 /* conservative value */ -+#define PFM_ARCH_PMC_STK_ARG 6 /* conservative value */ -+#else -+#define PFM_ARCH_PMD_STK_ARG 8 /* conservative value */ -+#define PFM_ARCH_PMC_STK_ARG 8 /* conservative value */ -+#endif -+ -+static inline void pfm_arch_resend_irq(struct pfm_context *ctx) -+{ -+ struct pfm_arch_pmu_info *arch_info; -+ -+ arch_info = pfm_pmu_info(); -+ arch_info->resend_irq(ctx); -+} -+ -+static inline void pfm_arch_serialize(void) -+{} -+ -+static inline void pfm_arch_write_pmc(struct pfm_context *ctx, -+ unsigned int cnum, -+ u64 value) -+{ -+ struct pfm_arch_pmu_info *arch_info; -+ -+ arch_info = pfm_pmu_info(); -+ -+ /* -+ * we only write to the actual register when monitoring is -+ * active (pfm_start was issued) -+ */ -+ if (ctx && ctx->flags.started == 0) -+ return; -+ -+ BUG_ON(!arch_info->write_pmc); -+ -+ arch_info->write_pmc(cnum, value); -+} -+ -+static inline void pfm_arch_write_pmd(struct pfm_context *ctx, -+ unsigned int cnum, u64 value) -+{ -+ struct pfm_arch_pmu_info *arch_info; -+ -+ arch_info = pfm_pmu_info(); -+ -+ value &= pfm_pmu_conf->ovfl_mask; -+ -+ BUG_ON(!arch_info->write_pmd); -+ -+ arch_info->write_pmd(cnum, value); -+} -+ -+static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum) -+{ -+ struct 
pfm_arch_pmu_info *arch_info; -+ -+ arch_info = pfm_pmu_info(); -+ -+ BUG_ON(!arch_info->read_pmd); -+ -+ return arch_info->read_pmd(cnum); -+} -+ -+/* -+ * For some CPUs, the upper bits of a counter must be set in order for the -+ * overflow interrupt to happen. On overflow, the counter has wrapped around, -+ * and the upper bits are cleared. This function may be used to set them back. -+ */ -+static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx, -+ unsigned int cnum) -+{ -+ u64 val = pfm_arch_read_pmd(ctx, cnum); -+ -+ /* This masks out overflow bit 31 */ -+ pfm_arch_write_pmd(ctx, cnum, val); -+} -+ -+/* -+ * At certain points, perfmon needs to know if monitoring has been -+ * explicitely started/stopped by user via pfm_start/pfm_stop. The -+ * information is tracked in flags.started. However on certain -+ * architectures, it may be possible to start/stop directly from -+ * user level with a single assembly instruction bypassing -+ * the kernel. This function must be used to determine by -+ * an arch-specific mean if monitoring is actually started/stopped. 
-+ */ -+static inline int pfm_arch_is_active(struct pfm_context *ctx) -+{ -+ return ctx->flags.started; -+} -+ -+static inline void pfm_arch_ctxswout_sys(struct task_struct *task, -+ struct pfm_context *ctx) -+{} -+ -+static inline void pfm_arch_ctxswin_sys(struct task_struct *task, -+ struct pfm_context *ctx) -+{} -+ -+void pfm_arch_init_percpu(void); -+int pfm_arch_is_monitoring_active(struct pfm_context *ctx); -+int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx); -+void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx); -+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx); -+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx); -+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set); -+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set); -+void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx, struct pfm_event_set *set); -+int pfm_arch_get_ovfl_pmds(struct pfm_context *ctx, -+ struct pfm_event_set *set); -+char *pfm_arch_get_pmu_module_name(void); -+/* -+ * called from __pfm_interrupt_handler(). ctx is not NULL. -+ * ctx is locked. PMU interrupt is masked. -+ * -+ * must stop all monitoring to ensure handler has consistent view. -+ * must collect overflowed PMDs bitmask into povfls_pmds and -+ * npend_ovfls. If no interrupt detected then npend_ovfls -+ * must be set to zero. 
-+ */ -+static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ pfm_arch_stop(current, ctx); -+} -+ -+void powerpc_irq_handler(struct pt_regs *regs); -+ -+/* -+ * unfreeze PMU from pfm_do_interrupt_handler() -+ * ctx may be NULL for spurious -+ */ -+static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx) -+{ -+ struct pfm_arch_pmu_info *arch_info; -+ -+ if (!ctx) -+ return; -+ -+ PFM_DBG_ovfl("state=%d", ctx->state); -+ -+ ctx->flags.started = 1; -+ -+ if (ctx->state == PFM_CTX_MASKED) -+ return; -+ -+ arch_info = pfm_pmu_info(); -+ BUG_ON(!arch_info->enable_counters); -+ arch_info->enable_counters(ctx, ctx->active_set); -+} -+ -+/* -+ * PowerPC does not save the PMDs during pfm_arch_intr_freeze_pmu(), thus -+ * this routine needs to do it when switching sets on overflow -+ */ -+static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ pfm_save_pmds(ctx, set); -+} -+ -+/* -+ * this function is called from the PMU interrupt handler ONLY. -+ * On PPC, the PMU is frozen via arch_stop, masking would be implemented -+ * via arch-stop as well. Given that the PMU is already stopped when -+ * entering the interrupt handler, we do not need to stop it again, so -+ * this function is a nop. -+ */ -+static inline void pfm_arch_mask_monitoring(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{} -+ -+/* -+ * Simply need to start the context in order to unmask. 
-+ */ -+static inline void pfm_arch_unmask_monitoring(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ pfm_arch_start(current, ctx); -+} -+ -+ -+static inline int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg) -+{ -+ return 0; -+} -+ -+static inline int pfm_arch_context_create(struct pfm_context *ctx, -+ u32 ctx_flags) -+{ -+ return 0; -+} -+ -+static inline void pfm_arch_context_free(struct pfm_context *ctx) -+{} -+ -+/* not necessary on PowerPC */ -+static inline void pfm_cacheflush(void *addr, unsigned int len) -+{} -+ -+/* -+ * function called from pfm_setfl_sane(). Context is locked -+ * and interrupts are masked. -+ * The value of flags is the value of ctx_flags as passed by -+ * user. -+ * -+ * function must check arch-specific set flags. -+ * Return: -+ * 1 when flags are valid -+ * 0 on error -+ */ -+static inline int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags) -+{ -+ return 0; -+} -+ -+static inline int pfm_arch_init(void) -+{ -+ return 0; -+} -+ -+static inline int pfm_arch_load_context(struct pfm_context *ctx) -+{ -+ struct pfm_arch_pmu_info *arch_info; -+ int rc = 0; -+ -+ arch_info = pfm_pmu_info(); -+ if (arch_info->load_context) -+ rc = arch_info->load_context(ctx); -+ -+ return rc; -+} -+ -+static inline void pfm_arch_unload_context(struct pfm_context *ctx) -+{ -+ struct pfm_arch_pmu_info *arch_info; -+ -+ arch_info = pfm_pmu_info(); -+ if (arch_info->unload_context) -+ arch_info->unload_context(ctx); -+} -+ -+static inline int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds) -+{ -+ struct pfm_arch_pmu_info *arch_info; -+ int rc = 0; -+ -+ arch_info = pfm_pmu_info(); -+ if (arch_info->acquire_pmu) { -+ rc = arch_info->acquire_pmu(unavail_pmcs, unavail_pmds); -+ if (rc) -+ return rc; -+ } -+ -+ return reserve_pmc_hardware(powerpc_irq_handler); -+} -+ -+static inline void pfm_arch_pmu_release(void) -+{ -+ struct pfm_arch_pmu_info *arch_info; -+ -+ arch_info = pfm_pmu_info(); -+ if (arch_info->release_pmu) -+ 
arch_info->release_pmu(); -+ -+ release_pmc_hardware(); -+} -+ -+static inline void pfm_arch_arm_handle_work(struct task_struct *task) -+{} -+ -+static inline void pfm_arch_disarm_handle_work(struct task_struct *task) -+{} -+ -+static inline int pfm_arch_get_base_syscall(void) -+{ -+ return __NR_pfm_create_context; -+} -+ -+struct pfm_arch_context { -+ /* Cell: Most recent value of the pm_status -+ * register read by the interrupt handler. -+ * -+ * Interrupt handler sets last_read_updated if it -+ * just read and updated last_read_pm_status -+ */ -+ u32 last_read_pm_status; -+ u32 last_read_updated; -+ u64 powergs_pmc5, powergs_pmc6; -+ u64 delta_tb, delta_tb_start; -+ u64 delta_purr, delta_purr_start; -+}; -+ -+#define PFM_ARCH_CTX_SIZE sizeof(struct pfm_arch_context) -+/* -+ * PowerPC does not need extra alignment requirements for the sampling buffer -+ */ -+#define PFM_ARCH_SMPL_ALIGN_SIZE 0 -+ -+#endif /* CONFIG_PERFMON */ -+ -+#endif /* __KERNEL__ */ -+#endif /* _ASM_POWERPC_PERFMON_KERN_H_ */ -diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h -index c6d1ab6..a9f3ad0 100644 ---- a/arch/powerpc/include/asm/reg.h -+++ b/arch/powerpc/include/asm/reg.h -@@ -698,6 +698,7 @@ - #define PV_POWER5 0x003A - #define PV_POWER5p 0x003B - #define PV_970FX 0x003C -+#define PV_POWER6 0x003E - #define PV_630 0x0040 - #define PV_630p 0x0041 - #define PV_970MP 0x0044 -diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h -index f6cc7a4..0164841 100644 ---- a/arch/powerpc/include/asm/systbl.h -+++ b/arch/powerpc/include/asm/systbl.h -@@ -322,3 +322,15 @@ SYSCALL_SPU(epoll_create1) - SYSCALL_SPU(dup3) - SYSCALL_SPU(pipe2) - SYSCALL(inotify_init1) -+SYSCALL(pfm_create_context) -+SYSCALL(pfm_write_pmcs) -+SYSCALL(pfm_write_pmds) -+SYSCALL(pfm_read_pmds) -+SYSCALL(pfm_load_context) -+SYSCALL(pfm_start) -+SYSCALL(pfm_stop) -+SYSCALL(pfm_restart) -+SYSCALL(pfm_create_evtsets) -+SYSCALL(pfm_getinfo_evtsets) 
-+SYSCALL(pfm_delete_evtsets) -+SYSCALL(pfm_unload_context) -diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h -index 9665a26..6cda9f9 100644 ---- a/arch/powerpc/include/asm/thread_info.h -+++ b/arch/powerpc/include/asm/thread_info.h -@@ -130,10 +130,12 @@ static inline struct thread_info *current_thread_info(void) - #define _TIF_FREEZE (1< 0x10000 on 4xx/Book-E since it include MSR_CE. - */ - #if MSR_KERNEL >= 0x10000 --#define LOAD_MSR_KERNEL(r, x) lis r,(x)@h; ori r,r,(x)@l -+#define LOAD_MSR_KERNEL(r, x) lis r,(x)@ha; ori r,r,(x)@l - #else - #define LOAD_MSR_KERNEL(r, x) li r,(x) - #endif -diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S -index 2d802e9..77a090d 100644 ---- a/arch/powerpc/kernel/entry_64.S -+++ b/arch/powerpc/kernel/entry_64.S -@@ -643,6 +643,10 @@ user_work: - b .ret_from_except_lite - - 1: bl .save_nvgprs -+#ifdef CONFIG_PERFMON -+ addi r3,r1,STACK_FRAME_OVERHEAD -+ bl .pfm_handle_work -+#endif /* CONFIG_PERFMON */ - addi r3,r1,STACK_FRAME_OVERHEAD - bl .do_signal - b .ret_from_except -diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c -index d972dec..b255fba 100644 ---- a/arch/powerpc/kernel/irq.c -+++ b/arch/powerpc/kernel/irq.c -@@ -104,6 +104,24 @@ static inline notrace void set_soft_enabled(unsigned long enable) - : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); - } - -+#ifdef CONFIG_PERFMON -+static inline unsigned long get_pmu_except_pending(void) -+{ -+ unsigned long pending; -+ -+ __asm__ __volatile__("lbz %0,%1(13)" -+ : "=r" (pending) : "i" (offsetof(struct paca_struct, pmu_except_pending))); -+ -+ return pending; -+} -+ -+static inline void set_pmu_except_pending(unsigned long pending) -+{ -+ __asm__ __volatile__("stb %0,%1(13)" -+ : : "r" (pending), "i" (offsetof(struct paca_struct, pmu_except_pending))); -+} -+#endif /* CONFIG_PERFMON */ -+ - notrace void raw_local_irq_restore(unsigned long en) - { - /* -@@ -162,6 +180,19 @@ 
notrace void raw_local_irq_restore(unsigned long en) - lv1_get_version_info(&tmp); - } - -+#ifdef CONFIG_PERFMON -+ /* -+ * If a PMU exception occurred while interrupts were soft disabled, -+ * force a PMU exception. -+ */ -+ if (get_pmu_except_pending()) { -+ set_pmu_except_pending(0); -+ /* Make sure we trigger the edge detection circuitry */ -+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO); -+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO); -+ } -+#endif /* CONFIG_PERFMON */ -+ - __hard_irq_enable(); - } - EXPORT_SYMBOL(raw_local_irq_restore); -diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c -index 957bded..32dbc8e 100644 ---- a/arch/powerpc/kernel/process.c -+++ b/arch/powerpc/kernel/process.c -@@ -33,6 +33,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -393,9 +394,14 @@ struct task_struct *__switch_to(struct task_struct *prev, - new_thread->start_tb = current_tb; - } - #endif -- - local_irq_save(flags); - -+ if (test_tsk_thread_flag(prev, TIF_PERFMON_CTXSW)) -+ pfm_ctxsw_out(prev, new); -+ -+ if (test_tsk_thread_flag(new, TIF_PERFMON_CTXSW)) -+ pfm_ctxsw_in(prev, new); -+ - account_system_vtime(current); - account_process_vtime(current); - calculate_steal_time(); -@@ -544,6 +550,7 @@ void show_regs(struct pt_regs * regs) - void exit_thread(void) - { - discard_lazy_cpu_state(); -+ pfm_exit_thread(); - } - - void flush_thread(void) -@@ -669,6 +676,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, - #else - kregs->nip = (unsigned long)ret_from_fork; - #endif -+ pfm_copy_thread(p); - - return 0; - } -diff --git a/arch/powerpc/perfmon/Kconfig b/arch/powerpc/perfmon/Kconfig -new file mode 100644 -index 0000000..3f4bbf2 ---- /dev/null -+++ b/arch/powerpc/perfmon/Kconfig -@@ -0,0 +1,67 @@ -+menu "Hardware Performance Monitoring support" -+config PERFMON -+ bool "Perfmon2 performance monitoring interface" -+ default n -+ help -+ Enables the perfmon2 interface to access the 
hardware -+ performance counters. See for -+ more details. -+ -+config PERFMON_DEBUG -+ bool "Perfmon debugging" -+ default n -+ depends on PERFMON -+ help -+ Enables perfmon debugging support -+ -+config PERFMON_DEBUG_FS -+ bool "Enable perfmon statistics reporting via debugfs" -+ default y -+ depends on PERFMON && DEBUG_FS -+ help -+ Enable collection and reporting of perfmon timing statistics under -+ debugfs. This is used for debugging and performance analysis of the -+ subsystem. The debugfs filesystem must be mounted. -+ -+config PERFMON_POWER4 -+ tristate "Support for Power4 hardware performance counters" -+ depends on PERFMON && PPC64 -+ default n -+ help -+ Enables support for the Power 4 hardware performance counters -+ If unsure, say M. -+ -+config PERFMON_POWER5 -+ tristate "Support for Power5 hardware performance counters" -+ depends on PERFMON && PPC64 -+ default n -+ help -+ Enables support for the Power 5 hardware performance counters -+ If unsure, say M. -+ -+config PERFMON_POWER6 -+ tristate "Support for Power6 hardware performance counters" -+ depends on PERFMON && PPC64 -+ default n -+ help -+ Enables support for the Power 6 hardware performance counters -+ If unsure, say M. -+ -+config PERFMON_PPC32 -+ tristate "Support for PPC32 hardware performance counters" -+ depends on PERFMON && PPC32 -+ default n -+ help -+ Enables support for the PPC32 hardware performance counters -+ If unsure, say M. -+ -+config PERFMON_CELL -+ tristate "Support for Cell hardware performance counters" -+ depends on PERFMON && PPC_CELL -+ select PS3_LPM if PPC_PS3 -+ default n -+ help -+ Enables support for the Cell hardware performance counters. -+ If unsure, say M. 
-+ -+endmenu -diff --git a/arch/powerpc/perfmon/Makefile b/arch/powerpc/perfmon/Makefile -new file mode 100644 -index 0000000..300661f ---- /dev/null -+++ b/arch/powerpc/perfmon/Makefile -@@ -0,0 +1,6 @@ -+obj-$(CONFIG_PERFMON) += perfmon.o -+obj-$(CONFIG_PERFMON_POWER4) += perfmon_power4.o -+obj-$(CONFIG_PERFMON_POWER5) += perfmon_power5.o -+obj-$(CONFIG_PERFMON_POWER6) += perfmon_power6.o -+obj-$(CONFIG_PERFMON_PPC32) += perfmon_ppc32.o -+obj-$(CONFIG_PERFMON_CELL) += perfmon_cell.o -diff --git a/arch/powerpc/perfmon/perfmon.c b/arch/powerpc/perfmon/perfmon.c -new file mode 100644 -index 0000000..51a8b6a ---- /dev/null -+++ b/arch/powerpc/perfmon/perfmon.c -@@ -0,0 +1,334 @@ -+/* -+ * This file implements the powerpc specific -+ * support for the perfmon2 interface -+ * -+ * Copyright (c) 2005 David Gibson, IBM Corporation. -+ * -+ * based on versions for other architectures: -+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+ -+static void pfm_stop_active(struct task_struct *task, -+ struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ struct pfm_arch_pmu_info *arch_info; -+ -+ arch_info = pfm_pmu_info(); -+ BUG_ON(!arch_info->disable_counters || !arch_info->get_ovfl_pmds); -+ -+ arch_info->disable_counters(ctx, set); -+ -+ if (set->npend_ovfls) -+ return; -+ -+ arch_info->get_ovfl_pmds(ctx, set); -+} -+ -+/* -+ * Called from pfm_save_pmds(). Interrupts are masked. Registers are -+ * already saved away. -+ */ -+void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ int i, num; -+ u64 *used_pmds, *intr_pmds; -+ -+ num = set->nused_pmds; -+ used_pmds = set->used_pmds; -+ intr_pmds = ctx->regs.intr_pmds; -+ -+ for (i = 0; num; i++) -+ if (likely(test_bit(i, used_pmds))) { -+ if (likely(test_bit(i, intr_pmds))) -+ pfm_write_pmd(ctx, i, 0); -+ num--; -+ } -+} -+ -+/* -+ * Called from pfm_ctxsw(). Task is guaranteed to be current. -+ * Context is locked. Interrupts are masked. Monitoring is active. -+ * PMU access is guaranteed. PMC and PMD registers are live in PMU. -+ * -+ * for per-thread: -+ * must stop monitoring for the task -+ * Return: -+ * non-zero : did not save PMDs (as part of stopping the PMU) -+ * 0 : saved PMDs (no need to save them in caller) -+ */ -+int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx) -+{ -+ struct pfm_arch_pmu_info *arch_info; -+ -+ arch_info = pfm_pmu_info(); -+ /* -+ * disable lazy restore of the PMC/PMD registers. 
-+ */ -+ ctx->active_set->priv_flags |= PFM_SETFL_PRIV_MOD_BOTH; -+ -+ if (ctx->state == PFM_CTX_MASKED) -+ return 1; -+ -+ pfm_stop_active(task, ctx, ctx->active_set); -+ -+ if (arch_info->ctxswout_thread) -+ arch_info->ctxswout_thread(task, ctx, ctx->active_set); -+ -+ return pfm_arch_is_active(ctx); -+} -+ -+/* -+ * Called from pfm_ctxsw -+ */ -+void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx) -+{ -+ struct pfm_arch_pmu_info *arch_info; -+ -+ arch_info = pfm_pmu_info(); -+ if (ctx->state != PFM_CTX_MASKED && ctx->flags.started == 1) { -+ BUG_ON(!arch_info->enable_counters); -+ arch_info->enable_counters(ctx, ctx->active_set); -+ } -+ -+ if (arch_info->ctxswin_thread) -+ arch_info->ctxswin_thread(task, ctx, ctx->active_set); -+} -+ -+/* -+ * Called from pfm_stop() and idle notifier -+ * -+ * Interrupts are masked. Context is locked. Set is the active set. -+ * -+ * For per-thread: -+ * task is not necessarily current. If not current task, then -+ * task is guaranteed stopped and off any cpu. Access to PMU -+ * is not guaranteed. Interrupts are masked. Context is locked. -+ * Set is the active set. -+ * -+ * For system-wide: -+ * task is current -+ * -+ * must disable active monitoring. ctx cannot be NULL -+ */ -+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx) -+{ -+ /* -+ * no need to go through stop_save() -+ * if we are already stopped -+ */ -+ if (!ctx->flags.started || ctx->state == PFM_CTX_MASKED) -+ return; -+ -+ /* -+ * stop live registers and collect pending overflow -+ */ -+ if (task == current) -+ pfm_stop_active(task, ctx, ctx->active_set); -+} -+ -+/* -+ * Enable active monitoring. Called from pfm_start() and -+ * pfm_arch_unmask_monitoring(). -+ * -+ * Interrupts are masked. Context is locked. Set is the active set. -+ * -+ * For per-thread: -+ * Task is not necessarily current. If not current task, then task -+ * is guaranteed stopped and off any cpu. No access to PMU if task -+ * is not current. 
-+ * -+ * For system-wide: -+ * Task is always current -+ */ -+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx) -+{ -+ struct pfm_arch_pmu_info *arch_info; -+ -+ arch_info = pfm_pmu_info(); -+ if (task != current) -+ return; -+ -+ BUG_ON(!arch_info->enable_counters); -+ -+ arch_info->enable_counters(ctx, ctx->active_set); -+} -+ -+/* -+ * function called from pfm_switch_sets(), pfm_context_load_thread(), -+ * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets() -+ * context is locked. Interrupts are masked. set cannot be NULL. -+ * Access to the PMU is guaranteed. -+ * -+ * function must restore all PMD registers from set. -+ */ -+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ struct pfm_arch_pmu_info *arch_info; -+ u64 *used_pmds; -+ u16 i, num; -+ -+ arch_info = pfm_pmu_info(); -+ -+ /* The model-specific module can override the default -+ * restore-PMD method. -+ */ -+ if (arch_info->restore_pmds) -+ return arch_info->restore_pmds(ctx, set); -+ -+ num = set->nused_pmds; -+ used_pmds = set->used_pmds; -+ -+ for (i = 0; num; i++) { -+ if (likely(test_bit(i, used_pmds))) { -+ pfm_write_pmd(ctx, i, set->pmds[i].value); -+ num--; -+ } -+ } -+} -+ -+/* -+ * function called from pfm_switch_sets(), pfm_context_load_thread(), -+ * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets() -+ * context is locked. Interrupts are masked. set cannot be NULL. -+ * Access to the PMU is guaranteed. -+ * -+ * function must restore all PMC registers from set, if needed. -+ */ -+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ struct pfm_arch_pmu_info *arch_info; -+ u64 *impl_pmcs; -+ unsigned int i, max_pmc, reg; -+ -+ arch_info = pfm_pmu_info(); -+ /* The model-specific module can override the default -+ * restore-PMC method. 
-+ */ -+ if (arch_info->restore_pmcs) -+ return arch_info->restore_pmcs(ctx, set); -+ -+ /* The "common" powerpc model's enable the counters simply by writing -+ * all the control registers. Therefore, if we're masked or stopped we -+ * don't need to bother restoring the PMCs now. -+ */ -+ if (ctx->state == PFM_CTX_MASKED || ctx->flags.started == 0) -+ return; -+ -+ max_pmc = ctx->regs.max_pmc; -+ impl_pmcs = ctx->regs.pmcs; -+ -+ /* -+ * Restore all pmcs in reverse order to ensure the counters aren't -+ * enabled before their event selectors are set correctly. -+ */ -+ reg = max_pmc - 1; -+ for (i = 0; i < max_pmc; i++) { -+ if (test_bit(reg, impl_pmcs)) -+ pfm_arch_write_pmc(ctx, reg, set->pmcs[reg]); -+ reg--; -+ } -+} -+ -+char *pfm_arch_get_pmu_module_name(void) -+{ -+ unsigned int pvr = mfspr(SPRN_PVR); -+ -+ switch (PVR_VER(pvr)) { -+ case 0x0004: /* 604 */ -+ case 0x0009: /* 604e; */ -+ case 0x000A: /* 604ev */ -+ case 0x0008: /* 750/740 */ -+ case 0x7000: /* 750FX */ -+ case 0x7001: -+ case 0x7002: /* 750GX */ -+ case 0x000C: /* 7400 */ -+ case 0x800C: /* 7410 */ -+ case 0x8000: /* 7451/7441 */ -+ case 0x8001: /* 7455/7445 */ -+ case 0x8002: /* 7457/7447 */ -+ case 0x8003: /* 7447A */ -+ case 0x8004: /* 7448 */ -+ return("perfmon_ppc32"); -+ case PV_POWER4: -+ case PV_POWER4p: -+ return "perfmon_power4"; -+ case PV_POWER5: -+ return "perfmon_power5"; -+ case PV_POWER5p: -+ if (PVR_REV(pvr) < 0x300) -+ /* PMU behaves like POWER5 */ -+ return "perfmon_power5"; -+ else -+ /* PMU behaves like POWER6 */ -+ return "perfmon_power6"; -+ case PV_POWER6: -+ return "perfmon_power6"; -+ case PV_970: -+ case PV_970FX: -+ case PV_970MP: -+ return "perfmon_ppc970"; -+ case PV_BE: -+ return "perfmon_cell"; -+ } -+ return NULL; -+} -+ -+void pfm_arch_init_percpu(void) -+{ -+#ifdef CONFIG_PPC64 -+ extern void ppc64_enable_pmcs(void); -+ ppc64_enable_pmcs(); -+#endif -+} -+ -+/** -+ * powerpc_irq_handler -+ * -+ * Get the perfmon context that belongs to the current CPU, and 
call the -+ * model-specific interrupt handler. -+ **/ -+void powerpc_irq_handler(struct pt_regs *regs) -+{ -+ struct pfm_arch_pmu_info *arch_info; -+ struct pfm_context *ctx; -+ -+ if (! regs->softe) { -+ /* -+ * We got a PMU interrupt while interrupts were soft -+ * disabled. Disable hardware interrupts by clearing -+ * MSR_EE and also clear PMAO because we will need to set -+ * that again later when interrupts are re-enabled and -+ * raw_local_irq_restore() sees that the pmu_except_pending -+ * flag is set. -+ */ -+ regs->msr &= ~MSR_EE; -+ get_paca()->pmu_except_pending = 1; -+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO); -+ return; -+ } -+ -+ arch_info = pfm_pmu_info(); -+ if (arch_info->irq_handler) { -+ ctx = __get_cpu_var(pmu_ctx); -+ if (likely(ctx)) -+ arch_info->irq_handler(regs, ctx); -+ } -+} -diff --git a/arch/powerpc/perfmon/perfmon_cell.c b/arch/powerpc/perfmon/perfmon_cell.c -new file mode 100644 -index 0000000..e1ae12c ---- /dev/null -+++ b/arch/powerpc/perfmon/perfmon_cell.c -@@ -0,0 +1,1449 @@ -+/* -+ * This file contains the Cell PMU register description tables -+ * and pmc checker used by perfmon.c. -+ * -+ * Copyright IBM Corporation 2007 -+ * (C) Copyright 2007 TOSHIBA CORPORATION -+ * -+ * Based on other Perfmon2 PMU modules. -+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+MODULE_AUTHOR("Kevin Corry , " -+ "Carl Love "); -+MODULE_DESCRIPTION("Cell PMU description table"); -+MODULE_LICENSE("GPL"); -+ -+struct pfm_cell_platform_pmu_info { -+ u32 (*read_ctr)(u32 cpu, u32 ctr); -+ void (*write_ctr)(u32 cpu, u32 ctr, u32 val); -+ void (*write_pm07_control)(u32 cpu, u32 ctr, u32 val); -+ void (*write_pm)(u32 cpu, enum pm_reg_name reg, u32 val); -+ void (*enable_pm)(u32 cpu); -+ void (*disable_pm)(u32 cpu); -+ void (*enable_pm_interrupts)(u32 cpu, u32 thread, u32 mask); -+ u32 (*get_and_clear_pm_interrupts)(u32 cpu); -+ u32 (*get_hw_thread_id)(int cpu); -+ struct cbe_ppe_priv_regs __iomem *(*get_cpu_ppe_priv_regs)(int cpu); -+ struct cbe_pmd_regs __iomem *(*get_cpu_pmd_regs)(int cpu); -+ struct cbe_mic_tm_regs __iomem *(*get_cpu_mic_tm_regs)(int cpu); -+ int (*rtas_token)(const char *service); -+ int (*rtas_call)(int token, int param1, int param2, int *param3, ...); -+}; -+ -+/* -+ * Mapping from Perfmon logical control registers to Cell hardware registers. -+ */ -+static struct pfm_regmap_desc pfm_cell_pmc_desc[] = { -+ /* Per-counter control registers. */ -+ PMC_D(PFM_REG_I, "pm0_control", 0, 0, 0, 0), -+ PMC_D(PFM_REG_I, "pm1_control", 0, 0, 0, 0), -+ PMC_D(PFM_REG_I, "pm2_control", 0, 0, 0, 0), -+ PMC_D(PFM_REG_I, "pm3_control", 0, 0, 0, 0), -+ PMC_D(PFM_REG_I, "pm4_control", 0, 0, 0, 0), -+ PMC_D(PFM_REG_I, "pm5_control", 0, 0, 0, 0), -+ PMC_D(PFM_REG_I, "pm6_control", 0, 0, 0, 0), -+ PMC_D(PFM_REG_I, "pm7_control", 0, 0, 0, 0), -+ -+ /* Per-counter RTAS arguments. Each of these registers has three fields. 
-+ * bits 63-48: debug-bus word -+ * bits 47-32: sub-unit -+ * bits 31-0 : full signal number -+ * (MSB = 63, LSB = 0) -+ */ -+ PMC_D(PFM_REG_I, "pm0_event", 0, 0, 0, 0), -+ PMC_D(PFM_REG_I, "pm1_event", 0, 0, 0, 0), -+ PMC_D(PFM_REG_I, "pm2_event", 0, 0, 0, 0), -+ PMC_D(PFM_REG_I, "pm3_event", 0, 0, 0, 0), -+ PMC_D(PFM_REG_I, "pm4_event", 0, 0, 0, 0), -+ PMC_D(PFM_REG_I, "pm5_event", 0, 0, 0, 0), -+ PMC_D(PFM_REG_I, "pm6_event", 0, 0, 0, 0), -+ PMC_D(PFM_REG_I, "pm7_event", 0, 0, 0, 0), -+ -+ /* Global control registers. Same order as enum pm_reg_name. */ -+ PMC_D(PFM_REG_I, "group_control", 0, 0, 0, 0), -+ PMC_D(PFM_REG_I, "debug_bus_control", 0, 0, 0, 0), -+ PMC_D(PFM_REG_I, "trace_address", 0, 0, 0, 0), -+ PMC_D(PFM_REG_I, "ext_trace_timer", 0, 0, 0, 0), -+ PMC_D(PFM_REG_I, "pm_status", 0, 0, 0, 0), -+ /* set the interrupt overflow bit for the four 32 bit counters -+ * that is currently supported. Will need to fix when 32 and 16 -+ * bit counters are supported. -+ */ -+ PMC_D(PFM_REG_I, "pm_control", 0xF0000000, 0xF0000000, 0, 0), -+ PMC_D(PFM_REG_I, "pm_interval", 0, 0, 0, 0), /* FIX: Does user-space also need read access to this one? */ -+ PMC_D(PFM_REG_I, "pm_start_stop", 0, 0, 0, 0), -+}; -+#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_cell_pmc_desc) -+ -+#define CELL_PMC_GROUP_CONTROL 16 -+#define CELL_PMC_PM_STATUS 20 -+#define CELL_PMC_PM_CONTROL 21 -+#define CELL_PMC_PM_CONTROL_CNTR_MASK 0x01E00000UL -+#define CELL_PMC_PM_CONTROL_CNTR_16 0x01E00000UL -+ -+/* -+ * Mapping from Perfmon logical data counters to Cell hardware counters. 
-+ */ -+static struct pfm_regmap_desc pfm_cell_pmd_desc[] = { -+ PMD_D(PFM_REG_C, "pm0", 0), -+ PMD_D(PFM_REG_C, "pm1", 0), -+ PMD_D(PFM_REG_C, "pm2", 0), -+ PMD_D(PFM_REG_C, "pm3", 0), -+ PMD_D(PFM_REG_C, "pm4", 0), -+ PMD_D(PFM_REG_C, "pm5", 0), -+ PMD_D(PFM_REG_C, "pm6", 0), -+ PMD_D(PFM_REG_C, "pm7", 0), -+}; -+#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_cell_pmd_desc) -+ -+#define PFM_EVENT_PMC_BUS_WORD(x) (((x) >> 48) & 0x00ff) -+#define PFM_EVENT_PMC_FULL_SIGNAL_NUMBER(x) ((x) & 0xffffffff) -+#define PFM_EVENT_PMC_SIGNAL_GROUP(x) (((x) & 0xffffffff) / 100) -+#define PFM_PM_CTR_INPUT_MUX_BIT(pm07_control) (((pm07_control) >> 26) & 0x1f) -+#define PFM_PM_CTR_INPUT_MUX_GROUP_INDEX(pm07_control) ((pm07_control) >> 31) -+#define PFM_GROUP_CONTROL_GROUP0_WORD(grp_ctrl) ((grp_ctrl) >> 30) -+#define PFM_GROUP_CONTROL_GROUP1_WORD(grp_ctrl) (((grp_ctrl) >> 28) & 0x3) -+#define PFM_NUM_OF_GROUPS 2 -+#define PFM_PPU_IU1_THREAD1_BASE_BIT 19 -+#define PFM_PPU_XU_THREAD1_BASE_BIT 16 -+#define PFM_COUNTER_CTRL_PMC_PPU_TH0 0x100000000ULL -+#define PFM_COUNTER_CTRL_PMC_PPU_TH1 0x200000000ULL -+ -+/* -+ * Debug-bus signal handling. -+ * -+ * Some Cell systems have firmware that can handle the debug-bus signal -+ * routing. For systems without this firmware, we have a minimal in-kernel -+ * implementation as well. -+ */ -+ -+/* The firmware only sees physical CPUs, so divide by 2 if SMT is on. */ -+#ifdef CONFIG_SCHED_SMT -+#define RTAS_CPU(cpu) ((cpu) / 2) -+#else -+#define RTAS_CPU(cpu) (cpu) -+#endif -+#define RTAS_BUS_WORD(x) (u16)(((x) >> 48) & 0x0000ffff) -+#define RTAS_SUB_UNIT(x) (u16)(((x) >> 32) & 0x0000ffff) -+#define RTAS_SIGNAL_NUMBER(x) (s32)( (x) & 0xffffffff) -+#define RTAS_SIGNAL_GROUP(x) (RTAS_SIGNAL_NUMBER(x) / 100) -+ -+#define subfunc_RESET 1 -+#define subfunc_ACTIVATE 2 -+ -+#define passthru_ENABLE 1 -+#define passthru_DISABLE 2 -+ -+/** -+ * struct cell_rtas_arg -+ * -+ * @cpu: Processor to modify. 
Linux numbers CPUs based on SMT IDs, but the -+ * firmware only sees the physical CPUs. So this value should be the -+ * SMT ID (from smp_processor_id() or get_cpu()) divided by 2. -+ * @sub_unit: Hardware subunit this applies to (if applicable). -+ * @signal_group: Signal group to enable/disable on the trace bus. -+ * @bus_word: For signal groups that propagate via the trace bus, this trace -+ * bus word will be used. This is a mask of (1 << TraceBusWord). -+ * For other signal groups, this specifies the trigger or event bus. -+ * @bit: Trigger/Event bit, if applicable for the signal group. -+ * -+ * An array of these structures are passed to rtas_call() to set up the -+ * signals on the debug bus. -+ **/ -+struct cell_rtas_arg { -+ u16 cpu; -+ u16 sub_unit; -+ s16 signal_group; -+ u8 bus_word; -+ u8 bit; -+}; -+ -+/** -+ * rtas_reset_signals -+ * -+ * Use the firmware RTAS call to disable signal pass-thru and to reset the -+ * debug-bus signals. -+ **/ -+static int rtas_reset_signals(u32 cpu) -+{ -+ struct cell_rtas_arg signal; -+ u64 real_addr = virt_to_phys(&signal); -+ int rc; -+ struct pfm_cell_platform_pmu_info *info = -+ ((struct pfm_arch_pmu_info *) -+ (pfm_pmu_conf->pmu_info))->platform_info; -+ -+ memset(&signal, 0, sizeof(signal)); -+ signal.cpu = RTAS_CPU(cpu); -+ rc = info->rtas_call(info->rtas_token("ibm,cbe-perftools"), -+ 5, 1, NULL, -+ subfunc_RESET, -+ passthru_DISABLE, -+ real_addr >> 32, -+ real_addr & 0xffffffff, -+ sizeof(signal)); -+ -+ return rc; -+} -+ -+/** -+ * rtas_activate_signals -+ * -+ * Use the firmware RTAS call to enable signal pass-thru and to activate the -+ * desired signal groups on the debug-bus. 
-+ **/ -+static int rtas_activate_signals(struct cell_rtas_arg *signals, -+ int num_signals) -+{ -+ u64 real_addr = virt_to_phys(signals); -+ int rc; -+ struct pfm_cell_platform_pmu_info *info = -+ ((struct pfm_arch_pmu_info *) -+ (pfm_pmu_conf->pmu_info))->platform_info; -+ -+ rc = info->rtas_call(info->rtas_token("ibm,cbe-perftools"), -+ 5, 1, NULL, -+ subfunc_ACTIVATE, -+ passthru_ENABLE, -+ real_addr >> 32, -+ real_addr & 0xffffffff, -+ num_signals * sizeof(*signals)); -+ -+ return rc; -+} -+ -+#define HID1_RESET_MASK (~0x00000001ffffffffUL) -+#define PPU_IU1_WORD0_HID1_EN_MASK (~0x00000001f0c0802cUL) -+#define PPU_IU1_WORD0_HID1_EN_WORD ( 0x00000001f0400000UL) -+#define PPU_IU1_WORD1_HID1_EN_MASK (~0x000000010fc08023UL) -+#define PPU_IU1_WORD1_HID1_EN_WORD ( 0x000000010f400001UL) -+#define PPU_XU_WORD0_HID1_EN_MASK (~0x00000001f038402cUL) -+#define PPU_XU_WORD0_HID1_EN_WORD ( 0x00000001f0080008UL) -+#define PPU_XU_WORD1_HID1_EN_MASK (~0x000000010f074023UL) -+#define PPU_XU_WORD1_HID1_EN_WORD ( 0x000000010f030002UL) -+ -+/* The bus_word field in the cell_rtas_arg structure is a bit-mask -+ * indicating which debug-bus word(s) to use. -+ */ -+enum { -+ BUS_WORD_0 = 1, -+ BUS_WORD_1 = 2, -+ BUS_WORD_2 = 4, -+ BUS_WORD_3 = 8, -+}; -+ -+/* Definitions of the signal-groups that the built-in signal-activation -+ * code can handle. 
-+ */ -+enum { -+ SIG_GROUP_NONE = 0, -+ -+ /* 2.x PowerPC Processor Unit (PPU) Signal Groups */ -+ SIG_GROUP_PPU_BASE = 20, -+ SIG_GROUP_PPU_IU1 = 21, -+ SIG_GROUP_PPU_XU = 22, -+ -+ /* 3.x PowerPC Storage Subsystem (PPSS) Signal Groups */ -+ SIG_GROUP_PPSS_BASE = 30, -+ -+ /* 4.x Synergistic Processor Unit (SPU) Signal Groups */ -+ SIG_GROUP_SPU_BASE = 40, -+ -+ /* 5.x Memory Flow Controller (MFC) Signal Groups */ -+ SIG_GROUP_MFC_BASE = 50, -+ -+ /* 6.x Element )nterconnect Bus (EIB) Signal Groups */ -+ SIG_GROUP_EIB_BASE = 60, -+ -+ /* 7.x Memory Interface Controller (MIC) Signal Groups */ -+ SIG_GROUP_MIC_BASE = 70, -+ -+ /* 8.x Cell Broadband Engine Interface (BEI) Signal Groups */ -+ SIG_GROUP_BEI_BASE = 80, -+}; -+ -+/** -+ * rmw_spr -+ * -+ * Read-modify-write for a special-purpose-register. -+ **/ -+#define rmw_spr(spr_id, a_mask, o_mask) \ -+ do { \ -+ u64 value = mfspr(spr_id); \ -+ value &= (u64)(a_mask); \ -+ value |= (u64)(o_mask); \ -+ mtspr((spr_id), value); \ -+ } while (0) -+ -+/** -+ * rmw_mmio_reg64 -+ * -+ * Read-modify-write for a 64-bit MMIO register. -+ **/ -+#define rmw_mmio_reg64(mem, a_mask, o_mask) \ -+ do { \ -+ u64 value = in_be64(&(mem)); \ -+ value &= (u64)(a_mask); \ -+ value |= (u64)(o_mask); \ -+ out_be64(&(mem), value); \ -+ } while (0) -+ -+/** -+ * rmwb_mmio_reg64 -+ * -+ * Set or unset a specified bit within a 64-bit MMIO register. -+ **/ -+#define rmwb_mmio_reg64(mem, bit_num, set_bit) \ -+ rmw_mmio_reg64((mem), ~(1UL << (63 - (bit_num))), \ -+ ((set_bit) << (63 - (bit_num)))) -+ -+/** -+ * passthru -+ * -+ * Enable or disable passthru mode in all the Cell signal islands. 
-+ **/ -+static int passthru(u32 cpu, u64 enable) -+{ -+ struct cbe_ppe_priv_regs __iomem *ppe_priv_regs; -+ struct cbe_pmd_regs __iomem *pmd_regs; -+ struct cbe_mic_tm_regs __iomem *mic_tm_regs; -+ struct pfm_cell_platform_pmu_info *info = -+ ((struct pfm_arch_pmu_info *) -+ (pfm_pmu_conf->pmu_info))->platform_info; -+ -+ ppe_priv_regs = info->get_cpu_ppe_priv_regs(cpu); -+ pmd_regs = info->get_cpu_pmd_regs(cpu); -+ mic_tm_regs = info->get_cpu_mic_tm_regs(cpu); -+ -+ if (!ppe_priv_regs || !pmd_regs || !mic_tm_regs) { -+ PFM_ERR("Error getting Cell PPE, PMD, and MIC " -+ "register maps: 0x%p, 0x%p, 0x%p", -+ ppe_priv_regs, pmd_regs, mic_tm_regs); -+ return -EINVAL; -+ } -+ -+ rmwb_mmio_reg64(ppe_priv_regs->L2_debug1, 61, enable); -+ rmwb_mmio_reg64(ppe_priv_regs->ciu_dr1, 5, enable); -+ rmwb_mmio_reg64(pmd_regs->on_ramp_trace, 39, enable); -+ rmwb_mmio_reg64(mic_tm_regs->MBL_debug, 20, enable); -+ -+ return 0; -+} -+ -+#define passthru_enable(cpu) passthru(cpu, 1) -+#define passthru_disable(cpu) passthru(cpu, 0) -+ -+static inline void reset_signal_registers(u32 cpu) -+{ -+ rmw_spr(SPRN_HID1, HID1_RESET_MASK, 0); -+} -+ -+/** -+ * celleb_reset_signals -+ * -+ * Non-rtas version of resetting the debug-bus signals. -+ **/ -+static int celleb_reset_signals(u32 cpu) -+{ -+ int rc; -+ rc = passthru_disable(cpu); -+ if (!rc) -+ reset_signal_registers(cpu); -+ return rc; -+} -+ -+/** -+ * ppu_selection -+ * -+ * Write the HID1 register to connect the specified PPU signal-group to the -+ * debug-bus. 
-+ **/ -+static int ppu_selection(struct cell_rtas_arg *signal) -+{ -+ u64 hid1_enable_word = 0; -+ u64 hid1_enable_mask = 0; -+ -+ switch (signal->signal_group) { -+ -+ case SIG_GROUP_PPU_IU1: /* 2.1 PPU Instruction Unit - Group 1 */ -+ switch (signal->bus_word) { -+ case BUS_WORD_0: -+ hid1_enable_mask = PPU_IU1_WORD0_HID1_EN_MASK; -+ hid1_enable_word = PPU_IU1_WORD0_HID1_EN_WORD; -+ break; -+ case BUS_WORD_1: -+ hid1_enable_mask = PPU_IU1_WORD1_HID1_EN_MASK; -+ hid1_enable_word = PPU_IU1_WORD1_HID1_EN_WORD; -+ break; -+ default: -+ PFM_ERR("Invalid bus-word (0x%x) for signal-group %d.", -+ signal->bus_word, signal->signal_group); -+ return -EINVAL; -+ } -+ break; -+ -+ case SIG_GROUP_PPU_XU: /* 2.2 PPU Execution Unit */ -+ switch (signal->bus_word) { -+ case BUS_WORD_0: -+ hid1_enable_mask = PPU_XU_WORD0_HID1_EN_MASK; -+ hid1_enable_word = PPU_XU_WORD0_HID1_EN_WORD; -+ break; -+ case BUS_WORD_1: -+ hid1_enable_mask = PPU_XU_WORD1_HID1_EN_MASK; -+ hid1_enable_word = PPU_XU_WORD1_HID1_EN_WORD; -+ break; -+ default: -+ PFM_ERR("Invalid bus-word (0x%x) for signal-group %d.", -+ signal->bus_word, signal->signal_group); -+ return -EINVAL; -+ } -+ break; -+ -+ default: -+ PFM_ERR("Signal-group %d not implemented.", -+ signal->signal_group); -+ return -EINVAL; -+ } -+ -+ rmw_spr(SPRN_HID1, hid1_enable_mask, hid1_enable_word); -+ -+ return 0; -+} -+ -+/** -+ * celleb_activate_signals -+ * -+ * Non-rtas version of activating the debug-bus signals. 
-+ **/ -+static int celleb_activate_signals(struct cell_rtas_arg *signals, -+ int num_signals) -+{ -+ int i, rc = -EINVAL; -+ -+ for (i = 0; i < num_signals; i++) { -+ switch (signals[i].signal_group) { -+ -+ /* 2.x PowerPC Processor Unit (PPU) Signal Selection */ -+ case SIG_GROUP_PPU_IU1: -+ case SIG_GROUP_PPU_XU: -+ rc = ppu_selection(signals + i); -+ if (rc) -+ return rc; -+ break; -+ -+ default: -+ PFM_ERR("Signal-group %d not implemented.", -+ signals[i].signal_group); -+ return -EINVAL; -+ } -+ } -+ -+ if (0 < i) -+ rc = passthru_enable(signals[0].cpu); -+ -+ return rc; -+} -+ -+/** -+ * ps3_reset_signals -+ * -+ * ps3 version of resetting the debug-bus signals. -+ **/ -+static int ps3_reset_signals(u32 cpu) -+{ -+#ifdef CONFIG_PPC_PS3 -+ return ps3_set_signal(0, 0, 0, 0); -+#else -+ return 0; -+#endif -+} -+ -+/** -+ * ps3_activate_signals -+ * -+ * ps3 version of activating the debug-bus signals. -+ **/ -+static int ps3_activate_signals(struct cell_rtas_arg *signals, -+ int num_signals) -+{ -+#ifdef CONFIG_PPC_PS3 -+ int i; -+ -+ for (i = 0; i < num_signals; i++) -+ ps3_set_signal(signals[i].signal_group, signals[i].bit, -+ signals[i].sub_unit, signals[i].bus_word); -+#endif -+ return 0; -+} -+ -+ -+/** -+ * reset_signals -+ * -+ * Call to the firmware (if available) to reset the debug-bus signals. -+ * Otherwise call the built-in version. -+ **/ -+int reset_signals(u32 cpu) -+{ -+ int rc; -+ -+ if (machine_is(celleb)) -+ rc = celleb_reset_signals(cpu); -+ else if (machine_is(ps3)) -+ rc = ps3_reset_signals(cpu); -+ else -+ rc = rtas_reset_signals(cpu); -+ -+ return rc; -+} -+ -+/** -+ * activate_signals -+ * -+ * Call to the firmware (if available) to activate the debug-bus signals. -+ * Otherwise call the built-in version. 
-+ **/ -+int activate_signals(struct cell_rtas_arg *signals, int num_signals) -+{ -+ int rc; -+ -+ if (machine_is(celleb)) -+ rc = celleb_activate_signals(signals, num_signals); -+ else if (machine_is(ps3)) -+ rc = ps3_activate_signals(signals, num_signals); -+ else -+ rc = rtas_activate_signals(signals, num_signals); -+ -+ return rc; -+} -+ -+/** -+ * pfm_cell_pmc_check -+ * -+ * Verify that we are going to write a valid value to the specified PMC. -+ **/ -+int pfm_cell_pmc_check(struct pfm_context *ctx, -+ struct pfm_event_set *set, -+ struct pfarg_pmc *req) -+{ -+ u16 cnum, reg_num = req->reg_num; -+ s16 signal_group = RTAS_SIGNAL_GROUP(req->reg_value); -+ u8 bus_word = RTAS_BUS_WORD(req->reg_value); -+ -+ if (reg_num < NR_CTRS || reg_num >= (NR_CTRS * 2)) -+ return -EINVAL; -+ -+ switch (signal_group) { -+ case SIG_GROUP_PPU_IU1: -+ case SIG_GROUP_PPU_XU: -+ if ((bus_word != 0) && (bus_word != 1)) { -+ PFM_ERR("Invalid bus word (%d) for signal-group %d", -+ bus_word, signal_group); -+ return -EINVAL; -+ } -+ break; -+ default: -+ PFM_ERR("Signal-group %d not implemented.", signal_group); -+ return -EINVAL; -+ } -+ -+ for (cnum = NR_CTRS; cnum < (NR_CTRS * 2); cnum++) { -+ if (test_bit(cnum, cast_ulp(set->used_pmcs)) && -+ bus_word == RTAS_BUS_WORD(set->pmcs[cnum]) && -+ signal_group != RTAS_SIGNAL_GROUP(set->pmcs[cnum])) { -+ PFM_ERR("Impossible signal-group combination: " -+ "(%u,%u,%d) (%u,%u,%d)", -+ reg_num, bus_word, signal_group, cnum, -+ RTAS_BUS_WORD(set->pmcs[cnum]), -+ RTAS_SIGNAL_GROUP(set->pmcs[cnum])); -+ return -EBUSY; -+ } -+ } -+ -+ return 0; -+} -+ -+/** -+ * write_pm07_event -+ * -+ * Pull out the RTAS arguments from the 64-bit register value and make the -+ * RTAS activate-signals call. 
-+ **/ -+static void write_pm07_event(int cpu, unsigned int ctr, u64 value) -+{ -+ struct cell_rtas_arg signal; -+ s32 signal_number; -+ int rc; -+ -+ signal_number = RTAS_SIGNAL_NUMBER(value); -+ if (!signal_number) { -+ /* Don't include counters that are counting cycles. */ -+ return; -+ } -+ -+ signal.cpu = RTAS_CPU(cpu); -+ signal.bus_word = 1 << RTAS_BUS_WORD(value); -+ signal.sub_unit = RTAS_SUB_UNIT(value); -+ signal.signal_group = signal_number / 100; -+ signal.bit = abs(signal_number) % 100; -+ -+ rc = activate_signals(&signal, 1); -+ if (rc) { -+ PFM_WARN("%s(%d, %u, %lu): Error calling " -+ "activate_signals(): %d\n", __func__, -+ cpu, ctr, (unsigned long)value, rc); -+ /* FIX: Could we change this routine to return an error? */ -+ } -+} -+ -+/** -+ * pfm_cell_probe_pmu -+ * -+ * Simply check the processor version register to see if we're currently -+ * on a Cell system. -+ **/ -+static int pfm_cell_probe_pmu(void) -+{ -+ unsigned long pvr = mfspr(SPRN_PVR); -+ -+ if (PVR_VER(pvr) != PV_BE) -+ return -1; -+ -+ return 0; -+} -+ -+/** -+ * pfm_cell_write_pmc -+ **/ -+static void pfm_cell_write_pmc(unsigned int cnum, u64 value) -+{ -+ int cpu = smp_processor_id(); -+ struct pfm_cell_platform_pmu_info *info = -+ ((struct pfm_arch_pmu_info *) -+ (pfm_pmu_conf->pmu_info))->platform_info; -+ -+ if (cnum < NR_CTRS) { -+ info->write_pm07_control(cpu, cnum, value); -+ -+ } else if (cnum < NR_CTRS * 2) { -+ write_pm07_event(cpu, cnum - NR_CTRS, value); -+ -+ } else if (cnum == CELL_PMC_PM_STATUS) { -+ /* The pm_status register must be treated separately from -+ * the other "global" PMCs. This call will ensure that -+ * the interrupts are routed to the correct CPU, as well -+ * as writing the desired value to the pm_status register. 
-+ */ -+ info->enable_pm_interrupts(cpu, info->get_hw_thread_id(cpu), -+ value); -+ -+ } else if (cnum < PFM_PM_NUM_PMCS) { -+ info->write_pm(cpu, cnum - (NR_CTRS * 2), value); -+ } -+} -+ -+/** -+ * pfm_cell_write_pmd -+ **/ -+static void pfm_cell_write_pmd(unsigned int cnum, u64 value) -+{ -+ int cpu = smp_processor_id(); -+ struct pfm_cell_platform_pmu_info *info = -+ ((struct pfm_arch_pmu_info *) -+ (pfm_pmu_conf->pmu_info))->platform_info; -+ -+ if (cnum < NR_CTRS) -+ info->write_ctr(cpu, cnum, value); -+} -+ -+/** -+ * pfm_cell_read_pmd -+ **/ -+static u64 pfm_cell_read_pmd(unsigned int cnum) -+{ -+ int cpu = smp_processor_id(); -+ struct pfm_cell_platform_pmu_info *info = -+ ((struct pfm_arch_pmu_info *) -+ (pfm_pmu_conf->pmu_info))->platform_info; -+ -+ if (cnum < NR_CTRS) -+ return info->read_ctr(cpu, cnum); -+ -+ return -EINVAL; -+} -+ -+/** -+ * pfm_cell_enable_counters -+ * -+ * Just need to turn on the global disable bit in pm_control. -+ **/ -+static void pfm_cell_enable_counters(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ struct pfm_cell_platform_pmu_info *info = -+ ((struct pfm_arch_pmu_info *) -+ (pfm_pmu_conf->pmu_info))->platform_info; -+ -+ info->enable_pm(smp_processor_id()); -+} -+ -+/** -+ * pfm_cell_disable_counters -+ * -+ * Just need to turn off the global disable bit in pm_control. -+ **/ -+static void pfm_cell_disable_counters(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ struct pfm_cell_platform_pmu_info *info = -+ ((struct pfm_arch_pmu_info *) -+ (pfm_pmu_conf->pmu_info))->platform_info; -+ -+ info->disable_pm(smp_processor_id()); -+ if (machine_is(ps3)) -+ reset_signals(smp_processor_id()); -+} -+ -+/* -+ * Return the thread id of the specified ppu signal. 
-+ */ -+static inline u32 get_target_ppu_thread_id(u32 group, u32 bit) -+{ -+ if ((group == SIG_GROUP_PPU_IU1 && -+ bit < PFM_PPU_IU1_THREAD1_BASE_BIT) || -+ (group == SIG_GROUP_PPU_XU && -+ bit < PFM_PPU_XU_THREAD1_BASE_BIT)) -+ return 0; -+ else -+ return 1; -+} -+ -+/* -+ * Return whether the specified counter is for PPU signal group. -+ */ -+static inline int is_counter_for_ppu_sig_grp(u32 counter_control, u32 sig_grp) -+{ -+ if (!(counter_control & CBE_PM_CTR_INPUT_CONTROL) && -+ (counter_control & CBE_PM_CTR_ENABLE) && -+ ((sig_grp == SIG_GROUP_PPU_IU1) || (sig_grp == SIG_GROUP_PPU_XU))) -+ return 1; -+ else -+ return 0; -+} -+ -+/* -+ * Search ppu signal groups. -+ */ -+static int get_ppu_signal_groups(struct pfm_event_set *set, -+ u32 *ppu_sig_grp0, u32 *ppu_sig_grp1) -+{ -+ u64 pm_event, *used_pmcs = set->used_pmcs; -+ int i, j; -+ u32 grp0_wd, grp1_wd, wd, sig_grp; -+ -+ *ppu_sig_grp0 = 0; -+ *ppu_sig_grp1 = 0; -+ grp0_wd = PFM_GROUP_CONTROL_GROUP0_WORD( -+ set->pmcs[CELL_PMC_GROUP_CONTROL]); -+ grp1_wd = PFM_GROUP_CONTROL_GROUP1_WORD( -+ set->pmcs[CELL_PMC_GROUP_CONTROL]); -+ -+ for (i = 0, j = 0; (i < NR_CTRS) && (j < PFM_NUM_OF_GROUPS); i++) { -+ if (test_bit(i + NR_CTRS, used_pmcs)) { -+ pm_event = set->pmcs[i + NR_CTRS]; -+ wd = PFM_EVENT_PMC_BUS_WORD(pm_event); -+ sig_grp = PFM_EVENT_PMC_SIGNAL_GROUP(pm_event); -+ if ((sig_grp == SIG_GROUP_PPU_IU1) || -+ (sig_grp == SIG_GROUP_PPU_XU)) { -+ -+ if (wd == grp0_wd && *ppu_sig_grp0 == 0) { -+ *ppu_sig_grp0 = sig_grp; -+ j++; -+ } else if (wd == grp1_wd && -+ *ppu_sig_grp1 == 0) { -+ *ppu_sig_grp1 = sig_grp; -+ j++; -+ } -+ } -+ } -+ } -+ return j; -+} -+ -+/** -+ * pfm_cell_restore_pmcs -+ * -+ * Write all control register values that are saved in the specified event -+ * set. We could use the pfm_arch_write_pmc() function to restore each PMC -+ * individually (as is done in other architectures), but that results in -+ * multiple RTAS calls. 
As an optimization, we will setup the RTAS argument -+ * array so we can do all event-control registers in one RTAS call. -+ * -+ * In per-thread mode, -+ * The counter enable bit of the pmX_control PMC is enabled while the target -+ * task runs on the target HW thread. -+ **/ -+void pfm_cell_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ u64 ctr_ctrl; -+ u64 *used_pmcs = set->used_pmcs; -+ int i; -+ int cpu = smp_processor_id(); -+ u32 current_th_id; -+ struct pfm_cell_platform_pmu_info *info = -+ ((struct pfm_arch_pmu_info *) -+ (pfm_pmu_conf->pmu_info))->platform_info; -+ -+ for (i = 0; i < NR_CTRS; i++) { -+ ctr_ctrl = set->pmcs[i]; -+ -+ if (ctr_ctrl & PFM_COUNTER_CTRL_PMC_PPU_TH0) { -+ current_th_id = info->get_hw_thread_id(cpu); -+ -+ /* -+ * Set the counter enable bit down if the current -+ * HW thread is NOT 0 -+ **/ -+ if (current_th_id) -+ ctr_ctrl = ctr_ctrl & ~CBE_PM_CTR_ENABLE; -+ -+ } else if (ctr_ctrl & PFM_COUNTER_CTRL_PMC_PPU_TH1) { -+ current_th_id = info->get_hw_thread_id(cpu); -+ -+ /* -+ * Set the counter enable bit down if the current -+ * HW thread is 0 -+ **/ -+ if (!current_th_id) -+ ctr_ctrl = ctr_ctrl & ~CBE_PM_CTR_ENABLE; -+ } -+ -+ /* Write the per-counter control register. If the PMC is not -+ * in use, then it will simply clear the register, which will -+ * disable the associated counter. -+ */ -+ info->write_pm07_control(cpu, i, ctr_ctrl); -+ -+ if (test_bit(i + NR_CTRS, used_pmcs)) -+ write_pm07_event(cpu, 0, set->pmcs[i + NR_CTRS]); -+ } -+ -+ /* Write all the global PMCs. Need to call pfm_cell_write_pmc() -+ * instead of cbe_write_pm() due to special handling for the -+ * pm_status register. -+ */ -+ for (i *= 2; i < PFM_PM_NUM_PMCS; i++) -+ pfm_cell_write_pmc(i, set->pmcs[i]); -+} -+ -+/** -+ * pfm_cell_restore_pmds -+ * -+ * Write to pm_control register before writing to counter registers -+ * so that we can decide the counter width berfore writing to the couters. 
-+ **/ -+void pfm_cell_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ u64 *used_pmds; -+ unsigned int i, max_pmd; -+ int cpu = smp_processor_id(); -+ struct pfm_cell_platform_pmu_info *info = -+ ((struct pfm_arch_pmu_info *) -+ (pfm_pmu_conf->pmu_info))->platform_info; -+ -+ /* -+ * Write pm_control register value -+ */ -+ info->write_pm(cpu, pm_control, -+ set->pmcs[CELL_PMC_PM_CONTROL] & -+ ~CBE_PM_ENABLE_PERF_MON); -+ PFM_DBG("restore pm_control(0x%lx) before restoring pmds", -+ set->pmcs[CELL_PMC_PM_CONTROL]); -+ -+ max_pmd = ctx->regs.max_pmd; -+ used_pmds = set->used_pmds; -+ -+ for (i = 0; i < max_pmd; i++) -+ if (test_bit(i, used_pmds) && -+ !(pfm_pmu_conf->pmd_desc[i].type & PFM_REG_RO)) -+ pfm_cell_write_pmd(i, set->pmds[i].value); -+} -+ -+/** -+ * pfm_cell_get_cntr_width -+ * -+ * This function check the 16bit counter field in pm_control pmc. -+ * -+ * Return value -+ * 16 : all counters are 16bit width. -+ * 32 : all counters are 32bit width. -+ * 0 : several counter width exists. -+ **/ -+static int pfm_cell_get_cntr_width(struct pfm_context *ctx, -+ struct pfm_event_set *s) -+{ -+ int width = 0; -+ int tmp = 0; -+ u64 cntr_field; -+ -+ if (ctx->flags.switch_ovfl || ctx->flags.switch_time) { -+ list_for_each_entry(s, &ctx->set_list, list) { -+ cntr_field = s->pmcs[CELL_PMC_PM_CONTROL] & -+ CELL_PMC_PM_CONTROL_CNTR_MASK; -+ -+ if (cntr_field == CELL_PMC_PM_CONTROL_CNTR_16) -+ tmp = 16; -+ else if (cntr_field == 0x0) -+ tmp = 32; -+ else -+ return 0; -+ -+ if (tmp != width && width != 0) -+ return 0; -+ -+ width = tmp; -+ } -+ } else { -+ cntr_field = s->pmcs[CELL_PMC_PM_CONTROL] & -+ CELL_PMC_PM_CONTROL_CNTR_MASK; -+ -+ if (cntr_field == CELL_PMC_PM_CONTROL_CNTR_16) -+ width = 16; -+ else if (cntr_field == 0x0) -+ width = 32; -+ else -+ width = 0; -+ } -+ return width; -+} -+ -+/** -+ * pfm_cell_check_cntr_ovfl_mask -+ * -+ * Return value -+ * 1 : cntr_ovfl interrupt is used. -+ * 0 : cntr_ovfl interrupt is not used. 
-+ **/ -+static int pfm_cell_check_cntr_ovfl(struct pfm_context *ctx, -+ struct pfm_event_set *s) -+{ -+ if (ctx->flags.switch_ovfl || ctx->flags.switch_time) { -+ list_for_each_entry(s, &ctx->set_list, list) { -+ if (CBE_PM_OVERFLOW_CTRS(s->pmcs[CELL_PMC_PM_STATUS])) -+ return 1; -+ } -+ } else { -+ if (CBE_PM_OVERFLOW_CTRS(s->pmcs[CELL_PMC_PM_STATUS])) -+ return 1; -+ } -+ return 0; -+} -+ -+#ifdef CONFIG_PPC_PS3 -+/** -+ * update_sub_unit_field -+ * -+ **/ -+static inline u64 update_sub_unit_field(u64 pm_event, u64 spe_id) -+{ -+ return ((pm_event & 0xFFFF0000FFFFFFFF) | (spe_id << 32)); -+} -+ -+/** -+ * pfm_get_spe_id -+ * -+ **/ -+static u64 pfm_get_spe_id(void *arg) -+{ -+ struct spu *spu = arg; -+ u64 spe_id; -+ -+ if (machine_is(ps3)) -+ spe_id = ps3_get_spe_id(arg); -+ else -+ spe_id = spu->spe_id; -+ -+ return spe_id; -+} -+ -+/** -+ * pfm_spu_number_to_id -+ * -+ **/ -+static int pfm_spu_number_to_id(int number, u64 *spe_id) -+{ -+ struct spu *spu; -+ int i; -+ -+ for (i = 0; i < MAX_NUMNODES; i++) { -+ if (cbe_spu_info[i].n_spus == 0) -+ continue; -+ -+ list_for_each_entry(spu, &cbe_spu_info[i].spus, cbe_list) -+ if (spu->number == number) { -+ *spe_id = pfm_get_spe_id(spu); -+ return 0; -+ } -+ } -+ return -ENODEV; -+} -+ -+/** -+ * pfm_update_pmX_event_subunit_field -+ * -+ * In system wide mode, -+ * This function updates the subunit field of SPE pmX_event. 
-+ **/ -+static int pfm_update_pmX_event_subunit_field(struct pfm_context *ctx) -+{ -+ struct pfm_event_set *set; -+ int i, last_pmc, ret; -+ u64 signal_group, spe_id; -+ int sub_unit; -+ u64 *used_pmcs; -+ -+ last_pmc = NR_CTRS + 8; -+ ret = 0; -+ list_for_each_entry(set, &ctx->set_list, list) { -+ -+ used_pmcs = set->used_pmcs; -+ for (i = NR_CTRS; i < last_pmc; i++) { -+ if (!test_bit(i, used_pmcs)) -+ continue; -+ -+ signal_group = PFM_EVENT_PMC_SIGNAL_GROUP(set->pmcs[i]); -+ -+ /* -+ * If the target event is a SPE signal group event, -+ * The sub_unit field in pmX_event pmc is changed to the -+ * specified spe_id. -+ */ -+ if (SIG_GROUP_SPU_BASE < signal_group && -+ signal_group < SIG_GROUP_EIB_BASE) { -+ sub_unit = RTAS_SUB_UNIT(set->pmcs[i]); -+ -+ ret = pfm_spu_number_to_id(sub_unit, &spe_id); -+ if (ret) -+ return ret; -+ -+ set->pmcs[i] = update_sub_unit_field( -+ set->pmcs[i], spe_id); -+ } -+ } -+ } -+ return 0; -+} -+#endif -+ -+/** -+ * pfm_cell_load_context -+ * -+ * In per-thread mode, -+ * The pmX_control PMCs which are used for PPU IU/XU event are marked with -+ * the thread id(PFM_COUNTER_CTRL_PMC_PPU_TH0/TH1). -+ **/ -+static int pfm_cell_load_context(struct pfm_context *ctx) -+{ -+ int i; -+ u32 ppu_sig_grp[PFM_NUM_OF_GROUPS] = {SIG_GROUP_NONE, SIG_GROUP_NONE}; -+ u32 bit; -+ int index; -+ u32 target_th_id; -+ int ppu_sig_num = 0; -+ struct pfm_event_set *s; -+ int cntr_width = 32; -+ int ret = 0; -+ -+ if (pfm_cell_check_cntr_ovfl(ctx, ctx->active_set)) { -+ cntr_width = pfm_cell_get_cntr_width(ctx, ctx->active_set); -+ -+ /* -+ * Counter overflow interrupt works with only 32bit counter, -+ * because perfmon core uses pfm_cell_pmu_conf.counter_width -+ * to deal with the counter overflow. we can't change the -+ * counter width here. 
-+ */ -+ if (cntr_width != 32) -+ return -EINVAL; -+ } -+ -+ if (ctx->flags.system) { -+#ifdef CONFIG_PPC_PS3 -+ if (machine_is(ps3)) -+ ret = pfm_update_pmX_event_subunit_field(ctx); -+#endif -+ return ret; -+ } -+ -+ list_for_each_entry(s, &ctx->set_list, list) { -+ ppu_sig_num = get_ppu_signal_groups(s, &ppu_sig_grp[0], -+ &ppu_sig_grp[1]); -+ -+ for (i = 0; i < NR_CTRS; i++) { -+ index = PFM_PM_CTR_INPUT_MUX_GROUP_INDEX(s->pmcs[i]); -+ if (ppu_sig_num && -+ (ppu_sig_grp[index] != SIG_GROUP_NONE) && -+ is_counter_for_ppu_sig_grp(s->pmcs[i], -+ ppu_sig_grp[index])) { -+ -+ bit = PFM_PM_CTR_INPUT_MUX_BIT(s->pmcs[i]); -+ target_th_id = get_target_ppu_thread_id( -+ ppu_sig_grp[index], bit); -+ if (!target_th_id) -+ s->pmcs[i] |= -+ PFM_COUNTER_CTRL_PMC_PPU_TH0; -+ else -+ s->pmcs[i] |= -+ PFM_COUNTER_CTRL_PMC_PPU_TH1; -+ PFM_DBG("set:%d mark ctr:%d target_thread:%d", -+ s->id, i, target_th_id); -+ } -+ } -+ } -+ -+ return ret; -+} -+ -+/** -+ * pfm_cell_unload_context -+ * -+ * For system-wide contexts and self-monitored contexts, make the RTAS call -+ * to reset the debug-bus signals. -+ * -+ * For non-self-monitored contexts, the monitored thread will already have -+ * been taken off the CPU and we don't need to do anything additional. -+ **/ -+static void pfm_cell_unload_context(struct pfm_context *ctx) -+{ -+ if (ctx->task == current || ctx->flags.system) -+ reset_signals(smp_processor_id()); -+} -+ -+/** -+ * pfm_cell_ctxswout_thread -+ * -+ * When a monitored thread is switched out (self-monitored or externally -+ * monitored) we need to reset the debug-bus signals so the next context that -+ * gets switched in can start from a clean set of signals. 
-+ **/ -+int pfm_cell_ctxswout_thread(struct task_struct *task, -+ struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ reset_signals(smp_processor_id()); -+ return 0; -+} -+ -+/** -+ * pfm_cell_get_ovfl_pmds -+ * -+ * Determine which counters in this set have overflowed and fill in the -+ * set->povfl_pmds mask and set->npend_ovfls count. On Cell, the pm_status -+ * register contains a bit for each counter to indicate overflow. However, -+ * those 8 bits are in the reverse order than what Perfmon2 is expecting, -+ * so we need to reverse the order of the overflow bits. -+ **/ -+static void pfm_cell_get_ovfl_pmds(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx); -+ u32 pm_status, ovfl_ctrs; -+ u64 povfl_pmds = 0; -+ int i; -+ struct pfm_cell_platform_pmu_info *info = -+ ((struct pfm_arch_pmu_info *) -+ (pfm_pmu_conf->pmu_info))->platform_info; -+ -+ if (!ctx_arch->last_read_updated) -+ /* This routine was not called via the interrupt handler. -+ * Need to start by getting interrupts and updating -+ * last_read_pm_status. -+ */ -+ ctx_arch->last_read_pm_status = -+ info->get_and_clear_pm_interrupts(smp_processor_id()); -+ -+ /* Reset the flag that the interrupt handler last read pm_status. */ -+ ctx_arch->last_read_updated = 0; -+ -+ pm_status = ctx_arch->last_read_pm_status & -+ set->pmcs[CELL_PMC_PM_STATUS]; -+ ovfl_ctrs = CBE_PM_OVERFLOW_CTRS(pm_status); -+ -+ /* Reverse the order of the bits in ovfl_ctrs -+ * and store the result in povfl_pmds. -+ */ -+ for (i = 0; i < PFM_PM_NUM_PMDS; i++) { -+ povfl_pmds = (povfl_pmds << 1) | (ovfl_ctrs & 1); -+ ovfl_ctrs >>= 1; -+ } -+ -+ /* Mask povfl_pmds with set->used_pmds to get set->povfl_pmds. -+ * Count the bits set in set->povfl_pmds to get set->npend_ovfls. 
-+ */ -+ bitmap_and(set->povfl_pmds, &povfl_pmds, -+ set->used_pmds, PFM_PM_NUM_PMDS); -+ set->npend_ovfls = bitmap_weight(set->povfl_pmds, PFM_PM_NUM_PMDS); -+} -+ -+/** -+ * pfm_cell_acquire_pmu -+ * -+ * acquire PMU resource. -+ * This acquisition is done when the first context is created. -+ **/ -+int pfm_cell_acquire_pmu(u64 *unavail_pmcs, u64 *unavail_pmds) -+{ -+#ifdef CONFIG_PPC_PS3 -+ int ret; -+ -+ if (machine_is(ps3)) { -+ PFM_DBG(""); -+ ret = ps3_lpm_open(PS3_LPM_TB_TYPE_INTERNAL, NULL, 0); -+ if (ret) { -+ PFM_ERR("Can't create PS3 lpm. error:%d", ret); -+ return -EFAULT; -+ } -+ } -+#endif -+ return 0; -+} -+ -+/** -+ * pfm_cell_release_pmu -+ * -+ * release PMU resource. -+ * actual release happens when last context is destroyed -+ **/ -+void pfm_cell_release_pmu(void) -+{ -+#ifdef CONFIG_PPC_PS3 -+ if (machine_is(ps3)) { -+ if (ps3_lpm_close()) -+ PFM_ERR("Can't delete PS3 lpm."); -+ } -+#endif -+} -+ -+/** -+ * handle_trace_buffer_interrupts -+ * -+ * This routine is for processing just the interval timer and trace buffer -+ * overflow interrupts. Performance counter interrupts are handled by the -+ * perf_irq_handler() routine, which reads and saves the pm_status register. -+ * This routine should not read the actual pm_status register, but rather -+ * the value passed in. -+ **/ -+static void handle_trace_buffer_interrupts(unsigned long iip, -+ struct pt_regs *regs, -+ struct pfm_context *ctx, -+ u32 pm_status) -+{ -+ /* FIX: Currently ignoring trace-buffer interrupts. */ -+ return; -+} -+ -+/** -+ * pfm_cell_irq_handler -+ * -+ * Handler for all Cell performance-monitor interrupts. 
-+ **/ -+static void pfm_cell_irq_handler(struct pt_regs *regs, struct pfm_context *ctx) -+{ -+ struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx); -+ u32 last_read_pm_status; -+ int cpu = smp_processor_id(); -+ struct pfm_cell_platform_pmu_info *info = -+ ((struct pfm_arch_pmu_info *) -+ (pfm_pmu_conf->pmu_info))->platform_info; -+ -+ /* Need to disable and reenable the performance counters to get the -+ * desired behavior from the hardware. This is specific to the Cell -+ * PMU hardware. -+ */ -+ info->disable_pm(cpu); -+ -+ /* Read the pm_status register to get the interrupt bits. If a -+ * perfmormance counter overflow interrupt occurred, call the core -+ * perfmon interrupt handler to service the counter overflow. If the -+ * interrupt was for the interval timer or the trace_buffer, -+ * call the interval timer and trace buffer interrupt handler. -+ * -+ * The value read from the pm_status register is stored in the -+ * pmf_arch_context structure for use by other routines. Note that -+ * reading the pm_status register resets the interrupt flags to zero. -+ * Hence, it is important that the register is only read in one place. -+ * -+ * The pm_status reg interrupt reg format is: -+ * [pmd0:pmd1:pmd2:pmd3:pmd4:pmd5:pmd6:pmd7:intt:tbf:tbu:] -+ * - pmd0 to pm7 are the perf counter overflow interrupts. -+ * - intt is the interval timer overflowed interrupt. -+ * - tbf is the trace buffer full interrupt. -+ * - tbu is the trace buffer underflow interrupt. -+ * - The pmd0 bit is the MSB of the 32 bit register. -+ */ -+ ctx_arch->last_read_pm_status = last_read_pm_status = -+ info->get_and_clear_pm_interrupts(cpu); -+ -+ /* Set flag for pfm_cell_get_ovfl_pmds() routine so it knows -+ * last_read_pm_status was updated by the interrupt handler. -+ */ -+ ctx_arch->last_read_updated = 1; -+ -+ if (last_read_pm_status & CBE_PM_ALL_OVERFLOW_INTR) -+ /* At least one counter overflowed. 
*/ -+ pfm_interrupt_handler(instruction_pointer(regs), regs); -+ -+ if (last_read_pm_status & (CBE_PM_INTERVAL_INTR | -+ CBE_PM_TRACE_BUFFER_FULL_INTR | -+ CBE_PM_TRACE_BUFFER_UNDERFLOW_INTR)) -+ /* Trace buffer or interval timer overflow. */ -+ handle_trace_buffer_interrupts(instruction_pointer(regs), -+ regs, ctx, last_read_pm_status); -+ -+ /* The interrupt settings is the value written to the pm_status -+ * register. It is saved in the context when the register is -+ * written. -+ */ -+ info->enable_pm_interrupts(cpu, info->get_hw_thread_id(cpu), -+ ctx->active_set->pmcs[CELL_PMC_PM_STATUS]); -+ -+ /* The writes to the various performance counters only writes to a -+ * latch. The new values (interrupt setting bits, reset counter value -+ * etc.) are not copied to the actual registers until the performance -+ * monitor is enabled. In order to get this to work as desired, the -+ * permormance monitor needs to be disabled while writting to the -+ * latches. This is a HW design issue. -+ */ -+ info->enable_pm(cpu); -+} -+ -+ -+static struct pfm_cell_platform_pmu_info ps3_platform_pmu_info = { -+#ifdef CONFIG_PPC_PS3 -+ .read_ctr = ps3_read_ctr, -+ .write_ctr = ps3_write_ctr, -+ .write_pm07_control = ps3_write_pm07_control, -+ .write_pm = ps3_write_pm, -+ .enable_pm = ps3_enable_pm, -+ .disable_pm = ps3_disable_pm, -+ .enable_pm_interrupts = ps3_enable_pm_interrupts, -+ .get_and_clear_pm_interrupts = ps3_get_and_clear_pm_interrupts, -+ .get_hw_thread_id = ps3_get_hw_thread_id, -+ .get_cpu_ppe_priv_regs = NULL, -+ .get_cpu_pmd_regs = NULL, -+ .get_cpu_mic_tm_regs = NULL, -+ .rtas_token = NULL, -+ .rtas_call = NULL, -+#endif -+}; -+ -+static struct pfm_cell_platform_pmu_info native_platform_pmu_info = { -+#ifdef CONFIG_PPC_CELL_NATIVE -+ .read_ctr = cbe_read_ctr, -+ .write_ctr = cbe_write_ctr, -+ .write_pm07_control = cbe_write_pm07_control, -+ .write_pm = cbe_write_pm, -+ .enable_pm = cbe_enable_pm, -+ .disable_pm = cbe_disable_pm, -+ .enable_pm_interrupts = 
cbe_enable_pm_interrupts, -+ .get_and_clear_pm_interrupts = cbe_get_and_clear_pm_interrupts, -+ .get_hw_thread_id = cbe_get_hw_thread_id, -+ .get_cpu_ppe_priv_regs = cbe_get_cpu_ppe_priv_regs, -+ .get_cpu_pmd_regs = cbe_get_cpu_pmd_regs, -+ .get_cpu_mic_tm_regs = cbe_get_cpu_mic_tm_regs, -+ .rtas_token = rtas_token, -+ .rtas_call = rtas_call, -+#endif -+}; -+ -+static struct pfm_arch_pmu_info pfm_cell_pmu_info = { -+ .pmu_style = PFM_POWERPC_PMU_CELL, -+ .acquire_pmu = pfm_cell_acquire_pmu, -+ .release_pmu = pfm_cell_release_pmu, -+ .write_pmc = pfm_cell_write_pmc, -+ .write_pmd = pfm_cell_write_pmd, -+ .read_pmd = pfm_cell_read_pmd, -+ .enable_counters = pfm_cell_enable_counters, -+ .disable_counters = pfm_cell_disable_counters, -+ .irq_handler = pfm_cell_irq_handler, -+ .get_ovfl_pmds = pfm_cell_get_ovfl_pmds, -+ .restore_pmcs = pfm_cell_restore_pmcs, -+ .restore_pmds = pfm_cell_restore_pmds, -+ .ctxswout_thread = pfm_cell_ctxswout_thread, -+ .load_context = pfm_cell_load_context, -+ .unload_context = pfm_cell_unload_context, -+}; -+ -+static struct pfm_pmu_config pfm_cell_pmu_conf = { -+ .pmu_name = "Cell", -+ .version = "0.1", -+ .counter_width = 32, -+ .pmd_desc = pfm_cell_pmd_desc, -+ .pmc_desc = pfm_cell_pmc_desc, -+ .num_pmc_entries = PFM_PM_NUM_PMCS, -+ .num_pmd_entries = PFM_PM_NUM_PMDS, -+ .probe_pmu = pfm_cell_probe_pmu, -+ .pmu_info = &pfm_cell_pmu_info, -+ .flags = PFM_PMU_BUILTIN_FLAG, -+ .owner = THIS_MODULE, -+}; -+ -+/** -+ * pfm_cell_platform_probe -+ * -+ * If we're on a system without the firmware rtas call available, set up the -+ * PMC write-checker for all the pmX_event control registers. 
-+ **/ -+static void pfm_cell_platform_probe(void) -+{ -+ if (machine_is(celleb)) { -+ int cnum; -+ pfm_cell_pmu_conf.pmc_write_check = pfm_cell_pmc_check; -+ for (cnum = NR_CTRS; cnum < (NR_CTRS * 2); cnum++) -+ pfm_cell_pmc_desc[cnum].type |= PFM_REG_WC; -+ } -+ -+ if (machine_is(ps3)) -+ pfm_cell_pmu_info.platform_info = &ps3_platform_pmu_info; -+ else -+ pfm_cell_pmu_info.platform_info = &native_platform_pmu_info; -+} -+ -+static int __init pfm_cell_pmu_init_module(void) -+{ -+ pfm_cell_platform_probe(); -+ return pfm_pmu_register(&pfm_cell_pmu_conf); -+} -+ -+static void __exit pfm_cell_pmu_cleanup_module(void) -+{ -+ pfm_pmu_unregister(&pfm_cell_pmu_conf); -+} -+ -+module_init(pfm_cell_pmu_init_module); -+module_exit(pfm_cell_pmu_cleanup_module); -diff --git a/arch/powerpc/perfmon/perfmon_power4.c b/arch/powerpc/perfmon/perfmon_power4.c -new file mode 100644 -index 0000000..eba9e8c ---- /dev/null -+++ b/arch/powerpc/perfmon/perfmon_power4.c -@@ -0,0 +1,309 @@ -+/* -+ * This file contains the POWER4 PMU register description tables -+ * and pmc checker used by perfmon.c. -+ * -+ * Copyright (c) 2007, IBM Corporation. -+ * -+ * Based on a simple modification of perfmon_power5.c for POWER4 by -+ * Corey Ashford . -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+ -+MODULE_AUTHOR("Corey Ashford "); -+MODULE_DESCRIPTION("POWER4 PMU description table"); -+MODULE_LICENSE("GPL"); -+ -+static struct pfm_regmap_desc pfm_power4_pmc_desc[] = { -+/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", MMCR0_FC, 0, 0, SPRN_MMCR0), -+/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0, 0, 0, SPRN_MMCR1), -+/* mmcra */ PMC_D(PFM_REG_I, "MMCRA", 0, 0, 0, SPRN_MMCRA) -+}; -+#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_power4_pmc_desc) -+ -+/* The TB and PURR registers are read-only. Also, note that the TB register -+ * actually consists of both the 32-bit SPRN_TBRU and SPRN_TBRL registers. -+ * For Perfmon2's purposes, we'll treat it as a single 64-bit register. -+ */ -+static struct pfm_regmap_desc pfm_power4_pmd_desc[] = { -+/* tb */ PMD_D((PFM_REG_I|PFM_REG_RO), "TB", SPRN_TBRL), -+/* pmd1 */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1), -+/* pmd2 */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2), -+/* pmd3 */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3), -+/* pmd4 */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4), -+/* pmd5 */ PMD_D(PFM_REG_C, "PMC5", SPRN_PMC5), -+/* pmd6 */ PMD_D(PFM_REG_C, "PMC6", SPRN_PMC6), -+/* pmd7 */ PMD_D(PFM_REG_C, "PMC7", SPRN_PMC7), -+/* pmd8 */ PMD_D(PFM_REG_C, "PMC8", SPRN_PMC8) -+}; -+#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_power4_pmd_desc) -+ -+static int pfm_power4_probe_pmu(void) -+{ -+ unsigned long pvr = mfspr(SPRN_PVR); -+ int ver = PVR_VER(pvr); -+ -+ if ((ver == PV_POWER4) || (ver == PV_POWER4p)) -+ return 0; -+ -+ return -1; -+} -+ -+static void pfm_power4_write_pmc(unsigned int cnum, u64 value) -+{ -+ switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) { -+ case SPRN_MMCR0: -+ mtspr(SPRN_MMCR0, value); -+ break; -+ case SPRN_MMCR1: -+ mtspr(SPRN_MMCR1, value); -+ break; -+ case SPRN_MMCRA: -+ mtspr(SPRN_MMCRA, value); -+ 
break; -+ default: -+ BUG(); -+ } -+} -+ -+static void pfm_power4_write_pmd(unsigned int cnum, u64 value) -+{ -+ u64 ovfl_mask = pfm_pmu_conf->ovfl_mask; -+ -+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { -+ case SPRN_PMC1: -+ mtspr(SPRN_PMC1, value & ovfl_mask); -+ break; -+ case SPRN_PMC2: -+ mtspr(SPRN_PMC2, value & ovfl_mask); -+ break; -+ case SPRN_PMC3: -+ mtspr(SPRN_PMC3, value & ovfl_mask); -+ break; -+ case SPRN_PMC4: -+ mtspr(SPRN_PMC4, value & ovfl_mask); -+ break; -+ case SPRN_PMC5: -+ mtspr(SPRN_PMC5, value & ovfl_mask); -+ break; -+ case SPRN_PMC6: -+ mtspr(SPRN_PMC6, value & ovfl_mask); -+ break; -+ case SPRN_PMC7: -+ mtspr(SPRN_PMC7, value & ovfl_mask); -+ break; -+ case SPRN_PMC8: -+ mtspr(SPRN_PMC8, value & ovfl_mask); -+ break; -+ case SPRN_TBRL: -+ case SPRN_PURR: -+ /* Ignore writes to read-only registers. */ -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static u64 pfm_power4_read_pmd(unsigned int cnum) -+{ -+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { -+ case SPRN_PMC1: -+ return mfspr(SPRN_PMC1); -+ case SPRN_PMC2: -+ return mfspr(SPRN_PMC2); -+ case SPRN_PMC3: -+ return mfspr(SPRN_PMC3); -+ case SPRN_PMC4: -+ return mfspr(SPRN_PMC4); -+ case SPRN_PMC5: -+ return mfspr(SPRN_PMC5); -+ case SPRN_PMC6: -+ return mfspr(SPRN_PMC6); -+ case SPRN_PMC7: -+ return mfspr(SPRN_PMC7); -+ case SPRN_PMC8: -+ return mfspr(SPRN_PMC8); -+ case SPRN_TBRL: -+ return ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL); -+ case SPRN_PURR: -+ if (cpu_has_feature(CPU_FTR_PURR)) -+ return mfspr(SPRN_PURR); -+ else -+ return 0; -+ default: -+ BUG(); -+ } -+} -+ -+/* forward decl */ -+static void pfm_power4_disable_counters(struct pfm_context *ctx, -+ struct pfm_event_set *set); -+ -+/** -+ * pfm_power4_enable_counters -+ * -+ **/ -+static void pfm_power4_enable_counters(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ unsigned int i, max_pmc; -+ -+ /* Make sure the counters are disabled before touching the other -+ control registers */ -+ 
pfm_power4_disable_counters(ctx, set); -+ -+ max_pmc = ctx->regs.max_pmc; -+ -+ /* Write MMCR0 last, and a fairly easy way to do this is to write -+ the registers in the reverse order */ -+ for (i = max_pmc; i != 0; i--) -+ if (test_bit(i - 1, set->used_pmcs)) -+ pfm_power4_write_pmc(i - 1, set->pmcs[i - 1]); -+} -+ -+/** -+ * pfm_power4_disable_counters -+ * -+ **/ -+static void pfm_power4_disable_counters(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ /* Set the Freeze Counters bit */ -+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC); -+ asm volatile ("sync"); -+} -+ -+/** -+ * pfm_power4_get_ovfl_pmds -+ * -+ * Determine which counters in this set have overflowed and fill in the -+ * set->povfl_pmds mask and set->npend_ovfls count. -+ **/ -+static void pfm_power4_get_ovfl_pmds(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ unsigned int i; -+ unsigned int max_pmd = ctx->regs.max_intr_pmd; -+ u64 *used_pmds = set->used_pmds; -+ u64 *cntr_pmds = ctx->regs.cnt_pmds; -+ u64 width_mask = 1 << pfm_pmu_conf->counter_width; -+ u64 new_val, mask[PFM_PMD_BV]; -+ -+ bitmap_and(cast_ulp(mask), cast_ulp(cntr_pmds), -+ cast_ulp(used_pmds), max_pmd); -+ -+ for (i = 0; i < max_pmd; i++) { -+ if (test_bit(i, mask)) { -+ new_val = pfm_power4_read_pmd(i); -+ if (new_val & width_mask) { -+ set_bit(i, set->povfl_pmds); -+ set->npend_ovfls++; -+ } -+ } -+ } -+} -+ -+static void pfm_power4_irq_handler(struct pt_regs *regs, -+ struct pfm_context *ctx) -+{ -+ u32 mmcr0; -+ -+ /* Disable the counters (set the freeze bit) to not polute -+ * the counts. -+ */ -+ mmcr0 = mfspr(SPRN_MMCR0); -+ mtspr(SPRN_MMCR0, (mmcr0 | MMCR0_FC)); -+ -+ /* Set the PMM bit (see comment below). */ -+ mtmsrd(mfmsr() | MSR_PMM); -+ -+ pfm_interrupt_handler(instruction_pointer(regs), regs); -+ -+ mmcr0 = mfspr(SPRN_MMCR0); -+ -+ /* -+ * Reset the perfmon trigger if -+ * not in masking mode. 
-+ */ -+ if (ctx->state != PFM_CTX_MASKED) -+ mmcr0 |= MMCR0_PMXE; -+ -+ /* -+ * We must clear the PMAO bit on some (GQ) chips. Just do it -+ * all the time. -+ */ -+ mmcr0 &= ~MMCR0_PMAO; -+ -+ /* -+ * Now clear the freeze bit, counting will not start until we -+ * rfid from this exception, because only at that point will -+ * the PMM bit be cleared. -+ */ -+ mmcr0 &= ~MMCR0_FC; -+ mtspr(SPRN_MMCR0, mmcr0); -+} -+ -+static void pfm_power4_resend_irq(struct pfm_context *ctx) -+{ -+ /* -+ * Assert the PMAO bit to cause a PMU interrupt. Make sure we -+ * trigger the edge detection circuitry for PMAO -+ */ -+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO); -+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO); -+} -+ -+struct pfm_arch_pmu_info pfm_power4_pmu_info = { -+ .pmu_style = PFM_POWERPC_PMU_POWER4, -+ .write_pmc = pfm_power4_write_pmc, -+ .write_pmd = pfm_power4_write_pmd, -+ .read_pmd = pfm_power4_read_pmd, -+ .irq_handler = pfm_power4_irq_handler, -+ .get_ovfl_pmds = pfm_power4_get_ovfl_pmds, -+ .enable_counters = pfm_power4_enable_counters, -+ .disable_counters = pfm_power4_disable_counters, -+ .resend_irq = pfm_power4_resend_irq -+}; -+ -+/* -+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors! 
-+ */ -+static struct pfm_pmu_config pfm_power4_pmu_conf = { -+ .pmu_name = "POWER4", -+ .counter_width = 31, -+ .pmd_desc = pfm_power4_pmd_desc, -+ .pmc_desc = pfm_power4_pmc_desc, -+ .num_pmc_entries = PFM_PM_NUM_PMCS, -+ .num_pmd_entries = PFM_PM_NUM_PMDS, -+ .probe_pmu = pfm_power4_probe_pmu, -+ .pmu_info = &pfm_power4_pmu_info, -+ .flags = PFM_PMU_BUILTIN_FLAG, -+ .owner = THIS_MODULE -+}; -+ -+static int __init pfm_power4_pmu_init_module(void) -+{ -+ return pfm_pmu_register(&pfm_power4_pmu_conf); -+} -+ -+static void __exit pfm_power4_pmu_cleanup_module(void) -+{ -+ pfm_pmu_unregister(&pfm_power4_pmu_conf); -+} -+ -+module_init(pfm_power4_pmu_init_module); -+module_exit(pfm_power4_pmu_cleanup_module); -diff --git a/arch/powerpc/perfmon/perfmon_power5.c b/arch/powerpc/perfmon/perfmon_power5.c -new file mode 100644 -index 0000000..f4bb1ac ---- /dev/null -+++ b/arch/powerpc/perfmon/perfmon_power5.c -@@ -0,0 +1,326 @@ -+/* -+ * This file contains the POWER5 PMU register description tables -+ * and pmc checker used by perfmon.c. -+ * -+ * Copyright (c) 2005 David Gibson, IBM Corporation. -+ * -+ * Based on perfmon_p6.c: -+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+ -+MODULE_AUTHOR("David Gibson "); -+MODULE_DESCRIPTION("POWER5 PMU description table"); -+MODULE_LICENSE("GPL"); -+ -+static struct pfm_regmap_desc pfm_power5_pmc_desc[] = { -+/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", MMCR0_FC, 0, 0, SPRN_MMCR0), -+/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0, 0, 0, SPRN_MMCR1), -+/* mmcra */ PMC_D(PFM_REG_I, "MMCRA", 0, 0, 0, SPRN_MMCRA) -+}; -+#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_power5_pmc_desc) -+ -+/* The TB and PURR registers are read-only. Also, note that the TB register -+ * actually consists of both the 32-bit SPRN_TBRU and SPRN_TBRL registers. -+ * For Perfmon2's purposes, we'll treat it as a single 64-bit register. -+ */ -+static struct pfm_regmap_desc pfm_power5_pmd_desc[] = { -+/* tb */ PMD_D((PFM_REG_I|PFM_REG_RO), "TB", SPRN_TBRL), -+/* pmd1 */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1), -+/* pmd2 */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2), -+/* pmd3 */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3), -+/* pmd4 */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4), -+/* pmd5 */ PMD_D(PFM_REG_C, "PMC5", SPRN_PMC5), -+/* pmd6 */ PMD_D(PFM_REG_C, "PMC6", SPRN_PMC6), -+/* purr */ PMD_D((PFM_REG_I|PFM_REG_RO), "PURR", SPRN_PURR), -+}; -+#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_power5_pmd_desc) -+ -+/* forward decl */ -+static void pfm_power5_disable_counters(struct pfm_context *ctx, -+ struct pfm_event_set *set); -+ -+static int pfm_power5_probe_pmu(void) -+{ -+ unsigned long pvr = mfspr(SPRN_PVR); -+ -+ switch (PVR_VER(pvr)) { -+ case PV_POWER5: -+ return 0; -+ case PV_POWER5p: -+ return (PVR_REV(pvr) < 0x300) ? 
0 : -1; -+ default: -+ return -1; -+ } -+} -+ -+static void pfm_power5_write_pmc(unsigned int cnum, u64 value) -+{ -+ switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) { -+ case SPRN_MMCR0: -+ mtspr(SPRN_MMCR0, value); -+ break; -+ case SPRN_MMCR1: -+ mtspr(SPRN_MMCR1, value); -+ break; -+ case SPRN_MMCRA: -+ mtspr(SPRN_MMCRA, value); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static void pfm_power5_write_pmd(unsigned int cnum, u64 value) -+{ -+ u64 ovfl_mask = pfm_pmu_conf->ovfl_mask; -+ -+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { -+ case SPRN_PMC1: -+ mtspr(SPRN_PMC1, value & ovfl_mask); -+ break; -+ case SPRN_PMC2: -+ mtspr(SPRN_PMC2, value & ovfl_mask); -+ break; -+ case SPRN_PMC3: -+ mtspr(SPRN_PMC3, value & ovfl_mask); -+ break; -+ case SPRN_PMC4: -+ mtspr(SPRN_PMC4, value & ovfl_mask); -+ break; -+ case SPRN_PMC5: -+ mtspr(SPRN_PMC5, value & ovfl_mask); -+ break; -+ case SPRN_PMC6: -+ mtspr(SPRN_PMC6, value & ovfl_mask); -+ break; -+ case SPRN_TBRL: -+ case SPRN_PURR: -+ /* Ignore writes to read-only registers. 
*/ -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static u64 pfm_power5_read_pmd(unsigned int cnum) -+{ -+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { -+ case SPRN_PMC1: -+ return mfspr(SPRN_PMC1); -+ case SPRN_PMC2: -+ return mfspr(SPRN_PMC2); -+ case SPRN_PMC3: -+ return mfspr(SPRN_PMC3); -+ case SPRN_PMC4: -+ return mfspr(SPRN_PMC4); -+ case SPRN_PMC5: -+ return mfspr(SPRN_PMC5); -+ case SPRN_PMC6: -+ return mfspr(SPRN_PMC6); -+ case SPRN_TBRL: -+ return ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL); -+ case SPRN_PURR: -+ if (cpu_has_feature(CPU_FTR_PURR)) -+ return mfspr(SPRN_PURR); -+ else -+ return 0; -+ default: -+ BUG(); -+ } -+} -+ -+/** -+ * pfm_power5_enable_counters -+ * -+ **/ -+static void pfm_power5_enable_counters(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ unsigned int i, max_pmc; -+ -+ /* -+ * Make sure the counters are disabled before touching the -+ * other control registers -+ */ -+ pfm_power5_disable_counters(ctx, set); -+ -+ max_pmc = ctx->regs.max_pmc; -+ -+ /* -+ * Write MMCR0 last, and a fairly easy way to do -+ * this is to write the registers in the reverse -+ * order -+ */ -+ for (i = max_pmc; i != 0; i--) -+ if (test_bit(i - 1, set->used_pmcs)) -+ pfm_power5_write_pmc(i - 1, set->pmcs[i - 1]); -+} -+ -+/** -+ * pfm_power5_disable_counters -+ * -+ * Just need to zero all the control registers. -+ **/ -+static void pfm_power5_disable_counters(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ /* Set the Freeze Counters bit */ -+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC); -+ asm volatile ("sync"); -+} -+ -+/** -+ * pfm_power5_get_ovfl_pmds -+ * -+ * Determine which counters in this set have overflowed and fill in the -+ * set->povfl_pmds mask and set->npend_ovfls count. 
-+ **/ -+static void pfm_power5_get_ovfl_pmds(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ unsigned int i; -+ unsigned int max = ctx->regs.max_intr_pmd; -+ u64 *used_pmds = set->used_pmds; -+ u64 *intr_pmds = ctx->regs.intr_pmds; -+ u64 width_mask = 1 << pfm_pmu_conf->counter_width; -+ u64 new_val, mask[PFM_PMD_BV]; -+ -+ bitmap_and(cast_ulp(mask), cast_ulp(intr_pmds), -+ cast_ulp(used_pmds), max); -+ /* -+ * If either PMC5 or PMC6 are not being used, just zero out the unused -+ * ones so that they won't interrupt again for another 2^31 counts. -+ * Note that if no other counters overflowed, set->npend_ovfls will -+ * be zero upon returning from this call (i.e. a spurious -+ * interrupt), but that should be ok. -+ * -+ * If neither PMC5 nor PMC6 are used, the counters should be frozen -+ * via MMCR0_FC5_6 and zeroed out. -+ * -+ * If both PMC5 and PMC6 are used, they can be handled correctly by -+ * the loop that follows. -+ */ -+ -+ if (!test_bit(5, cast_ulp(used_pmds))) -+ mtspr(SPRN_PMC5, 0); -+ if (!test_bit(6, cast_ulp(used_pmds))) -+ mtspr(SPRN_PMC6, 0); -+ -+ for (i = 0; i < max; i++) { -+ if (test_bit(i, mask)) { -+ new_val = pfm_power5_read_pmd(i); -+ if (new_val & width_mask) { -+ set_bit(i, set->povfl_pmds); -+ set->npend_ovfls++; -+ } -+ } -+ } -+} -+ -+static void pfm_power5_irq_handler(struct pt_regs *regs, -+ struct pfm_context *ctx) -+{ -+ u32 mmcr0; -+ -+ /* Disable the counters (set the freeze bit) to not polute -+ * the counts. -+ */ -+ mmcr0 = mfspr(SPRN_MMCR0); -+ mtspr(SPRN_MMCR0, (mmcr0 | MMCR0_FC)); -+ -+ /* Set the PMM bit (see comment below). */ -+ mtmsrd(mfmsr() | MSR_PMM); -+ -+ pfm_interrupt_handler(instruction_pointer(regs), regs); -+ -+ mmcr0 = mfspr(SPRN_MMCR0); -+ -+ /* -+ * Reset the perfmon trigger if -+ * not in masking mode. -+ */ -+ if (ctx->state != PFM_CTX_MASKED) -+ mmcr0 |= MMCR0_PMXE; -+ -+ /* -+ * We must clear the PMAO bit on some (GQ) chips. Just do it -+ * all the time. 
-+ */ -+ mmcr0 &= ~MMCR0_PMAO; -+ -+ /* -+ * Now clear the freeze bit, counting will not start until we -+ * rfid from this exception, because only at that point will -+ * the PMM bit be cleared. -+ */ -+ mmcr0 &= ~MMCR0_FC; -+ mtspr(SPRN_MMCR0, mmcr0); -+} -+ -+static void pfm_power5_resend_irq(struct pfm_context *ctx) -+{ -+ /* -+ * Assert the PMAO bit to cause a PMU interrupt. Make sure we -+ * trigger the edge detection circuitry for PMAO -+ */ -+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO); -+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO); -+} -+ -+struct pfm_arch_pmu_info pfm_power5_pmu_info = { -+ .pmu_style = PFM_POWERPC_PMU_POWER5, -+ .write_pmc = pfm_power5_write_pmc, -+ .write_pmd = pfm_power5_write_pmd, -+ .read_pmd = pfm_power5_read_pmd, -+ .irq_handler = pfm_power5_irq_handler, -+ .get_ovfl_pmds = pfm_power5_get_ovfl_pmds, -+ .enable_counters = pfm_power5_enable_counters, -+ .disable_counters = pfm_power5_disable_counters, -+ .resend_irq = pfm_power5_resend_irq -+}; -+ -+/* -+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors! 
-+ */ -+static struct pfm_pmu_config pfm_power5_pmu_conf = { -+ .pmu_name = "POWER5", -+ .counter_width = 31, -+ .pmd_desc = pfm_power5_pmd_desc, -+ .pmc_desc = pfm_power5_pmc_desc, -+ .num_pmc_entries = PFM_PM_NUM_PMCS, -+ .num_pmd_entries = PFM_PM_NUM_PMDS, -+ .probe_pmu = pfm_power5_probe_pmu, -+ .pmu_info = &pfm_power5_pmu_info, -+ .flags = PFM_PMU_BUILTIN_FLAG, -+ .owner = THIS_MODULE -+}; -+ -+static int __init pfm_power5_pmu_init_module(void) -+{ -+ return pfm_pmu_register(&pfm_power5_pmu_conf); -+} -+ -+static void __exit pfm_power5_pmu_cleanup_module(void) -+{ -+ pfm_pmu_unregister(&pfm_power5_pmu_conf); -+} -+ -+module_init(pfm_power5_pmu_init_module); -+module_exit(pfm_power5_pmu_cleanup_module); -diff --git a/arch/powerpc/perfmon/perfmon_power6.c b/arch/powerpc/perfmon/perfmon_power6.c -new file mode 100644 -index 0000000..7882feb ---- /dev/null -+++ b/arch/powerpc/perfmon/perfmon_power6.c -@@ -0,0 +1,520 @@ -+/* -+ * This file contains the POWER6 PMU register description tables -+ * and pmc checker used by perfmon.c. -+ * -+ * Copyright (c) 2007, IBM Corporation -+ * -+ * Based on perfmon_power5.c, and written by Carl Love -+ * and Kevin Corry . Some fixes and refinement by -+ * Corey Ashford -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+ -+MODULE_AUTHOR("Corey Ashford "); -+MODULE_DESCRIPTION("POWER6 PMU description table"); -+MODULE_LICENSE("GPL"); -+ -+static struct pfm_regmap_desc pfm_power6_pmc_desc[] = { -+/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", MMCR0_FC, 0, 0, SPRN_MMCR0), -+/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0, 0, 0, SPRN_MMCR1), -+/* mmcra */ PMC_D(PFM_REG_I, "MMCRA", 0, 0, 0, SPRN_MMCRA) -+}; -+#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_power6_pmc_desc) -+#define PFM_DELTA_TB 10000 /* Not a real registers */ -+#define PFM_DELTA_PURR 10001 -+ -+/* -+ * counters wrap to zero at transition from 2^32-1 to 2^32. Note: -+ * interrupt generated at transition from 2^31-1 to 2^31 -+ */ -+#define OVERFLOW_VALUE 0x100000000UL -+ -+/* The TB and PURR registers are read-only. Also, note that the TB register -+ * actually consists of both the 32-bit SPRN_TBRU and SPRN_TBRL registers. -+ * For Perfmon2's purposes, we'll treat it as a single 64-bit register. -+ */ -+static struct pfm_regmap_desc pfm_power6_pmd_desc[] = { -+ /* On POWER 6 PMC5 and PMC6 are not writable, they do not -+ * generate interrupts, and do not qualify their counts -+ * based on problem mode, supervisor mode or hypervisor mode. -+ * These two counters are implemented as virtual counters -+ * to make the appear to work like the other counters. A -+ * kernel timer is used sample the real PMC5 and PMC6 and -+ * update the virtual counters. 
-+ */ -+/* tb */ PMD_D((PFM_REG_I|PFM_REG_RO), "TB", SPRN_TBRL), -+/* pmd1 */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1), -+/* pmd2 */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2), -+/* pmd3 */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3), -+/* pmd4 */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4), -+/* pmd5 */ PMD_D((PFM_REG_I|PFM_REG_V), "PMC5", SPRN_PMC5), -+/* pmd6 */ PMD_D((PFM_REG_I|PFM_REG_V), "PMC6", SPRN_PMC6), -+/* purr */ PMD_D((PFM_REG_I|PFM_REG_RO), "PURR", SPRN_PURR), -+/* delta purr */ PMD_D((PFM_REG_I|PFM_REG_V), "DELTA_TB", PFM_DELTA_TB), -+/* delta tb */ PMD_D((PFM_REG_I|PFM_REG_V), "DELTA_PURR", PFM_DELTA_PURR), -+}; -+ -+#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_power6_pmd_desc) -+ -+u32 pmc5_start_save[NR_CPUS]; -+u32 pmc6_start_save[NR_CPUS]; -+ -+static struct timer_list pmc5_6_update[NR_CPUS]; -+u64 enable_cntrs_cnt; -+u64 disable_cntrs_cnt; -+u64 call_delta; -+u64 pm5_6_interrupt; -+u64 pm1_4_interrupt; -+/* need ctx_arch for kernel timer. Can't get it in context of the kernel -+ * timer. -+ */ -+struct pfm_arch_context *pmc5_6_ctx_arch[NR_CPUS]; -+long int update_time; -+ -+static void delta(int cpu_num, struct pfm_arch_context *ctx_arch) -+{ -+ u32 tmp5, tmp6; -+ -+ call_delta++; -+ -+ tmp5 = (u32) mfspr(SPRN_PMC5); -+ tmp6 = (u32) mfspr(SPRN_PMC6); -+ -+ /* -+ * The following difference calculation relies on 32-bit modular -+ * arithmetic for the deltas to come out correct (especially in the -+ * presence of a 32-bit counter wrap). 
-+ */ -+ ctx_arch->powergs_pmc5 += (u64)(tmp5 - pmc5_start_save[cpu_num]); -+ ctx_arch->powergs_pmc6 += (u64)(tmp6 - pmc6_start_save[cpu_num]); -+ -+ pmc5_start_save[cpu_num] = tmp5; -+ pmc6_start_save[cpu_num] = tmp6; -+ -+ return; -+} -+ -+ -+static void pmc5_6_updater(unsigned long cpu_num) -+{ -+ /* update the virtual pmd 5 and pmd 6 counters */ -+ -+ delta(cpu_num, pmc5_6_ctx_arch[cpu_num]); -+ mod_timer(&pmc5_6_update[cpu_num], jiffies + update_time); -+} -+ -+ -+static int pfm_power6_probe_pmu(void) -+{ -+ unsigned long pvr = mfspr(SPRN_PVR); -+ -+ switch (PVR_VER(pvr)) { -+ case PV_POWER6: -+ return 0; -+ case PV_POWER5p: -+ /* If this is a POWER5+ and the revision is less than 0x300, -+ don't treat it as a POWER6. */ -+ return (PVR_REV(pvr) < 0x300) ? -1 : 0; -+ default: -+ return -1; -+ } -+} -+ -+static void pfm_power6_write_pmc(unsigned int cnum, u64 value) -+{ -+ switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) { -+ case SPRN_MMCR0: -+ mtspr(SPRN_MMCR0, value); -+ break; -+ case SPRN_MMCR1: -+ mtspr(SPRN_MMCR1, value); -+ break; -+ case SPRN_MMCRA: -+ mtspr(SPRN_MMCRA, value); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static void pfm_power6_write_pmd(unsigned int cnum, u64 value) -+{ -+ /* On POWER 6 PMC5 and PMC6 are implemented as -+ * virtual counters. See comment in pfm_power6_pmd_desc -+ * definition. -+ */ -+ u64 ovfl_mask = pfm_pmu_conf->ovfl_mask; -+ -+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { -+ case SPRN_PMC1: -+ mtspr(SPRN_PMC1, value & ovfl_mask); -+ break; -+ case SPRN_PMC2: -+ mtspr(SPRN_PMC2, value & ovfl_mask); -+ break; -+ case SPRN_PMC3: -+ mtspr(SPRN_PMC3, value & ovfl_mask); -+ break; -+ case SPRN_PMC4: -+ mtspr(SPRN_PMC4, value & ovfl_mask); -+ break; -+ case SPRN_TBRL: -+ case SPRN_PURR: -+ /* Ignore writes to read-only registers. 
*/ -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static u64 pfm_power6_sread(struct pfm_context *ctx, unsigned int cnum) -+{ -+ struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx); -+ int cpu_num = smp_processor_id(); -+ -+ /* On POWER 6 PMC5 and PMC6 are implemented as -+ * virtual counters. See comment in pfm_power6_pmd_desc -+ * definition. -+ */ -+ -+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { -+ case SPRN_PMC5: -+ return ctx_arch->powergs_pmc5 + (u64)((u32)mfspr(SPRN_PMC5) - pmc5_start_save[cpu_num]); -+ break; -+ -+ case SPRN_PMC6: -+ return ctx_arch->powergs_pmc6 + (u64)((u32)mfspr(SPRN_PMC6) - pmc6_start_save[cpu_num]); -+ break; -+ -+ case PFM_DELTA_TB: -+ return ctx_arch->delta_tb -+ + (((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL)) -+ - ctx_arch->delta_tb_start; -+ break; -+ -+ case PFM_DELTA_PURR: -+ return ctx_arch->delta_purr -+ + mfspr(SPRN_PURR) -+ - ctx_arch->delta_purr_start; -+ break; -+ -+ default: -+ BUG(); -+ } -+} -+ -+void pfm_power6_swrite(struct pfm_context *ctx, unsigned int cnum, -+ u64 val) -+{ -+ struct pfm_arch_context *ctx_arch = pfm_ctx_arch(ctx); -+ int cpu_num = smp_processor_id(); -+ -+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { -+ case SPRN_PMC5: -+ pmc5_start_save[cpu_num] = mfspr(SPRN_PMC5); -+ ctx_arch->powergs_pmc5 = val; -+ break; -+ -+ case SPRN_PMC6: -+ pmc6_start_save[cpu_num] = mfspr(SPRN_PMC6); -+ ctx_arch->powergs_pmc6 = val; -+ break; -+ -+ case PFM_DELTA_TB: -+ ctx_arch->delta_tb_start = -+ (((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL)); -+ ctx_arch->delta_tb = val; -+ break; -+ -+ case PFM_DELTA_PURR: -+ ctx_arch->delta_purr_start = mfspr(SPRN_PURR); -+ ctx_arch->delta_purr = val; -+ break; -+ -+ default: -+ BUG(); -+ } -+} -+ -+static u64 pfm_power6_read_pmd(unsigned int cnum) -+{ -+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { -+ case SPRN_PMC1: -+ return mfspr(SPRN_PMC1); -+ case SPRN_PMC2: -+ return mfspr(SPRN_PMC2); -+ case SPRN_PMC3: -+ return mfspr(SPRN_PMC3); -+ case SPRN_PMC4: -+ 
return mfspr(SPRN_PMC4); -+ case SPRN_TBRL: -+ return ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL); -+ case SPRN_PURR: -+ if (cpu_has_feature(CPU_FTR_PURR)) -+ return mfspr(SPRN_PURR); -+ else -+ return 0; -+ default: -+ BUG(); -+ } -+} -+ -+ -+/** -+ * pfm_power6_enable_counters -+ * -+ **/ -+static void pfm_power6_enable_counters(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ -+ unsigned int i, max_pmc; -+ int cpu_num = smp_processor_id(); -+ struct pfm_arch_context *ctx_arch; -+ -+ enable_cntrs_cnt++; -+ -+ /* need the ctx passed down to the routine */ -+ ctx_arch = pfm_ctx_arch(ctx); -+ max_pmc = ctx->regs.max_pmc; -+ -+ /* Write MMCR0 last, and a fairly easy way to do this is to write -+ the registers in the reverse order */ -+ for (i = max_pmc; i != 0; i--) -+ if (test_bit(i - 1, set->used_pmcs)) -+ pfm_power6_write_pmc(i - 1, set->pmcs[i - 1]); -+ -+ /* save current free running HW event count */ -+ pmc5_start_save[cpu_num] = mfspr(SPRN_PMC5); -+ pmc6_start_save[cpu_num] = mfspr(SPRN_PMC6); -+ -+ ctx_arch->delta_purr_start = mfspr(SPRN_PURR); -+ -+ if (cpu_has_feature(CPU_FTR_PURR)) -+ ctx_arch->delta_tb_start = -+ ((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL); -+ else -+ ctx_arch->delta_tb_start = 0; -+ -+ /* Start kernel timer for this cpu to periodically update -+ * the virtual counters. -+ */ -+ init_timer(&pmc5_6_update[cpu_num]); -+ pmc5_6_update[cpu_num].function = pmc5_6_updater; -+ pmc5_6_update[cpu_num].data = (unsigned long) cpu_num; -+ pmc5_6_update[cpu_num].expires = jiffies + update_time; -+ /* context for this timer, timer will be removed if context -+ * is switched because the counters will be stopped first. -+ * NEEDS WORK, I think this is all ok, a little concerned about a -+ * race between the kernel timer going off right as the counters -+ * are being stopped and the context switching. Need to think -+ * about this. 
-+ */ -+ pmc5_6_ctx_arch[cpu_num] = ctx_arch; -+ add_timer(&pmc5_6_update[cpu_num]); -+} -+ -+/** -+ * pfm_power6_disable_counters -+ * -+ **/ -+static void pfm_power6_disable_counters(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ struct pfm_arch_context *ctx_arch; -+ int cpu_num = smp_processor_id(); -+ -+ disable_cntrs_cnt++; -+ -+ /* Set the Freeze Counters bit */ -+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC); -+ asm volatile ("sync"); -+ -+ /* delete kernel update timer */ -+ del_timer_sync(&pmc5_6_update[cpu_num]); -+ -+ /* Update the virtual pmd 5 and 6 counters from the free running -+ * HW counters -+ */ -+ ctx_arch = pfm_ctx_arch(ctx); -+ delta(cpu_num, ctx_arch); -+ -+ ctx_arch->delta_tb += -+ (((u64)mfspr(SPRN_TBRU) << 32) | mfspr(SPRN_TBRL)) -+ - ctx_arch->delta_tb_start; -+ -+ ctx_arch->delta_purr += mfspr(SPRN_PURR) -+ - ctx_arch->delta_purr_start; -+} -+ -+/** -+ * pfm_power6_get_ovfl_pmds -+ * -+ * Determine which counters in this set have overflowed and fill in the -+ * set->povfl_pmds mask and set->npend_ovfls count. 
-+ **/ -+static void pfm_power6_get_ovfl_pmds(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ unsigned int i; -+ unsigned int first_intr_pmd = ctx->regs.first_intr_pmd; -+ unsigned int max_intr_pmd = ctx->regs.max_intr_pmd; -+ u64 *used_pmds = set->used_pmds; -+ u64 *cntr_pmds = ctx->regs.cnt_pmds; -+ u64 width_mask = 1 << pfm_pmu_conf->counter_width; -+ u64 new_val, mask[PFM_PMD_BV]; -+ -+ bitmap_and(cast_ulp(mask), cast_ulp(cntr_pmds), cast_ulp(used_pmds), max_intr_pmd); -+ -+ /* max_intr_pmd is actually the last interrupting pmd register + 1 */ -+ for (i = first_intr_pmd; i < max_intr_pmd; i++) { -+ if (test_bit(i, mask)) { -+ new_val = pfm_power6_read_pmd(i); -+ if (new_val & width_mask) { -+ set_bit(i, set->povfl_pmds); -+ set->npend_ovfls++; -+ } -+ } -+ } -+} -+ -+static void pfm_power6_irq_handler(struct pt_regs *regs, -+ struct pfm_context *ctx) -+{ -+ u32 mmcr0; -+ u64 mmcra; -+ -+ /* Disable the counters (set the freeze bit) to not polute -+ * the counts. -+ */ -+ mmcr0 = mfspr(SPRN_MMCR0); -+ mtspr(SPRN_MMCR0, (mmcr0 | MMCR0_FC)); -+ mmcra = mfspr(SPRN_MMCRA); -+ -+ /* Set the PMM bit (see comment below). */ -+ mtmsrd(mfmsr() | MSR_PMM); -+ -+ pm1_4_interrupt++; -+ -+ pfm_interrupt_handler(instruction_pointer(regs), regs); -+ -+ mmcr0 = mfspr(SPRN_MMCR0); -+ -+ /* -+ * Reset the perfmon trigger if -+ * not in masking mode. -+ */ -+ if (ctx->state != PFM_CTX_MASKED) -+ mmcr0 |= MMCR0_PMXE; -+ -+ /* -+ * Clear the PMU Alert Occurred bit -+ */ -+ mmcr0 &= ~MMCR0_PMAO; -+ -+ /* Clear the appropriate bits in the MMCRA. */ -+ mmcra &= ~(POWER6_MMCRA_THRM | POWER6_MMCRA_OTHER); -+ mtspr(SPRN_MMCRA, mmcra); -+ -+ /* -+ * Now clear the freeze bit, counting will not start until we -+ * rfid from this exception, because only at that point will -+ * the PMM bit be cleared. 
-+ */ -+ mmcr0 &= ~MMCR0_FC; -+ mtspr(SPRN_MMCR0, mmcr0); -+} -+ -+static void pfm_power6_resend_irq(struct pfm_context *ctx) -+{ -+ /* -+ * Assert the PMAO bit to cause a PMU interrupt. Make sure we -+ * trigger the edge detection circuitry for PMAO -+ */ -+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO); -+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_PMAO); -+} -+ -+struct pfm_arch_pmu_info pfm_power6_pmu_info = { -+ .pmu_style = PFM_POWERPC_PMU_POWER6, -+ .write_pmc = pfm_power6_write_pmc, -+ .write_pmd = pfm_power6_write_pmd, -+ .read_pmd = pfm_power6_read_pmd, -+ .irq_handler = pfm_power6_irq_handler, -+ .get_ovfl_pmds = pfm_power6_get_ovfl_pmds, -+ .enable_counters = pfm_power6_enable_counters, -+ .disable_counters = pfm_power6_disable_counters, -+ .resend_irq = pfm_power6_resend_irq -+}; -+ -+/* -+ * impl_pmcs, impl_pmds are computed at runtime to minimize errors! -+ */ -+static struct pfm_pmu_config pfm_power6_pmu_conf = { -+ .pmu_name = "POWER6", -+ .counter_width = 31, -+ .pmd_desc = pfm_power6_pmd_desc, -+ .pmc_desc = pfm_power6_pmc_desc, -+ .num_pmc_entries = PFM_PM_NUM_PMCS, -+ .num_pmd_entries = PFM_PM_NUM_PMDS, -+ .probe_pmu = pfm_power6_probe_pmu, -+ .pmu_info = &pfm_power6_pmu_info, -+ .pmd_sread = pfm_power6_sread, -+ .pmd_swrite = pfm_power6_swrite, -+ .flags = PFM_PMU_BUILTIN_FLAG, -+ .owner = THIS_MODULE -+}; -+ -+static int __init pfm_power6_pmu_init_module(void) -+{ -+ int ret; -+ disable_cntrs_cnt = 0; -+ enable_cntrs_cnt = 0; -+ call_delta = 0; -+ pm5_6_interrupt = 0; -+ pm1_4_interrupt = 0; -+ -+ /* calculate the time for updating counters 5 and 6 */ -+ -+ /* -+ * MAX_EVENT_RATE assumes a max instruction issue rate of 2 -+ * instructions per clock cycle. Experience shows that this factor -+ * of 2 is more than adequate. 
-+ */ -+ -+# define MAX_EVENT_RATE (ppc_proc_freq * 2) -+ -+ /* -+ * Calculate the time, in jiffies, it takes for event counter 5 or -+ * 6 to completely wrap when counting at the max event rate, and -+ * then figure on sampling at twice that rate. -+ */ -+ update_time = (((unsigned long)HZ * OVERFLOW_VALUE) -+ / ((unsigned long)MAX_EVENT_RATE)) / 2; -+ -+ ret = pfm_pmu_register(&pfm_power6_pmu_conf); -+ return ret; -+} -+ -+static void __exit pfm_power6_pmu_cleanup_module(void) -+{ -+ pfm_pmu_unregister(&pfm_power6_pmu_conf); -+} -+ -+module_init(pfm_power6_pmu_init_module); -+module_exit(pfm_power6_pmu_cleanup_module); -diff --git a/arch/powerpc/perfmon/perfmon_ppc32.c b/arch/powerpc/perfmon/perfmon_ppc32.c -new file mode 100644 -index 0000000..76f0b84 ---- /dev/null -+++ b/arch/powerpc/perfmon/perfmon_ppc32.c -@@ -0,0 +1,340 @@ -+/* -+ * This file contains the PPC32 PMU register description tables -+ * and pmc checker used by perfmon.c. -+ * -+ * Philip Mucci, mucci@cs.utk.edu -+ * -+ * Based on code from: -+ * Copyright (c) 2005 David Gibson, IBM Corporation. -+ * -+ * Based on perfmon_p6.c: -+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+ -+MODULE_AUTHOR("Philip Mucci "); -+MODULE_DESCRIPTION("PPC32 PMU description table"); -+MODULE_LICENSE("GPL"); -+ -+static struct pfm_pmu_config pfm_ppc32_pmu_conf; -+ -+static struct pfm_regmap_desc pfm_ppc32_pmc_desc[] = { -+/* mmcr0 */ PMC_D(PFM_REG_I, "MMCR0", 0x0, 0, 0, SPRN_MMCR0), -+/* mmcr1 */ PMC_D(PFM_REG_I, "MMCR1", 0x0, 0, 0, SPRN_MMCR1), -+/* mmcr2 */ PMC_D(PFM_REG_I, "MMCR2", 0x0, 0, 0, SPRN_MMCR2), -+}; -+#define PFM_PM_NUM_PMCS ARRAY_SIZE(pfm_ppc32_pmc_desc) -+ -+static struct pfm_regmap_desc pfm_ppc32_pmd_desc[] = { -+/* pmd0 */ PMD_D(PFM_REG_C, "PMC1", SPRN_PMC1), -+/* pmd1 */ PMD_D(PFM_REG_C, "PMC2", SPRN_PMC2), -+/* pmd2 */ PMD_D(PFM_REG_C, "PMC3", SPRN_PMC3), -+/* pmd3 */ PMD_D(PFM_REG_C, "PMC4", SPRN_PMC4), -+/* pmd4 */ PMD_D(PFM_REG_C, "PMC5", SPRN_PMC5), -+/* pmd5 */ PMD_D(PFM_REG_C, "PMC6", SPRN_PMC6), -+}; -+#define PFM_PM_NUM_PMDS ARRAY_SIZE(pfm_ppc32_pmd_desc) -+ -+static void perfmon_perf_irq(struct pt_regs *regs) -+{ -+ u32 mmcr0; -+ -+ /* BLATANTLY STOLEN FROM OPROFILE, then modified */ -+ -+ /* set the PMM bit (see comment below) */ -+ mtmsr(mfmsr() | MSR_PMM); -+ -+ pfm_interrupt_handler(instruction_pointer(regs), regs); -+ -+ /* The freeze bit was set by the interrupt. -+ * Clear the freeze bit, and reenable the interrupt. -+ * The counters won't actually start until the rfi clears -+ * the PMM bit. -+ */ -+ -+ /* Unfreezes the counters on this CPU, enables the interrupt, -+ * enables the counters to trigger the interrupt, and sets the -+ * counters to only count when the mark bit is not set. 
-+ */ -+ mmcr0 = mfspr(SPRN_MMCR0); -+ -+ mmcr0 &= ~(MMCR0_FC | MMCR0_FCM0); -+ mmcr0 |= (MMCR0_FCECE | MMCR0_PMC1CE | MMCR0_PMCnCE | MMCR0_PMXE); -+ -+ mtspr(SPRN_MMCR0, mmcr0); -+} -+ -+static int pfm_ppc32_probe_pmu(void) -+{ -+ enum ppc32_pmu_type pm_type; -+ int nmmcr = 0, npmds = 0, intsok = 0, i; -+ unsigned int pvr; -+ char *str; -+ -+ pvr = mfspr(SPRN_PVR); -+ -+ switch (PVR_VER(pvr)) { -+ case 0x0004: /* 604 */ -+ str = "PPC604"; -+ pm_type = PFM_POWERPC_PMU_604; -+ nmmcr = 1; -+ npmds = 2; -+ break; -+ case 0x0009: /* 604e; */ -+ case 0x000A: /* 604ev */ -+ str = "PPC604e"; -+ pm_type = PFM_POWERPC_PMU_604e; -+ nmmcr = 2; -+ npmds = 4; -+ break; -+ case 0x0008: /* 750/740 */ -+ str = "PPC750"; -+ pm_type = PFM_POWERPC_PMU_750; -+ nmmcr = 2; -+ npmds = 4; -+ break; -+ case 0x7000: /* 750FX */ -+ case 0x7001: -+ str = "PPC750"; -+ pm_type = PFM_POWERPC_PMU_750; -+ nmmcr = 2; -+ npmds = 4; -+ if ((pvr & 0xFF0F) >= 0x0203) -+ intsok = 1; -+ break; -+ case 0x7002: /* 750GX */ -+ str = "PPC750"; -+ pm_type = PFM_POWERPC_PMU_750; -+ nmmcr = 2; -+ npmds = 4; -+ intsok = 1; -+ case 0x000C: /* 7400 */ -+ str = "PPC7400"; -+ pm_type = PFM_POWERPC_PMU_7400; -+ nmmcr = 3; -+ npmds = 4; -+ break; -+ case 0x800C: /* 7410 */ -+ str = "PPC7410"; -+ pm_type = PFM_POWERPC_PMU_7400; -+ nmmcr = 3; -+ npmds = 4; -+ if ((pvr & 0xFFFF) >= 0x01103) -+ intsok = 1; -+ break; -+ case 0x8000: /* 7451/7441 */ -+ case 0x8001: /* 7455/7445 */ -+ case 0x8002: /* 7457/7447 */ -+ case 0x8003: /* 7447A */ -+ case 0x8004: /* 7448 */ -+ str = "PPC7450"; -+ pm_type = PFM_POWERPC_PMU_7450; -+ nmmcr = 3; npmds = 6; -+ intsok = 1; -+ break; -+ default: -+ PFM_INFO("Unknown PVR_VER(0x%x)\n", PVR_VER(pvr)); -+ return -1; -+ } -+ -+ /* -+ * deconfigure unimplemented registers -+ */ -+ for (i = npmds; i < PFM_PM_NUM_PMDS; i++) -+ pfm_ppc32_pmd_desc[i].type = PFM_REG_NA; -+ -+ for (i = nmmcr; i < PFM_PM_NUM_PMCS; i++) -+ pfm_ppc32_pmc_desc[i].type = PFM_REG_NA; -+ -+ /* -+ * update PMU description 
structure -+ */ -+ pfm_ppc32_pmu_conf.pmu_name = str; -+ pfm_ppc32_pmu_info.pmu_style = pm_type; -+ pfm_ppc32_pmu_conf.num_pmc_entries = nmmcr; -+ pfm_ppc32_pmu_conf.num_pmd_entries = npmds; -+ -+ if (intsok == 0) -+ PFM_INFO("Interrupts unlikely to work\n"); -+ -+ return reserve_pmc_hardware(perfmon_perf_irq); -+} -+ -+static void pfm_ppc32_write_pmc(unsigned int cnum, u64 value) -+{ -+ switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) { -+ case SPRN_MMCR0: -+ mtspr(SPRN_MMCR0, value); -+ break; -+ case SPRN_MMCR1: -+ mtspr(SPRN_MMCR1, value); -+ break; -+ case SPRN_MMCR2: -+ mtspr(SPRN_MMCR2, value); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static void pfm_ppc32_write_pmd(unsigned int cnum, u64 value) -+{ -+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { -+ case SPRN_PMC1: -+ mtspr(SPRN_PMC1, value); -+ break; -+ case SPRN_PMC2: -+ mtspr(SPRN_PMC2, value); -+ break; -+ case SPRN_PMC3: -+ mtspr(SPRN_PMC3, value); -+ break; -+ case SPRN_PMC4: -+ mtspr(SPRN_PMC4, value); -+ break; -+ case SPRN_PMC5: -+ mtspr(SPRN_PMC5, value); -+ break; -+ case SPRN_PMC6: -+ mtspr(SPRN_PMC6, value); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static u64 pfm_ppc32_read_pmd(unsigned int cnum) -+{ -+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { -+ case SPRN_PMC1: -+ return mfspr(SPRN_PMC1); -+ case SPRN_PMC2: -+ return mfspr(SPRN_PMC2); -+ case SPRN_PMC3: -+ return mfspr(SPRN_PMC3); -+ case SPRN_PMC4: -+ return mfspr(SPRN_PMC4); -+ case SPRN_PMC5: -+ return mfspr(SPRN_PMC5); -+ case SPRN_PMC6: -+ return mfspr(SPRN_PMC6); -+ default: -+ BUG(); -+ } -+} -+ -+/** -+ * pfm_ppc32_enable_counters -+ * -+ * Just need to load the current values into the control registers. 
-+ **/ -+static void pfm_ppc32_enable_counters(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ unsigned int i, max_pmc; -+ -+ max_pmc = pfm_pmu_conf->regs.max_pmc; -+ -+ for (i = 0; i < max_pmc; i++) -+ if (test_bit(i, set->used_pmcs)) -+ pfm_ppc32_write_pmc(i, set->pmcs[i]); -+} -+ -+/** -+ * pfm_ppc32_disable_counters -+ * -+ * Just need to zero all the control registers. -+ **/ -+static void pfm_ppc32_disable_counters(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ unsigned int i, max; -+ -+ max = pfm_pmu_conf->regs.max_pmc; -+ -+ for (i = 0; i < max; i++) -+ if (test_bit(i, set->used_pmcs)) -+ pfm_ppc32_write_pmc(ctx, 0); -+} -+ -+/** -+ * pfm_ppc32_get_ovfl_pmds -+ * -+ * Determine which counters in this set have overflowed and fill in the -+ * set->povfl_pmds mask and set->npend_ovfls count. -+ **/ -+static void pfm_ppc32_get_ovfl_pmds(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ unsigned int i; -+ unsigned int max_pmd = pfm_pmu_conf->regs.max_cnt_pmd; -+ u64 *used_pmds = set->used_pmds; -+ u64 *cntr_pmds = pfm_pmu_conf->regs.cnt_pmds; -+ u64 width_mask = 1 << pfm_pmu_conf->counter_width; -+ u64 new_val, mask[PFM_PMD_BV]; -+ -+ bitmap_and(cast_ulp(mask), cast_ulp(cntr_pmds), -+ cast_ulp(used_pmds), max_pmd); -+ -+ for (i = 0; i < max_pmd; i++) { -+ if (test_bit(i, mask)) { -+ new_val = pfm_ppc32_read_pmd(i); -+ if (new_val & width_mask) { -+ set_bit(i, set->povfl_pmds); -+ set->npend_ovfls++; -+ } -+ } -+ } -+} -+ -+struct pfm_arch_pmu_info pfm_ppc32_pmu_info = { -+ .pmu_style = PFM_POWERPC_PMU_NONE, -+ .write_pmc = pfm_ppc32_write_pmc, -+ .write_pmd = pfm_ppc32_write_pmd, -+ .read_pmd = pfm_ppc32_read_pmd, -+ .get_ovfl_pmds = pfm_ppc32_get_ovfl_pmds, -+ .enable_counters = pfm_ppc32_enable_counters, -+ .disable_counters = pfm_ppc32_disable_counters, -+}; -+ -+static struct pfm_pmu_config pfm_ppc32_pmu_conf = { -+ .counter_width = 31, -+ .pmd_desc = pfm_ppc32_pmd_desc, -+ .pmc_desc = pfm_ppc32_pmc_desc, -+ 
.probe_pmu = pfm_ppc32_probe_pmu, -+ .flags = PFM_PMU_BUILTIN_FLAG, -+ .owner = THIS_MODULE, -+ .version = "0.1", -+ .arch_info = &pfm_ppc32_pmu_info, -+}; -+ -+static int __init pfm_ppc32_pmu_init_module(void) -+{ -+ return pfm_pmu_register(&pfm_ppc32_pmu_conf); -+} -+ -+static void __exit pfm_ppc32_pmu_cleanup_module(void) -+{ -+ release_pmc_hardware(); -+ pfm_pmu_unregister(&pfm_ppc32_pmu_conf); -+} -+ -+module_init(pfm_ppc32_pmu_init_module); -+module_exit(pfm_ppc32_pmu_cleanup_module); -diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c -index dbc338f..e24320e 100644 ---- a/arch/powerpc/platforms/cell/cbe_regs.c -+++ b/arch/powerpc/platforms/cell/cbe_regs.c -@@ -33,6 +33,7 @@ static struct cbe_regs_map - struct cbe_iic_regs __iomem *iic_regs; - struct cbe_mic_tm_regs __iomem *mic_tm_regs; - struct cbe_pmd_shadow_regs pmd_shadow_regs; -+ struct cbe_ppe_priv_regs __iomem *ppe_priv_regs; - } cbe_regs_maps[MAX_CBE]; - static int cbe_regs_map_count; - -@@ -145,6 +146,23 @@ struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu) - } - EXPORT_SYMBOL_GPL(cbe_get_cpu_mic_tm_regs); - -+struct cbe_ppe_priv_regs __iomem *cbe_get_ppe_priv_regs(struct device_node *np) -+{ -+ struct cbe_regs_map *map = cbe_find_map(np); -+ if (map == NULL) -+ return NULL; -+ return map->ppe_priv_regs; -+} -+ -+struct cbe_ppe_priv_regs __iomem *cbe_get_cpu_ppe_priv_regs(int cpu) -+{ -+ struct cbe_regs_map *map = cbe_thread_map[cpu].regs; -+ if (map == NULL) -+ return NULL; -+ return map->ppe_priv_regs; -+} -+EXPORT_SYMBOL_GPL(cbe_get_cpu_ppe_priv_regs); -+ - u32 cbe_get_hw_thread_id(int cpu) - { - return cbe_thread_map[cpu].thread_id; -@@ -206,6 +224,11 @@ void __init cbe_fill_regs_map(struct cbe_regs_map *map) - for_each_node_by_type(np, "mic-tm") - if (of_get_parent(np) == be) - map->mic_tm_regs = of_iomap(np, 0); -+ -+ for_each_node_by_type(np, "ppe-mmio") -+ if (of_get_parent(np) == be) -+ map->ppe_priv_regs = of_iomap(np, 0); -+ - } else 
{ - struct device_node *cpu; - /* That hack must die die die ! */ -@@ -227,6 +250,10 @@ void __init cbe_fill_regs_map(struct cbe_regs_map *map) - prop = of_get_property(cpu, "mic-tm", NULL); - if (prop != NULL) - map->mic_tm_regs = ioremap(prop->address, prop->len); -+ -+ prop = of_get_property(cpu, "ppe-mmio", NULL); -+ if (prop != NULL) -+ map->ppe_priv_regs = ioremap(prop->address, prop->len); - } - } - -diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h -index 109ae24..bafe5a6 100644 ---- a/arch/sparc/include/asm/hypervisor.h -+++ b/arch/sparc/include/asm/hypervisor.h -@@ -2713,6 +2713,30 @@ extern unsigned long sun4v_ldc_revoke(unsigned long channel, - */ - #define HV_FAST_SET_PERFREG 0x101 - -+#define HV_N2_PERF_SPARC_CTL 0x0 -+#define HV_N2_PERF_DRAM_CTL0 0x1 -+#define HV_N2_PERF_DRAM_CNT0 0x2 -+#define HV_N2_PERF_DRAM_CTL1 0x3 -+#define HV_N2_PERF_DRAM_CNT1 0x4 -+#define HV_N2_PERF_DRAM_CTL2 0x5 -+#define HV_N2_PERF_DRAM_CNT2 0x6 -+#define HV_N2_PERF_DRAM_CTL3 0x7 -+#define HV_N2_PERF_DRAM_CNT3 0x8 -+ -+#define HV_FAST_N2_GET_PERFREG 0x104 -+#define HV_FAST_N2_SET_PERFREG 0x105 -+ -+#ifndef __ASSEMBLY__ -+extern unsigned long sun4v_niagara_getperf(unsigned long reg, -+ unsigned long *val); -+extern unsigned long sun4v_niagara_setperf(unsigned long reg, -+ unsigned long val); -+extern unsigned long sun4v_niagara2_getperf(unsigned long reg, -+ unsigned long *val); -+extern unsigned long sun4v_niagara2_setperf(unsigned long reg, -+ unsigned long val); -+#endif -+ - /* MMU statistics services. 
- * - * The hypervisor maintains MMU statistics and privileged code provides -diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h -index e3dd930..6cf3aec 100644 ---- a/arch/sparc/include/asm/irq_64.h -+++ b/arch/sparc/include/asm/irq_64.h -@@ -67,6 +67,9 @@ extern void virt_irq_free(unsigned int virt_irq); - extern void __init init_IRQ(void); - extern void fixup_irqs(void); - -+extern int register_perfctr_intr(void (*handler)(struct pt_regs *)); -+extern void release_perfctr_intr(void (*handler)(struct pt_regs *)); -+ - static inline void set_softint(unsigned long bits) - { - __asm__ __volatile__("wr %0, 0x0, %%set_softint" -diff --git a/arch/sparc/include/asm/perfmon.h b/arch/sparc/include/asm/perfmon.h -new file mode 100644 -index 0000000..f20cbfa ---- /dev/null -+++ b/arch/sparc/include/asm/perfmon.h -@@ -0,0 +1,11 @@ -+#ifndef _SPARC64_PERFMON_H_ -+#define _SPARC64_PERFMON_H_ -+ -+/* -+ * arch-specific user visible interface definitions -+ */ -+ -+#define PFM_ARCH_MAX_PMCS 2 -+#define PFM_ARCH_MAX_PMDS 3 -+ -+#endif /* _SPARC64_PERFMON_H_ */ -diff --git a/arch/sparc/include/asm/perfmon_kern.h b/arch/sparc/include/asm/perfmon_kern.h -new file mode 100644 -index 0000000..033eff5 ---- /dev/null -+++ b/arch/sparc/include/asm/perfmon_kern.h -@@ -0,0 +1,286 @@ -+#ifndef _SPARC64_PERFMON_KERN_H_ -+#define _SPARC64_PERFMON_KERN_H_ -+ -+#ifdef __KERNEL__ -+ -+#ifdef CONFIG_PERFMON -+ -+#include -+#include -+ -+#define PFM_ARCH_PMD_STK_ARG 2 -+#define PFM_ARCH_PMC_STK_ARG 1 -+ -+struct pfm_arch_pmu_info { -+ u32 pmu_style; -+}; -+ -+static inline void pfm_arch_resend_irq(struct pfm_context *ctx) -+{ -+} -+ -+static inline void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{} -+ -+static inline void pfm_arch_serialize(void) -+{ -+} -+ -+/* -+ * SPARC does not save the PMDs during pfm_arch_intr_freeze_pmu(), thus -+ * this routine needs to do it when switching sets on overflow -+ */ -+static inline void 
pfm_arch_save_pmds_from_intr(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ pfm_save_pmds(ctx, set); -+} -+ -+extern void pfm_arch_write_pmc(struct pfm_context *ctx, -+ unsigned int cnum, u64 value); -+extern u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum); -+ -+static inline void pfm_arch_write_pmd(struct pfm_context *ctx, -+ unsigned int cnum, u64 value) -+{ -+ u64 pic; -+ -+ value &= pfm_pmu_conf->ovfl_mask; -+ -+ read_pic(pic); -+ -+ switch (cnum) { -+ case 0: -+ pic = (pic & 0xffffffff00000000UL) | -+ (value & 0xffffffffUL); -+ break; -+ case 1: -+ pic = (pic & 0xffffffffUL) | -+ (value << 32UL); -+ break; -+ default: -+ BUG(); -+ } -+ -+ write_pic(pic); -+} -+ -+static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, -+ unsigned int cnum) -+{ -+ u64 pic; -+ -+ read_pic(pic); -+ -+ switch (cnum) { -+ case 0: -+ return pic & 0xffffffffUL; -+ case 1: -+ return pic >> 32UL; -+ default: -+ BUG(); -+ return 0; -+ } -+} -+ -+/* -+ * For some CPUs, the upper bits of a counter must be set in order for the -+ * overflow interrupt to happen. On overflow, the counter has wrapped around, -+ * and the upper bits are cleared. This function may be used to set them back. -+ */ -+static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx, -+ unsigned int cnum) -+{ -+ u64 val = pfm_arch_read_pmd(ctx, cnum); -+ -+ /* This masks out overflow bit 31 */ -+ pfm_arch_write_pmd(ctx, cnum, val); -+} -+ -+/* -+ * At certain points, perfmon needs to know if monitoring has been -+ * explicitely started/stopped by user via pfm_start/pfm_stop. The -+ * information is tracked in ctx.flags.started. However on certain -+ * architectures, it may be possible to start/stop directly from -+ * user level with a single assembly instruction bypassing -+ * the kernel. This function must be used to determine by -+ * an arch-specific mean if monitoring is actually started/stopped. 
-+ */ -+static inline int pfm_arch_is_active(struct pfm_context *ctx) -+{ -+ return ctx->flags.started; -+} -+ -+static inline void pfm_arch_ctxswout_sys(struct task_struct *task, -+ struct pfm_context *ctx) -+{ -+} -+ -+static inline void pfm_arch_ctxswin_sys(struct task_struct *task, -+ struct pfm_context *ctx) -+{ -+} -+ -+static inline void pfm_arch_ctxswin_thread(struct task_struct *task, -+ struct pfm_context *ctx) -+{ -+} -+ -+int pfm_arch_is_monitoring_active(struct pfm_context *ctx); -+int pfm_arch_ctxswout_thread(struct task_struct *task, -+ struct pfm_context *ctx); -+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx); -+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx); -+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set); -+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set); -+char *pfm_arch_get_pmu_module_name(void); -+ -+static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ pfm_arch_stop(current, ctx); -+ /* -+ * we mark monitoring as stopped to avoid -+ * certain side effects especially in -+ * pfm_switch_sets_from_intr() on -+ * pfm_arch_restore_pmcs() -+ */ -+ ctx->flags.started = 0; -+} -+ -+/* -+ * unfreeze PMU from pfm_do_interrupt_handler() -+ * ctx may be NULL for spurious -+ */ -+static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx) -+{ -+ if (!ctx) -+ return; -+ -+ PFM_DBG_ovfl("state=%d", ctx->state); -+ -+ ctx->flags.started = 1; -+ -+ if (ctx->state == PFM_CTX_MASKED) -+ return; -+ -+ pfm_arch_restore_pmcs(ctx, ctx->active_set); -+} -+ -+/* -+ * this function is called from the PMU interrupt handler ONLY. -+ * On SPARC, the PMU is frozen via arch_stop, masking would be implemented -+ * via arch-stop as well. Given that the PMU is already stopped when -+ * entering the interrupt handler, we do not need to stop it again, so -+ * this function is a nop. 
-+ */ -+static inline void pfm_arch_mask_monitoring(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+} -+ -+/* -+ * on MIPS masking/unmasking uses the start/stop mechanism, so we simply -+ * need to start here. -+ */ -+static inline void pfm_arch_unmask_monitoring(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ pfm_arch_start(current, ctx); -+} -+ -+static inline void pfm_arch_pmu_config_remove(void) -+{ -+} -+ -+static inline int pfm_arch_context_create(struct pfm_context *ctx, -+ u32 ctx_flags) -+{ -+ return 0; -+} -+ -+static inline void pfm_arch_context_free(struct pfm_context *ctx) -+{ -+} -+ -+/* -+ * function called from pfm_setfl_sane(). Context is locked -+ * and interrupts are masked. -+ * The value of flags is the value of ctx_flags as passed by -+ * user. -+ * -+ * function must check arch-specific set flags. -+ * Return: -+ * 1 when flags are valid -+ * 0 on error -+ */ -+static inline int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags) -+{ -+ return 0; -+} -+ -+static inline int pfm_arch_init(void) -+{ -+ return 0; -+} -+ -+static inline void pfm_arch_init_percpu(void) -+{ -+} -+ -+static inline int pfm_arch_load_context(struct pfm_context *ctx) -+{ -+ return 0; -+} -+ -+static inline void pfm_arch_unload_context(struct pfm_context *ctx) -+{} -+ -+extern void perfmon_interrupt(struct pt_regs *); -+ -+static inline int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds) -+{ -+ return register_perfctr_intr(perfmon_interrupt); -+} -+ -+static inline void pfm_arch_pmu_release(void) -+{ -+ release_perfctr_intr(perfmon_interrupt); -+} -+ -+static inline void pfm_arch_arm_handle_work(struct task_struct *task) -+{} -+ -+static inline void pfm_arch_disarm_handle_work(struct task_struct *task) -+{} -+ -+static inline int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg) -+{ -+ return 0; -+} -+ -+static inline int pfm_arch_get_base_syscall(void) -+{ -+ return __NR_pfm_create_context; -+} -+ -+struct pfm_arch_context { 
-+ /* empty */ -+}; -+ -+#define PFM_ARCH_CTX_SIZE sizeof(struct pfm_arch_context) -+/* -+ * SPARC needs extra alignment for the sampling buffer -+ */ -+#define PFM_ARCH_SMPL_ALIGN_SIZE (16 * 1024) -+ -+static inline void pfm_cacheflush(void *addr, unsigned int len) -+{ -+} -+ -+#endif /* CONFIG_PERFMON */ -+ -+#endif /* __KERNEL__ */ -+ -+#endif /* _SPARC64_PERFMON_KERN_H_ */ -diff --git a/arch/sparc/include/asm/system_64.h b/arch/sparc/include/asm/system_64.h -index db9e742..2a9ddb9 100644 ---- a/arch/sparc/include/asm/system_64.h -+++ b/arch/sparc/include/asm/system_64.h -@@ -30,6 +30,9 @@ enum sparc_cpu { - #define ARCH_SUN4C_SUN4 0 - #define ARCH_SUN4 0 - -+extern char *sparc_cpu_type; -+extern char *sparc_fpu_type; -+extern char *sparc_pmu_type; - extern char reboot_command[]; - - /* These are here in an effort to more fully work around Spitfire Errata -@@ -104,15 +107,13 @@ do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ - #define write_pcr(__p) __asm__ __volatile__("wr %0, 0x0, %%pcr" : : "r" (__p)) - #define read_pic(__p) __asm__ __volatile__("rd %%pic, %0" : "=r" (__p)) - --/* Blackbird errata workaround. See commentary in -- * arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt() -- * for more information. -- */ --#define reset_pic() \ -- __asm__ __volatile__("ba,pt %xcc, 99f\n\t" \ -+/* Blackbird errata workaround. */ -+#define write_pic(val) \ -+ __asm__ __volatile__("ba,pt %%xcc, 99f\n\t" \ - ".align 64\n" \ -- "99:wr %g0, 0x0, %pic\n\t" \ -- "rd %pic, %g0") -+ "99:wr %0, 0x0, %%pic\n\t" \ -+ "rd %%pic, %%g0" : : "r" (val)) -+#define reset_pic() write_pic(0) - - #ifndef __ASSEMBLY__ - -@@ -145,14 +146,10 @@ do { \ - * and 2 stores in this critical code path. 
-DaveM - */ - #define switch_to(prev, next, last) \ --do { if (test_thread_flag(TIF_PERFCTR)) { \ -- unsigned long __tmp; \ -- read_pcr(__tmp); \ -- current_thread_info()->pcr_reg = __tmp; \ -- read_pic(__tmp); \ -- current_thread_info()->kernel_cntd0 += (unsigned int)(__tmp);\ -- current_thread_info()->kernel_cntd1 += ((__tmp) >> 32); \ -- } \ -+do { if (test_tsk_thread_flag(prev, TIF_PERFMON_CTXSW)) \ -+ pfm_ctxsw_out(prev, next); \ -+ if (test_tsk_thread_flag(next, TIF_PERFMON_CTXSW)) \ -+ pfm_ctxsw_in(prev, next); \ - flush_tlb_pending(); \ - save_and_clear_fpu(); \ - /* If you are tempted to conditionalize the following */ \ -@@ -197,11 +194,6 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \ - "l1", "l2", "l3", "l4", "l5", "l6", "l7", \ - "i0", "i1", "i2", "i3", "i4", "i5", \ - "o0", "o1", "o2", "o3", "o4", "o5", "o7"); \ -- /* If you fuck with this, update ret_from_syscall code too. */ \ -- if (test_thread_flag(TIF_PERFCTR)) { \ -- write_pcr(current_thread_info()->pcr_reg); \ -- reset_pic(); \ -- } \ - } while(0) - - static inline unsigned long xchg32(__volatile__ unsigned int *m, unsigned int val) -diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h -index c0a737d..53857f7 100644 ---- a/arch/sparc/include/asm/thread_info_64.h -+++ b/arch/sparc/include/asm/thread_info_64.h -@@ -58,11 +58,6 @@ struct thread_info { - unsigned long gsr[7]; - unsigned long xfsr[7]; - -- __u64 __user *user_cntd0; -- __u64 __user *user_cntd1; -- __u64 kernel_cntd0, kernel_cntd1; -- __u64 pcr_reg; -- - struct restart_block restart_block; - - struct pt_regs *kern_una_regs; -@@ -96,15 +91,10 @@ struct thread_info { - #define TI_RWIN_SPTRS 0x000003c8 - #define TI_GSR 0x00000400 - #define TI_XFSR 0x00000438 --#define TI_USER_CNTD0 0x00000470 --#define TI_USER_CNTD1 0x00000478 --#define TI_KERN_CNTD0 0x00000480 --#define TI_KERN_CNTD1 0x00000488 --#define TI_PCR 0x00000490 --#define TI_RESTART_BLOCK 0x00000498 --#define TI_KUNA_REGS 0x000004c0 
--#define TI_KUNA_INSN 0x000004c8 --#define TI_FPREGS 0x00000500 -+#define TI_RESTART_BLOCK 0x00000470 -+#define TI_KUNA_REGS 0x00000498 -+#define TI_KUNA_INSN 0x000004a0 -+#define TI_FPREGS 0x000004c0 - - /* We embed this in the uppermost byte of thread_info->flags */ - #define FAULT_CODE_WRITE 0x01 /* Write access, implies D-TLB */ -@@ -222,11 +212,11 @@ register struct thread_info *current_thread_info_reg asm("g6"); - #define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ - #define TIF_SIGPENDING 2 /* signal pending */ - #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ --#define TIF_PERFCTR 4 /* performance counters active */ -+/* Bit 4 is available */ - #define TIF_UNALIGNED 5 /* allowed to do unaligned accesses */ - /* flag bit 6 is available */ - #define TIF_32BIT 7 /* 32-bit binary */ --/* flag bit 8 is available */ -+#define TIF_PERFMON_WORK 8 /* work for pfm_handle_work() */ - #define TIF_SECCOMP 9 /* secure computing */ - #define TIF_SYSCALL_AUDIT 10 /* syscall auditing active */ - /* flag bit 11 is available */ -@@ -237,22 +227,24 @@ register struct thread_info *current_thread_info_reg asm("g6"); - #define TIF_ABI_PENDING 12 - #define TIF_MEMDIE 13 - #define TIF_POLLING_NRFLAG 14 -+#define TIF_PERFMON_CTXSW 15 /* perfmon needs ctxsw calls */ - - #define _TIF_SYSCALL_TRACE (1< - #include - #include -+#include - - #include - #include -@@ -385,11 +386,7 @@ void exit_thread(void) - t->utraps[0]--; - } - -- if (test_and_clear_thread_flag(TIF_PERFCTR)) { -- t->user_cntd0 = t->user_cntd1 = NULL; -- t->pcr_reg = 0; -- write_pcr(0); -- } -+ pfm_exit_thread(); - } - - void flush_thread(void) -@@ -411,13 +408,6 @@ void flush_thread(void) - - set_thread_wsaved(0); - -- /* Turn off performance counters if on. */ -- if (test_and_clear_thread_flag(TIF_PERFCTR)) { -- t->user_cntd0 = t->user_cntd1 = NULL; -- t->pcr_reg = 0; -- write_pcr(0); -- } -- - /* Clear FPU register state. 
*/ - t->fpsaved[0] = 0; - -@@ -631,16 +621,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, - t->kregs->u_regs[UREG_FP] = - ((unsigned long) child_sf) - STACK_BIAS; - -- /* Special case, if we are spawning a kernel thread from -- * a userspace task (usermode helper, NFS or similar), we -- * must disable performance counters in the child because -- * the address space and protection realm are changing. -- */ -- if (t->flags & _TIF_PERFCTR) { -- t->user_cntd0 = t->user_cntd1 = NULL; -- t->pcr_reg = 0; -- t->flags &= ~_TIF_PERFCTR; -- } - t->flags |= ((long)ASI_P << TI_FLAG_CURRENT_DS_SHIFT); - t->kregs->u_regs[UREG_G6] = (unsigned long) t; - t->kregs->u_regs[UREG_G4] = (unsigned long) t->task; -@@ -673,6 +653,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, - if (clone_flags & CLONE_SETTLS) - t->kregs->u_regs[UREG_G7] = regs->u_regs[UREG_I3]; - -+ pfm_copy_thread(p); -+ - return 0; - } - -diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S -index 97a993c..c2af29d 100644 ---- a/arch/sparc64/kernel/rtrap.S -+++ b/arch/sparc64/kernel/rtrap.S -@@ -65,55 +65,14 @@ __handle_user_windows: - ba,pt %xcc, __handle_user_windows_continue - - andn %l1, %l4, %l1 --__handle_perfctrs: -- call update_perfctrs -- wrpr %g0, RTRAP_PSTATE, %pstate -- wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate -- ldub [%g6 + TI_WSAVED], %o2 -- brz,pt %o2, 1f -- nop -- /* Redo userwin+sched+sig checks */ -- call fault_in_user_windows -- -- wrpr %g0, RTRAP_PSTATE, %pstate -- wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate -- ldx [%g6 + TI_FLAGS], %l0 -- andcc %l0, _TIF_NEED_RESCHED, %g0 -- be,pt %xcc, 1f -- -- nop -- call schedule -- wrpr %g0, RTRAP_PSTATE, %pstate -- wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate -- ldx [%g6 + TI_FLAGS], %l0 --1: andcc %l0, _TIF_DO_NOTIFY_RESUME_MASK, %g0 -- -- be,pt %xcc, __handle_perfctrs_continue -- sethi %hi(TSTATE_PEF), %o0 -- mov %l5, %o1 -- add %sp, PTREGS_OFF, %o0 -- mov %l0, %o2 -- call do_notify_resume -- -- wrpr 
%g0, RTRAP_PSTATE, %pstate -- wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate -- /* Signal delivery can modify pt_regs tstate, so we must -- * reload it. -- */ -- ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %l1 -- sethi %hi(0xf << 20), %l4 -- and %l1, %l4, %l4 -- andn %l1, %l4, %l1 -- ba,pt %xcc, __handle_perfctrs_continue -- -- sethi %hi(TSTATE_PEF), %o0 - __handle_userfpu: - rd %fprs, %l5 - andcc %l5, FPRS_FEF, %g0 - sethi %hi(TSTATE_PEF), %o0 - be,a,pn %icc, __handle_userfpu_continue - andn %l1, %o0, %l1 -- ba,a,pt %xcc, __handle_userfpu_continue -+ ba,pt %xcc, __handle_userfpu_continue -+ nop - - __handle_signal: - mov %l5, %o1 -@@ -202,12 +161,8 @@ __handle_signal_continue: - brnz,pn %o2, __handle_user_windows - nop - __handle_user_windows_continue: -- ldx [%g6 + TI_FLAGS], %l5 -- andcc %l5, _TIF_PERFCTR, %g0 - sethi %hi(TSTATE_PEF), %o0 -- bne,pn %xcc, __handle_perfctrs --__handle_perfctrs_continue: -- andcc %l1, %o0, %g0 -+ andcc %l1, %o0, %g0 - - /* This fpdepth clear is necessary for non-syscall rtraps only */ - user_nowork: -diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c -index c8b03a4..248aa1f 100644 ---- a/arch/sparc64/kernel/setup.c -+++ b/arch/sparc64/kernel/setup.c -@@ -352,6 +352,7 @@ static int show_cpuinfo(struct seq_file *m, void *__unused) - seq_printf(m, - "cpu\t\t: %s\n" - "fpu\t\t: %s\n" -+ "pmu\t\t: %s\n" - "prom\t\t: %s\n" - "type\t\t: %s\n" - "ncpus probed\t: %d\n" -@@ -364,6 +365,7 @@ static int show_cpuinfo(struct seq_file *m, void *__unused) - , - sparc_cpu_type, - sparc_fpu_type, -+ sparc_pmu_type, - prom_version, - ((tlb_type == hypervisor) ? 
- "sun4v" : -diff --git a/arch/sparc64/kernel/signal.c b/arch/sparc64/kernel/signal.c -index ec82d76..cea1082 100644 ---- a/arch/sparc64/kernel/signal.c -+++ b/arch/sparc64/kernel/signal.c -@@ -23,6 +23,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -608,6 +609,9 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0) - - void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long thread_info_flags) - { -+ if (thread_info_flags & _TIF_PERFMON_WORK) -+ pfm_handle_work(regs); -+ - if (thread_info_flags & _TIF_SIGPENDING) - do_signal(regs, orig_i0); - if (thread_info_flags & _TIF_NOTIFY_RESUME) { -diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c -index 39749e3..384004b 100644 ---- a/arch/sparc64/kernel/sys_sparc.c -+++ b/arch/sparc64/kernel/sys_sparc.c -@@ -26,7 +26,6 @@ - - #include - #include --#include - #include - - #include "entry.h" -@@ -791,106 +790,10 @@ asmlinkage long sys_rt_sigaction(int sig, - return ret; - } - --/* Invoked by rtrap code to update performance counters in -- * user space. 
-- */ --asmlinkage void update_perfctrs(void) --{ -- unsigned long pic, tmp; -- -- read_pic(pic); -- tmp = (current_thread_info()->kernel_cntd0 += (unsigned int)pic); -- __put_user(tmp, current_thread_info()->user_cntd0); -- tmp = (current_thread_info()->kernel_cntd1 += (pic >> 32)); -- __put_user(tmp, current_thread_info()->user_cntd1); -- reset_pic(); --} -- - asmlinkage long sys_perfctr(int opcode, unsigned long arg0, unsigned long arg1, unsigned long arg2) - { -- int err = 0; -- -- switch(opcode) { -- case PERFCTR_ON: -- current_thread_info()->pcr_reg = arg2; -- current_thread_info()->user_cntd0 = (u64 __user *) arg0; -- current_thread_info()->user_cntd1 = (u64 __user *) arg1; -- current_thread_info()->kernel_cntd0 = -- current_thread_info()->kernel_cntd1 = 0; -- write_pcr(arg2); -- reset_pic(); -- set_thread_flag(TIF_PERFCTR); -- break; -- -- case PERFCTR_OFF: -- err = -EINVAL; -- if (test_thread_flag(TIF_PERFCTR)) { -- current_thread_info()->user_cntd0 = -- current_thread_info()->user_cntd1 = NULL; -- current_thread_info()->pcr_reg = 0; -- write_pcr(0); -- clear_thread_flag(TIF_PERFCTR); -- err = 0; -- } -- break; -- -- case PERFCTR_READ: { -- unsigned long pic, tmp; -- -- if (!test_thread_flag(TIF_PERFCTR)) { -- err = -EINVAL; -- break; -- } -- read_pic(pic); -- tmp = (current_thread_info()->kernel_cntd0 += (unsigned int)pic); -- err |= __put_user(tmp, current_thread_info()->user_cntd0); -- tmp = (current_thread_info()->kernel_cntd1 += (pic >> 32)); -- err |= __put_user(tmp, current_thread_info()->user_cntd1); -- reset_pic(); -- break; -- } -- -- case PERFCTR_CLRPIC: -- if (!test_thread_flag(TIF_PERFCTR)) { -- err = -EINVAL; -- break; -- } -- current_thread_info()->kernel_cntd0 = -- current_thread_info()->kernel_cntd1 = 0; -- reset_pic(); -- break; -- -- case PERFCTR_SETPCR: { -- u64 __user *user_pcr = (u64 __user *)arg0; -- -- if (!test_thread_flag(TIF_PERFCTR)) { -- err = -EINVAL; -- break; -- } -- err |= __get_user(current_thread_info()->pcr_reg, 
user_pcr); -- write_pcr(current_thread_info()->pcr_reg); -- current_thread_info()->kernel_cntd0 = -- current_thread_info()->kernel_cntd1 = 0; -- reset_pic(); -- break; -- } -- -- case PERFCTR_GETPCR: { -- u64 __user *user_pcr = (u64 __user *)arg0; -- -- if (!test_thread_flag(TIF_PERFCTR)) { -- err = -EINVAL; -- break; -- } -- err |= __put_user(current_thread_info()->pcr_reg, user_pcr); -- break; -- } -- -- default: -- err = -EINVAL; -- break; -- }; -- return err; -+ /* Superceded by perfmon2 */ -+ return -ENOSYS; - } - - /* -diff --git a/arch/sparc64/kernel/syscalls.S b/arch/sparc64/kernel/syscalls.S -index a2f2427..b20bf1e 100644 ---- a/arch/sparc64/kernel/syscalls.S -+++ b/arch/sparc64/kernel/syscalls.S -@@ -117,26 +117,9 @@ ret_from_syscall: - stb %g0, [%g6 + TI_NEW_CHILD] - ldx [%g6 + TI_FLAGS], %l0 - call schedule_tail -- mov %g7, %o0 -- andcc %l0, _TIF_PERFCTR, %g0 -- be,pt %icc, 1f -- nop -- ldx [%g6 + TI_PCR], %o7 -- wr %g0, %o7, %pcr -- -- /* Blackbird errata workaround. See commentary in -- * smp.c:smp_percpu_timer_interrupt() for more -- * information. 
-- */ -- ba,pt %xcc, 99f -- nop -- -- .align 64 --99: wr %g0, %g0, %pic -- rd %pic, %g0 -- --1: ba,pt %xcc, ret_sys_call -- ldx [%sp + PTREGS_OFF + PT_V9_I0], %o0 -+ mov %g7, %o0 -+ ba,pt %xcc, ret_sys_call -+ ldx [%sp + PTREGS_OFF + PT_V9_I0], %o0 - - .globl sparc_exit - .type sparc_exit,#function -diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S -index 0fdbf3b..1a1a296 100644 ---- a/arch/sparc64/kernel/systbls.S -+++ b/arch/sparc64/kernel/systbls.S -@@ -82,7 +82,9 @@ sys_call_table32: - .word compat_sys_set_mempolicy, compat_sys_kexec_load, compat_sys_move_pages, sys_getcpu, compat_sys_epoll_pwait - /*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate - .word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1 --/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1 -+/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_pfm_create_context, sys_pfm_write_pmcs -+ .word sys_pfm_write_pmds, sys_pfm_read_pmds, sys_pfm_load_context, sys_pfm_start, sys_pfm_stop -+/*330*/ .word sys_pfm_restart, sys_pfm_create_evtsets, sys_pfm_getinfo_evtsets, sys_pfm_delete_evtsets, sys_pfm_unload_context - - #endif /* CONFIG_COMPAT */ - -@@ -156,4 +158,6 @@ sys_call_table: - .word sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait - /*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate - .word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 --/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1 -+/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_pfm_create_context, sys_pfm_write_pmcs -+ .word sys_pfm_write_pmds, sys_pfm_read_pmds, sys_pfm_load_context, sys_pfm_start, sys_pfm_stop -+/*330*/ .word sys_pfm_restart, sys_pfm_create_evtsets, sys_pfm_getinfo_evtsets, sys_pfm_delete_evtsets, sys_pfm_unload_context -diff --git 
a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c -index c824df1..be45d09 100644 ---- a/arch/sparc64/kernel/traps.c -+++ b/arch/sparc64/kernel/traps.c -@@ -2470,86 +2470,90 @@ extern void tsb_config_offsets_are_bolixed_dave(void); - /* Only invoked on boot processor. */ - void __init trap_init(void) - { -- /* Compile time sanity check. */ -- if (TI_TASK != offsetof(struct thread_info, task) || -- TI_FLAGS != offsetof(struct thread_info, flags) || -- TI_CPU != offsetof(struct thread_info, cpu) || -- TI_FPSAVED != offsetof(struct thread_info, fpsaved) || -- TI_KSP != offsetof(struct thread_info, ksp) || -- TI_FAULT_ADDR != offsetof(struct thread_info, fault_address) || -- TI_KREGS != offsetof(struct thread_info, kregs) || -- TI_UTRAPS != offsetof(struct thread_info, utraps) || -- TI_EXEC_DOMAIN != offsetof(struct thread_info, exec_domain) || -- TI_REG_WINDOW != offsetof(struct thread_info, reg_window) || -- TI_RWIN_SPTRS != offsetof(struct thread_info, rwbuf_stkptrs) || -- TI_GSR != offsetof(struct thread_info, gsr) || -- TI_XFSR != offsetof(struct thread_info, xfsr) || -- TI_USER_CNTD0 != offsetof(struct thread_info, user_cntd0) || -- TI_USER_CNTD1 != offsetof(struct thread_info, user_cntd1) || -- TI_KERN_CNTD0 != offsetof(struct thread_info, kernel_cntd0) || -- TI_KERN_CNTD1 != offsetof(struct thread_info, kernel_cntd1) || -- TI_PCR != offsetof(struct thread_info, pcr_reg) || -- TI_PRE_COUNT != offsetof(struct thread_info, preempt_count) || -- TI_NEW_CHILD != offsetof(struct thread_info, new_child) || -- TI_SYS_NOERROR != offsetof(struct thread_info, syscall_noerror) || -- TI_RESTART_BLOCK != offsetof(struct thread_info, restart_block) || -- TI_KUNA_REGS != offsetof(struct thread_info, kern_una_regs) || -- TI_KUNA_INSN != offsetof(struct thread_info, kern_una_insn) || -- TI_FPREGS != offsetof(struct thread_info, fpregs) || -- (TI_FPREGS & (64 - 1))) -- thread_info_offsets_are_bolixed_dave(); -- -- if (TRAP_PER_CPU_THREAD != offsetof(struct trap_per_cpu, 
thread) || -- (TRAP_PER_CPU_PGD_PADDR != -- offsetof(struct trap_per_cpu, pgd_paddr)) || -- (TRAP_PER_CPU_CPU_MONDO_PA != -- offsetof(struct trap_per_cpu, cpu_mondo_pa)) || -- (TRAP_PER_CPU_DEV_MONDO_PA != -- offsetof(struct trap_per_cpu, dev_mondo_pa)) || -- (TRAP_PER_CPU_RESUM_MONDO_PA != -- offsetof(struct trap_per_cpu, resum_mondo_pa)) || -- (TRAP_PER_CPU_RESUM_KBUF_PA != -- offsetof(struct trap_per_cpu, resum_kernel_buf_pa)) || -- (TRAP_PER_CPU_NONRESUM_MONDO_PA != -- offsetof(struct trap_per_cpu, nonresum_mondo_pa)) || -- (TRAP_PER_CPU_NONRESUM_KBUF_PA != -- offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa)) || -- (TRAP_PER_CPU_FAULT_INFO != -- offsetof(struct trap_per_cpu, fault_info)) || -- (TRAP_PER_CPU_CPU_MONDO_BLOCK_PA != -- offsetof(struct trap_per_cpu, cpu_mondo_block_pa)) || -- (TRAP_PER_CPU_CPU_LIST_PA != -- offsetof(struct trap_per_cpu, cpu_list_pa)) || -- (TRAP_PER_CPU_TSB_HUGE != -- offsetof(struct trap_per_cpu, tsb_huge)) || -- (TRAP_PER_CPU_TSB_HUGE_TEMP != -- offsetof(struct trap_per_cpu, tsb_huge_temp)) || -- (TRAP_PER_CPU_IRQ_WORKLIST_PA != -- offsetof(struct trap_per_cpu, irq_worklist_pa)) || -- (TRAP_PER_CPU_CPU_MONDO_QMASK != -- offsetof(struct trap_per_cpu, cpu_mondo_qmask)) || -- (TRAP_PER_CPU_DEV_MONDO_QMASK != -- offsetof(struct trap_per_cpu, dev_mondo_qmask)) || -- (TRAP_PER_CPU_RESUM_QMASK != -- offsetof(struct trap_per_cpu, resum_qmask)) || -- (TRAP_PER_CPU_NONRESUM_QMASK != -- offsetof(struct trap_per_cpu, nonresum_qmask))) -- trap_per_cpu_offsets_are_bolixed_dave(); -- -- if ((TSB_CONFIG_TSB != -- offsetof(struct tsb_config, tsb)) || -- (TSB_CONFIG_RSS_LIMIT != -- offsetof(struct tsb_config, tsb_rss_limit)) || -- (TSB_CONFIG_NENTRIES != -- offsetof(struct tsb_config, tsb_nentries)) || -- (TSB_CONFIG_REG_VAL != -- offsetof(struct tsb_config, tsb_reg_val)) || -- (TSB_CONFIG_MAP_VADDR != -- offsetof(struct tsb_config, tsb_map_vaddr)) || -- (TSB_CONFIG_MAP_PTE != -- offsetof(struct tsb_config, tsb_map_pte))) -- 
tsb_config_offsets_are_bolixed_dave(); -- -+ BUILD_BUG_ON(TI_TASK != offsetof(struct thread_info, task)); -+ BUILD_BUG_ON(TI_FLAGS != offsetof(struct thread_info, flags)); -+ BUILD_BUG_ON(TI_CPU != offsetof(struct thread_info, cpu)); -+ BUILD_BUG_ON(TI_FPSAVED != offsetof(struct thread_info, fpsaved)); -+ BUILD_BUG_ON(TI_KSP != offsetof(struct thread_info, ksp)); -+ BUILD_BUG_ON(TI_FAULT_ADDR != -+ offsetof(struct thread_info, fault_address)); -+ BUILD_BUG_ON(TI_KREGS != offsetof(struct thread_info, kregs)); -+ BUILD_BUG_ON(TI_UTRAPS != offsetof(struct thread_info, utraps)); -+ BUILD_BUG_ON(TI_EXEC_DOMAIN != -+ offsetof(struct thread_info, exec_domain)); -+ BUILD_BUG_ON(TI_REG_WINDOW != -+ offsetof(struct thread_info, reg_window)); -+ BUILD_BUG_ON(TI_RWIN_SPTRS != -+ offsetof(struct thread_info, rwbuf_stkptrs)); -+ BUILD_BUG_ON(TI_GSR != offsetof(struct thread_info, gsr)); -+ BUILD_BUG_ON(TI_XFSR != offsetof(struct thread_info, xfsr)); -+ BUILD_BUG_ON(TI_PRE_COUNT != -+ offsetof(struct thread_info, preempt_count)); -+ BUILD_BUG_ON(TI_NEW_CHILD != -+ offsetof(struct thread_info, new_child)); -+ BUILD_BUG_ON(TI_SYS_NOERROR != -+ offsetof(struct thread_info, syscall_noerror)); -+ BUILD_BUG_ON(TI_RESTART_BLOCK != -+ offsetof(struct thread_info, restart_block)); -+ BUILD_BUG_ON(TI_KUNA_REGS != -+ offsetof(struct thread_info, kern_una_regs)); -+ BUILD_BUG_ON(TI_KUNA_INSN != -+ offsetof(struct thread_info, kern_una_insn)); -+ BUILD_BUG_ON(TI_FPREGS != offsetof(struct thread_info, fpregs)); -+ BUILD_BUG_ON((TI_FPREGS & (64 - 1))); -+ -+ BUILD_BUG_ON(TRAP_PER_CPU_THREAD != -+ offsetof(struct trap_per_cpu, thread)); -+ BUILD_BUG_ON(TRAP_PER_CPU_PGD_PADDR != -+ offsetof(struct trap_per_cpu, pgd_paddr)); -+ BUILD_BUG_ON(TRAP_PER_CPU_CPU_MONDO_PA != -+ offsetof(struct trap_per_cpu, cpu_mondo_pa)); -+ BUILD_BUG_ON(TRAP_PER_CPU_DEV_MONDO_PA != -+ offsetof(struct trap_per_cpu, dev_mondo_pa)); -+ BUILD_BUG_ON(TRAP_PER_CPU_RESUM_MONDO_PA != -+ offsetof(struct trap_per_cpu, 
resum_mondo_pa)); -+ BUILD_BUG_ON(TRAP_PER_CPU_RESUM_KBUF_PA != -+ offsetof(struct trap_per_cpu, resum_kernel_buf_pa)); -+ BUILD_BUG_ON(TRAP_PER_CPU_NONRESUM_MONDO_PA != -+ offsetof(struct trap_per_cpu, nonresum_mondo_pa)); -+ BUILD_BUG_ON(TRAP_PER_CPU_NONRESUM_KBUF_PA != -+ offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa)); -+ BUILD_BUG_ON(TRAP_PER_CPU_FAULT_INFO != -+ offsetof(struct trap_per_cpu, fault_info)); -+ BUILD_BUG_ON(TRAP_PER_CPU_CPU_MONDO_BLOCK_PA != -+ offsetof(struct trap_per_cpu, cpu_mondo_block_pa)); -+ BUILD_BUG_ON(TRAP_PER_CPU_CPU_LIST_PA != -+ offsetof(struct trap_per_cpu, cpu_list_pa)); -+ BUILD_BUG_ON(TRAP_PER_CPU_TSB_HUGE != -+ offsetof(struct trap_per_cpu, tsb_huge)); -+ BUILD_BUG_ON(TRAP_PER_CPU_TSB_HUGE_TEMP != -+ offsetof(struct trap_per_cpu, tsb_huge_temp)); -+#if 0 -+ BUILD_BUG_ON(TRAP_PER_CPU_IRQ_WORKLIST != -+ offsetof(struct trap_per_cpu, irq_worklist)); -+#endif -+ BUILD_BUG_ON(TRAP_PER_CPU_CPU_MONDO_QMASK != -+ offsetof(struct trap_per_cpu, cpu_mondo_qmask)); -+ BUILD_BUG_ON(TRAP_PER_CPU_DEV_MONDO_QMASK != -+ offsetof(struct trap_per_cpu, dev_mondo_qmask)); -+ BUILD_BUG_ON(TRAP_PER_CPU_RESUM_QMASK != -+ offsetof(struct trap_per_cpu, resum_qmask)); -+ BUILD_BUG_ON(TRAP_PER_CPU_NONRESUM_QMASK != -+ offsetof(struct trap_per_cpu, nonresum_qmask)); -+ -+ BUILD_BUG_ON(TSB_CONFIG_TSB != -+ offsetof(struct tsb_config, tsb)); -+ BUILD_BUG_ON(TSB_CONFIG_RSS_LIMIT != -+ offsetof(struct tsb_config, tsb_rss_limit)); -+ BUILD_BUG_ON(TSB_CONFIG_NENTRIES != -+ offsetof(struct tsb_config, tsb_nentries)); -+ BUILD_BUG_ON(TSB_CONFIG_REG_VAL != -+ offsetof(struct tsb_config, tsb_reg_val)); -+ BUILD_BUG_ON(TSB_CONFIG_MAP_VADDR != -+ offsetof(struct tsb_config, tsb_map_vaddr)); -+ BUILD_BUG_ON(TSB_CONFIG_MAP_PTE != -+ offsetof(struct tsb_config, tsb_map_pte)); -+ - /* Attach to the address space of init_task. On SMP we - * do this in smp.c:smp_callin for other cpus. 
- */ -diff --git a/arch/sparc64/kernel/ttable.S b/arch/sparc64/kernel/ttable.S -index 1ade3d6..2a31ffa 100644 ---- a/arch/sparc64/kernel/ttable.S -+++ b/arch/sparc64/kernel/ttable.S -@@ -66,7 +66,7 @@ tl0_irq6: BTRAP(0x46) - tl0_irq7: BTRAP(0x47) BTRAP(0x48) BTRAP(0x49) - tl0_irq10: BTRAP(0x4a) BTRAP(0x4b) BTRAP(0x4c) BTRAP(0x4d) - tl0_irq14: TRAP_IRQ(timer_interrupt, 14) --tl0_irq15: TRAP_IRQ(handler_irq, 15) -+tl0_irq15: TRAP_IRQ(perfctr_irq, 15) - tl0_resv050: BTRAP(0x50) BTRAP(0x51) BTRAP(0x52) BTRAP(0x53) BTRAP(0x54) BTRAP(0x55) - tl0_resv056: BTRAP(0x56) BTRAP(0x57) BTRAP(0x58) BTRAP(0x59) BTRAP(0x5a) BTRAP(0x5b) - tl0_resv05c: BTRAP(0x5c) BTRAP(0x5d) BTRAP(0x5e) BTRAP(0x5f) -diff --git a/arch/sparc64/perfmon/Kconfig b/arch/sparc64/perfmon/Kconfig -new file mode 100644 -index 0000000..4672024 ---- /dev/null -+++ b/arch/sparc64/perfmon/Kconfig -@@ -0,0 +1,26 @@ -+menu "Hardware Performance Monitoring support" -+config PERFMON -+ bool "Perfmon2 performance monitoring interface" -+ default n -+ help -+ Enables the perfmon2 interface to access the hardware -+ performance counters. See for -+ more details. -+ -+config PERFMON_DEBUG -+ bool "Perfmon debugging" -+ depends on PERFMON -+ default n -+ help -+ Enables perfmon debugging support -+ -+config PERFMON_DEBUG_FS -+ bool "Enable perfmon statistics reporting via debugfs" -+ default y -+ depends on PERFMON && DEBUG_FS -+ help -+ Enable collection and reporting of perfmon timing statistics under -+ debugfs. This is used for debugging and performance analysis of the -+ subsystem. The debugfs filesystem must be mounted. 
-+ -+endmenu -diff --git a/arch/sparc64/perfmon/Makefile b/arch/sparc64/perfmon/Makefile -new file mode 100644 -index 0000000..ad2d907 ---- /dev/null -+++ b/arch/sparc64/perfmon/Makefile -@@ -0,0 +1 @@ -+obj-$(CONFIG_PERFMON) += perfmon.o -diff --git a/arch/sparc64/perfmon/perfmon.c b/arch/sparc64/perfmon/perfmon.c -new file mode 100644 -index 0000000..9e29833 ---- /dev/null -+++ b/arch/sparc64/perfmon/perfmon.c -@@ -0,0 +1,422 @@ -+/* perfmon.c: sparc64 perfmon support -+ * -+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net) -+ */ -+ -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+ -+struct pcr_ops { -+ void (*write)(u64); -+ u64 (*read)(void); -+}; -+ -+static void direct_write_pcr(u64 val) -+{ -+ write_pcr(val); -+} -+ -+static u64 direct_read_pcr(void) -+{ -+ u64 pcr; -+ -+ read_pcr(pcr); -+ -+ return pcr; -+} -+ -+static struct pcr_ops direct_pcr_ops = { -+ .write = direct_write_pcr, -+ .read = direct_read_pcr, -+}; -+ -+/* Using the hypervisor call is needed so that we can set the -+ * hypervisor trace bit correctly, which is hyperprivileged. -+ */ -+static void n2_write_pcr(u64 val) -+{ -+ unsigned long ret; -+ -+ ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val); -+ if (val != HV_EOK) -+ write_pcr(val); -+} -+ -+static u64 n2_read_pcr(void) -+{ -+ u64 pcr; -+ -+ read_pcr(pcr); -+ -+ return pcr; -+} -+ -+static struct pcr_ops n2_pcr_ops = { -+ .write = n2_write_pcr, -+ .read = n2_read_pcr, -+}; -+ -+static struct pcr_ops *pcr_ops; -+ -+void pfm_arch_write_pmc(struct pfm_context *ctx, -+ unsigned int cnum, u64 value) -+{ -+ /* -+ * we only write to the actual register when monitoring is -+ * active (pfm_start was issued) -+ */ -+ if (ctx && ctx->flags.started == 0) -+ return; -+ -+ pcr_ops->write(value); -+} -+ -+u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum) -+{ -+ return pcr_ops->read(); -+} -+ -+/* -+ * collect pending overflowed PMDs. 
Called from pfm_ctxsw() -+ * and from PMU interrupt handler. Must fill in set->povfl_pmds[] -+ * and set->npend_ovfls. Interrupts are masked -+ */ -+static void __pfm_get_ovfl_pmds(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ unsigned int max = ctx->regs.max_intr_pmd; -+ u64 wmask = 1ULL << pfm_pmu_conf->counter_width; -+ u64 *intr_pmds = ctx->regs.intr_pmds; -+ u64 *used_mask = set->used_pmds; -+ u64 mask[PFM_PMD_BV]; -+ unsigned int i; -+ -+ bitmap_and(cast_ulp(mask), -+ cast_ulp(intr_pmds), -+ cast_ulp(used_mask), -+ max); -+ -+ /* -+ * check all PMD that can generate interrupts -+ * (that includes counters) -+ */ -+ for (i = 0; i < max; i++) { -+ if (test_bit(i, mask)) { -+ u64 new_val = pfm_arch_read_pmd(ctx, i); -+ -+ PFM_DBG_ovfl("pmd%u new_val=0x%llx bit=%d\n", -+ i, (unsigned long long)new_val, -+ (new_val&wmask) ? 1 : 0); -+ -+ if (new_val & wmask) { -+ __set_bit(i, set->povfl_pmds); -+ set->npend_ovfls++; -+ } -+ } -+ } -+} -+ -+static void pfm_stop_active(struct task_struct *task, struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ unsigned int i, max = ctx->regs.max_pmc; -+ -+ /* -+ * clear enable bits, assume all pmcs are enable pmcs -+ */ -+ for (i = 0; i < max; i++) { -+ if (test_bit(i, set->used_pmcs)) -+ pfm_arch_write_pmc(ctx, i, 0); -+ } -+ -+ if (set->npend_ovfls) -+ return; -+ -+ __pfm_get_ovfl_pmds(ctx, set); -+} -+ -+/* -+ * Called from pfm_ctxsw(). Task is guaranteed to be current. -+ * Context is locked. Interrupts are masked. Monitoring is active. -+ * PMU access is guaranteed. PMC and PMD registers are live in PMU. -+ * -+ * for per-thread: -+ * must stop monitoring for the task -+ * -+ * Return: -+ * non-zero : did not save PMDs (as part of stopping the PMU) -+ * 0 : saved PMDs (no need to save them in caller) -+ */ -+int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx) -+{ -+ /* -+ * disable lazy restore of PMC registers. 
-+ */ -+ ctx->active_set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS; -+ -+ pfm_stop_active(task, ctx, ctx->active_set); -+ -+ return 1; -+} -+ -+/* -+ * Called from pfm_stop() and idle notifier -+ * -+ * Interrupts are masked. Context is locked. Set is the active set. -+ * -+ * For per-thread: -+ * task is not necessarily current. If not current task, then -+ * task is guaranteed stopped and off any cpu. Access to PMU -+ * is not guaranteed. Interrupts are masked. Context is locked. -+ * Set is the active set. -+ * -+ * For system-wide: -+ * task is current -+ * -+ * must disable active monitoring. ctx cannot be NULL -+ */ -+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx) -+{ -+ /* -+ * no need to go through stop_save() -+ * if we are already stopped -+ */ -+ if (!ctx->flags.started || ctx->state == PFM_CTX_MASKED) -+ return; -+ -+ /* -+ * stop live registers and collect pending overflow -+ */ -+ if (task == current) -+ pfm_stop_active(task, ctx, ctx->active_set); -+} -+ -+/* -+ * Enable active monitoring. Called from pfm_start() and -+ * pfm_arch_unmask_monitoring(). -+ * -+ * Interrupts are masked. Context is locked. Set is the active set. -+ * -+ * For per-trhead: -+ * Task is not necessarily current. If not current task, then task -+ * is guaranteed stopped and off any cpu. Access to PMU is not guaranteed. -+ * -+ * For system-wide: -+ * task is always current -+ * -+ * must enable active monitoring. -+ */ -+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx) -+{ -+ struct pfm_event_set *set; -+ unsigned int max_pmc = ctx->regs.max_pmc; -+ unsigned int i; -+ -+ if (task != current) -+ return; -+ -+ set = ctx->active_set; -+ for (i = 0; i < max_pmc; i++) { -+ if (test_bit(i, set->used_pmcs)) -+ pfm_arch_write_pmc(ctx, i, set->pmcs[i]); -+ } -+} -+ -+/* -+ * function called from pfm_switch_sets(), pfm_context_load_thread(), -+ * pfm_context_load_sys(), pfm_ctxsw(), pfm_switch_sets() -+ * context is locked. 
Interrupts are masked. set cannot be NULL. -+ * Access to the PMU is guaranteed. -+ * -+ * function must restore all PMD registers from set. -+ */ -+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ unsigned int max_pmd = ctx->regs.max_pmd; -+ u64 ovfl_mask = pfm_pmu_conf->ovfl_mask; -+ u64 *impl_pmds = ctx->regs.pmds; -+ unsigned int i; -+ -+ /* -+ * must restore all pmds to avoid leaking -+ * information to user. -+ */ -+ for (i = 0; i < max_pmd; i++) { -+ u64 val; -+ -+ if (test_bit(i, impl_pmds) == 0) -+ continue; -+ -+ val = set->pmds[i].value; -+ -+ /* -+ * set upper bits for counter to ensure -+ * overflow will trigger -+ */ -+ val &= ovfl_mask; -+ -+ pfm_arch_write_pmd(ctx, i, val); -+ } -+} -+ -+/* -+ * function called from pfm_switch_sets(), pfm_context_load_thread(), -+ * pfm_context_load_sys(), pfm_ctxsw(). -+ * Context is locked. Interrupts are masked. set cannot be NULL. -+ * Access to the PMU is guaranteed. -+ * -+ * function must restore all PMC registers from set, if needed. -+ */ -+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ unsigned int max_pmc = ctx->regs.max_pmc; -+ u64 *impl_pmcs = ctx->regs.pmcs; -+ unsigned int i; -+ -+ /* If we're masked or stopped we don't need to bother restoring -+ * the PMCs now. 
-+ */ -+ if (ctx->state == PFM_CTX_MASKED || ctx->flags.started == 0) -+ return; -+ -+ /* -+ * restore all pmcs -+ */ -+ for (i = 0; i < max_pmc; i++) -+ if (test_bit(i, impl_pmcs)) -+ pfm_arch_write_pmc(ctx, i, set->pmcs[i]); -+} -+ -+char *pfm_arch_get_pmu_module_name(void) -+{ -+ return NULL; -+} -+ -+void perfmon_interrupt(struct pt_regs *regs) -+{ -+ pfm_interrupt_handler(instruction_pointer(regs), regs); -+} -+ -+static struct pfm_regmap_desc pfm_sparc64_pmc_desc[] = { -+ PMC_D(PFM_REG_I, "PCR", 0, 0, 0, 0), -+}; -+ -+static struct pfm_regmap_desc pfm_sparc64_pmd_desc[] = { -+ PMD_D(PFM_REG_C, "PIC0", 0), -+ PMD_D(PFM_REG_C, "PIC1", 0), -+}; -+ -+static int pfm_sparc64_probe(void) -+{ -+ return 0; -+} -+ -+static struct pfm_pmu_config pmu_sparc64_pmu_conf = { -+ .counter_width = 31, -+ .pmd_desc = pfm_sparc64_pmd_desc, -+ .num_pmd_entries = 2, -+ .pmc_desc = pfm_sparc64_pmc_desc, -+ .num_pmc_entries = 1, -+ .probe_pmu = pfm_sparc64_probe, -+ .flags = PFM_PMU_BUILTIN_FLAG, -+ .owner = THIS_MODULE, -+}; -+ -+static unsigned long perf_hsvc_group; -+static unsigned long perf_hsvc_major; -+static unsigned long perf_hsvc_minor; -+ -+/* -+ * Register the sun4v hypervisor API group selected by sun4v_chip_type. -+ * Does nothing and returns 0 when tlb_type is not hypervisor. Returns -+ * -ENODEV for any chip other than Niagara-1/Niagara-2 or when -+ * sun4v_hvapi_register() reports failure. The chosen group is kept in -+ * perf_hsvc_group so unregister_perf_hsvc() can release it. -+ */ -+static int __init register_perf_hsvc(void) -+{ -+ if (tlb_type == hypervisor) { -+ switch (sun4v_chip_type) { -+ case SUN4V_CHIP_NIAGARA1: -+ perf_hsvc_group = HV_GRP_NIAGARA_CPU; /* Niagara-1 group, not the N2 one */ -+ break; -+ -+ case SUN4V_CHIP_NIAGARA2: -+ perf_hsvc_group = HV_GRP_N2_CPU; -+ break; -+ -+ default: -+ return -ENODEV; -+ } -+ -+ -+ perf_hsvc_major = 1; -+ perf_hsvc_minor = 0; -+ if (sun4v_hvapi_register(perf_hsvc_group, -+ perf_hsvc_major, -+ &perf_hsvc_minor)) { -+ printk("perfmon: Could not register hvapi.\n"); -+ return -ENODEV; -+ } -+ } -+ return 0; -+} -+ -+static void unregister_perf_hsvc(void) -+{ -+ if (tlb_type != hypervisor) -+ return; -+ sun4v_hvapi_unregister(perf_hsvc_group); -+} -+ -+static int __init pfm_sparc64_pmu_init(void) -+{ -+ u64 mask; -+ int err; -+ -+ err = register_perf_hsvc(); -+ if (err) -+ return err; -+ -+ if (tlb_type == 
hypervisor && -+ sun4v_chip_type == SUN4V_CHIP_NIAGARA2) -+ pcr_ops = &n2_pcr_ops; -+ else -+ pcr_ops = &direct_pcr_ops; -+ -+ if (!strcmp(sparc_pmu_type, "ultra12")) -+ mask = (0xf << 11) | (0xf << 4) | 0x7; -+ else if (!strcmp(sparc_pmu_type, "ultra3") || -+ !strcmp(sparc_pmu_type, "ultra3i") || -+ !strcmp(sparc_pmu_type, "ultra3+") || -+ !strcmp(sparc_pmu_type, "ultra4+")) -+ mask = (0x3f << 11) | (0x3f << 4) | 0x7; -+ else if (!strcmp(sparc_pmu_type, "niagara2")) -+ mask = ((1UL << 63) | (1UL << 62) | -+ (1UL << 31) | (0xfUL << 27) | (0xffUL << 19) | -+ (1UL << 18) | (0xfUL << 14) | (0xff << 6) | -+ (0x3UL << 4) | 0x7UL); -+ else if (!strcmp(sparc_pmu_type, "niagara")) -+ mask = ((1UL << 9) | (1UL << 8) | -+ (0x7UL << 4) | 0x7UL); -+ else { -+ err = -ENODEV; -+ goto out_err; -+ } -+ -+ pmu_sparc64_pmu_conf.pmu_name = sparc_pmu_type; -+ pfm_sparc64_pmc_desc[0].rsvd_msk = ~mask; -+ -+ return pfm_pmu_register(&pmu_sparc64_pmu_conf); -+ -+out_err: -+ unregister_perf_hsvc(); -+ return err; -+} -+ -+static void __exit pfm_sparc64_pmu_exit(void) -+{ -+ unregister_perf_hsvc(); -+ return pfm_pmu_unregister(&pmu_sparc64_pmu_conf); -+} -+ -+module_init(pfm_sparc64_pmu_init); -+module_exit(pfm_sparc64_pmu_exit); -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index ed92864..3a2b544 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -1378,6 +1378,8 @@ config COMPAT_VDSO - - If unsure, say Y. 
- -+source "arch/x86/perfmon/Kconfig" -+ - endmenu - - config ARCH_ENABLE_MEMORY_HOTPLUG -diff --git a/arch/x86/Makefile b/arch/x86/Makefile -index f5631da..c868ad6 100644 ---- a/arch/x86/Makefile -+++ b/arch/x86/Makefile -@@ -150,6 +150,8 @@ core-$(CONFIG_LGUEST_GUEST) += arch/x86/lguest/ - core-y += arch/x86/kernel/ - core-y += arch/x86/mm/ - -+core-$(CONFIG_PERFMON) += arch/x86/perfmon/ -+ - # Remaining sub architecture files - core-y += $(mcore-y) - -diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S -index ffc1bb4..58e00cb 100644 ---- a/arch/x86/ia32/ia32entry.S -+++ b/arch/x86/ia32/ia32entry.S -@@ -832,4 +832,16 @@ ia32_sys_call_table: - .quad sys_dup3 /* 330 */ - .quad sys_pipe2 - .quad sys_inotify_init1 -+ .quad sys_pfm_create_context -+ .quad sys_pfm_write_pmcs -+ .quad sys_pfm_write_pmds /* 335 */ -+ .quad sys_pfm_read_pmds -+ .quad sys_pfm_load_context -+ .quad sys_pfm_start -+ .quad sys_pfm_stop -+ .quad sys_pfm_restart /* 340 */ -+ .quad sys_pfm_create_evtsets -+ .quad sys_pfm_getinfo_evtsets -+ .quad sys_pfm_delete_evtsets -+ .quad sys_pfm_unload_context - ia32_syscall_end: -diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c -index f88bd0d..53fe335 100644 ---- a/arch/x86/kernel/apic_32.c -+++ b/arch/x86/kernel/apic_32.c -@@ -28,6 +28,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -669,6 +670,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) - setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_IBS; - } -+EXPORT_SYMBOL(setup_APIC_eilvt_ibs); - - /* - * Local APIC start and shutdown -@@ -1367,6 +1369,9 @@ void __init apic_intr_init(void) - #ifdef CONFIG_X86_MCE_P4THERMAL - alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); - #endif -+#ifdef CONFIG_PERFMON -+ set_intr_gate(LOCAL_PERFMON_VECTOR, pmu_interrupt); -+#endif - } - - /** -diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c -index 446c062..574cd3b 100644 ---- 
a/arch/x86/kernel/apic_64.c -+++ b/arch/x86/kernel/apic_64.c -@@ -228,6 +228,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) - setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_IBS; - } -+EXPORT_SYMBOL(setup_APIC_eilvt_ibs); - - /* - * Program the next event, relative to now -diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c -index 4e456bd..5b6d6ca 100644 ---- a/arch/x86/kernel/cpu/common.c -+++ b/arch/x86/kernel/cpu/common.c -@@ -5,6 +5,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -726,6 +727,8 @@ void __cpuinit cpu_init(void) - current_thread_info()->status = 0; - clear_used_math(); - mxcsr_feature_mask_init(); -+ -+ pfm_init_percpu(); - } - - #ifdef CONFIG_HOTPLUG_CPU -diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S -index 109792b..0b6e34c 100644 ---- a/arch/x86/kernel/entry_32.S -+++ b/arch/x86/kernel/entry_32.S -@@ -513,7 +513,7 @@ ENDPROC(system_call) - ALIGN - RING0_PTREGS_FRAME # can't unwind into user space anyway - work_pending: -- testb $_TIF_NEED_RESCHED, %cl -+ testw $(_TIF_NEED_RESCHED|_TIF_PERFMON_WORK), %cx - jz work_notifysig - work_resched: - call schedule -diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S -index 89434d4..34e44f5 100644 ---- a/arch/x86/kernel/entry_64.S -+++ b/arch/x86/kernel/entry_64.S -@@ -888,7 +888,13 @@ END(error_interrupt) - ENTRY(spurious_interrupt) - apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt - END(spurious_interrupt) -- -+ -+#ifdef CONFIG_PERFMON -+ENTRY(pmu_interrupt) -+ apicinterrupt LOCAL_PERFMON_VECTOR,smp_pmu_interrupt -+END(pmu_interrupt) -+#endif -+ - /* - * Exception entry points. 
- */ -diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c -index 1f26fd9..83f6bc1 100644 ---- a/arch/x86/kernel/irqinit_64.c -+++ b/arch/x86/kernel/irqinit_64.c -@@ -11,6 +11,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -217,6 +218,10 @@ void __init native_init_IRQ(void) - alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); - -+#ifdef CONFIG_PERFMON -+ alloc_intr_gate(LOCAL_PERFMON_VECTOR, pmu_interrupt); -+#endif -+ - if (!acpi_ioapic) - setup_irq(2, &irq2); - } -diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c -index 31f40b2..ed27150 100644 ---- a/arch/x86/kernel/process_32.c -+++ b/arch/x86/kernel/process_32.c -@@ -36,6 +36,7 @@ - #include - #include - #include -+#include - #include - - #include -@@ -277,6 +278,7 @@ void exit_thread(void) - tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; - put_cpu(); - } -+ pfm_exit_thread(); - } - - void flush_thread(void) -@@ -334,6 +336,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, - - savesegment(gs, p->thread.gs); - -+ pfm_copy_thread(p); -+ - tsk = current; - if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { - p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, -@@ -448,6 +452,9 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - prev = &prev_p->thread; - next = &next_p->thread; - -+ if (test_tsk_thread_flag(prev_p, TIF_PERFMON_CTXSW)) -+ pfm_ctxsw_out(prev_p, next_p); -+ - debugctl = prev->debugctlmsr; - if (next->ds_area_msr != prev->ds_area_msr) { - /* we clear debugctl to make sure DS -@@ -460,6 +467,9 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - if (next->debugctlmsr != debugctl) - update_debugctlmsr(next->debugctlmsr); - -+ if (test_tsk_thread_flag(next_p, TIF_PERFMON_CTXSW)) -+ pfm_ctxsw_in(prev_p, next_p); -+ - if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { - 
set_debugreg(next->debugreg0, 0); - set_debugreg(next->debugreg1, 1); -diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c -index e12e0e4..97d49ce 100644 ---- a/arch/x86/kernel/process_64.c -+++ b/arch/x86/kernel/process_64.c -@@ -36,6 +36,7 @@ - #include - #include - #include -+#include - #include - - #include -@@ -240,6 +241,7 @@ void exit_thread(void) - t->io_bitmap_max = 0; - put_cpu(); - } -+ pfm_exit_thread(); - } - - void flush_thread(void) -@@ -344,6 +346,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, - savesegment(es, p->thread.es); - savesegment(ds, p->thread.ds); - -+ pfm_copy_thread(p); -+ - if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { - p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); - if (!p->thread.io_bitmap_ptr) { -@@ -472,6 +476,9 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, - prev = &prev_p->thread, - next = &next_p->thread; - -+ if (test_tsk_thread_flag(prev_p, TIF_PERFMON_CTXSW)) -+ pfm_ctxsw_out(prev_p, next_p); -+ - debugctl = prev->debugctlmsr; - if (next->ds_area_msr != prev->ds_area_msr) { - /* we clear debugctl to make sure DS -@@ -484,6 +491,9 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, - if (next->debugctlmsr != debugctl) - update_debugctlmsr(next->debugctlmsr); - -+ if (test_tsk_thread_flag(next_p, TIF_PERFMON_CTXSW)) -+ pfm_ctxsw_in(prev_p, next_p); -+ - if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { - loaddebug(next, 0); - loaddebug(next, 1); -diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c -index 6fb5bcd..53e6665 100644 ---- a/arch/x86/kernel/signal_32.c -+++ b/arch/x86/kernel/signal_32.c -@@ -18,6 +18,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -657,6 +658,10 @@ static void do_signal(struct pt_regs *regs) - void - do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) - { -+ /* process perfmon asynchronous work (e.g. 
block thread or reset) */ -+ if (thread_info_flags & _TIF_PERFMON_WORK) -+ pfm_handle_work(regs); -+ - /* deal with pending signal delivery */ - if (thread_info_flags & _TIF_SIGPENDING) - do_signal(regs); -diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c -index ca316b5..6e9fa74 100644 ---- a/arch/x86/kernel/signal_64.c -+++ b/arch/x86/kernel/signal_64.c -@@ -19,6 +19,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -549,12 +550,17 @@ static void do_signal(struct pt_regs *regs) - void do_notify_resume(struct pt_regs *regs, void *unused, - __u32 thread_info_flags) - { -+ - #ifdef CONFIG_X86_MCE - /* notify userspace of pending MCEs */ - if (thread_info_flags & _TIF_MCE_NOTIFY) - mce_notify_user(); - #endif /* CONFIG_X86_MCE */ - -+ /* process perfmon asynchronous work (e.g. block thread or reset) */ -+ if (thread_info_flags & _TIF_PERFMON_WORK) -+ pfm_handle_work(regs); -+ - /* deal with pending signal delivery */ - if (thread_info_flags & _TIF_SIGPENDING) - do_signal(regs); -diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c -index 7985c5b..9ddf6db 100644 ---- a/arch/x86/kernel/smpboot.c -+++ b/arch/x86/kernel/smpboot.c -@@ -42,6 +42,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -1382,6 +1383,7 @@ int __cpu_disable(void) - remove_cpu_from_maps(cpu); - unlock_vector_lock(); - fixup_irqs(cpu_online_map); -+ pfm_cpu_disable(); - return 0; - } - -diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S -index d44395f..e1384a9 100644 ---- a/arch/x86/kernel/syscall_table_32.S -+++ b/arch/x86/kernel/syscall_table_32.S -@@ -332,3 +332,15 @@ ENTRY(sys_call_table) - .long sys_dup3 /* 330 */ - .long sys_pipe2 - .long sys_inotify_init1 -+ .long sys_pfm_create_context -+ .long sys_pfm_write_pmcs -+ .long sys_pfm_write_pmds /* 335 */ -+ .long sys_pfm_read_pmds -+ .long sys_pfm_load_context -+ .long sys_pfm_start -+ .long sys_pfm_stop -+ 
.long sys_pfm_restart /* 340 */ -+ .long sys_pfm_create_evtsets -+ .long sys_pfm_getinfo_evtsets -+ .long sys_pfm_delete_evtsets -+ .long sys_pfm_unload_context -diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c -index 8a5f161..10faef5 100644 ---- a/arch/x86/oprofile/nmi_int.c -+++ b/arch/x86/oprofile/nmi_int.c -@@ -16,6 +16,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -217,12 +218,18 @@ static int nmi_setup(void) - int err = 0; - int cpu; - -- if (!allocate_msrs()) -+ if (pfm_session_allcpus_acquire()) -+ return -EBUSY; -+ -+ if (!allocate_msrs()) { -+ pfm_session_allcpus_release(); - return -ENOMEM; -+ } - - err = register_die_notifier(&profile_exceptions_nb); - if (err) { - free_msrs(); -+ pfm_session_allcpus_release(); - return err; - } - -@@ -304,6 +311,7 @@ static void nmi_shutdown(void) - model->shutdown(msrs); - free_msrs(); - put_cpu_var(cpu_msrs); -+ pfm_session_allcpus_release(); - } - - static void nmi_cpu_start(void *dummy) -diff --git a/arch/x86/perfmon/Kconfig b/arch/x86/perfmon/Kconfig -new file mode 100644 -index 0000000..08842e6 ---- /dev/null -+++ b/arch/x86/perfmon/Kconfig -@@ -0,0 +1,89 @@ -+menu "Hardware Performance Monitoring support" -+config PERFMON -+ bool "Perfmon2 performance monitoring interface" -+ select X86_LOCAL_APIC -+ default n -+ help -+ Enables the perfmon2 interface to access the hardware -+ performance counters. See for -+ more details. -+ -+config PERFMON_DEBUG -+ bool "Perfmon debugging" -+ default n -+ depends on PERFMON -+ help -+ Enables perfmon debugging support -+ -+config PERFMON_DEBUG_FS -+ bool "Enable perfmon statistics reporting via debugfs" -+ default y -+ depends on PERFMON && DEBUG_FS -+ help -+ Enable collection and reporting of perfmon timing statistics under -+ debugfs. This is used for debugging and performance analysis of the -+ subsystem.The debugfs filesystem must be mounted. 
-+ -+config X86_PERFMON_P6 -+ tristate "Support for Intel P6/Pentium M processor hardware performance counters" -+ depends on PERFMON && X86_32 -+ default n -+ help -+ Enables support for Intel P6-style hardware performance counters. -+ To be used with Intel Pentium III, PentiumPro, Pentium M processors. -+ -+config X86_PERFMON_P4 -+ tristate "Support for Intel Pentium 4/Xeon hardware performance counters" -+ depends on PERFMON -+ default n -+ help -+ Enables support for Intel Pentium 4/Xeon (Netburst) hardware performance -+ counters. -+ -+config X86_PERFMON_PEBS_P4 -+ tristate "Support for Intel Netburst Precise Event-Based Sampling (PEBS)" -+ depends on PERFMON && X86_PERFMON_P4 -+ default n -+ help -+ Enables support for Precise Event-Based Sampling (PEBS) on the Intel -+ Netburst processors such as Pentium 4, Xeon which support it. -+ -+config X86_PERFMON_CORE -+ tristate "Support for Intel Core-based performance counters" -+ depends on PERFMON -+ default n -+ help -+ Enables support for Intel Core-based performance counters. Enable -+ this option to support Intel Core 2 processors. -+ -+config X86_PERFMON_PEBS_CORE -+ tristate "Support for Intel Core Precise Event-Based Sampling (PEBS)" -+ depends on PERFMON && X86_PERFMON_CORE -+ default n -+ help -+ Enables support for Precise Event-Based Sampling (PEBS) on the Intel -+ Core processors. -+ -+config X86_PERFMON_INTEL_ATOM -+ tristate "Support for Intel Atom processor" -+ depends on PERFMON -+ default n -+ help -+ Enables support for Intel Atom processors. -+ -+config X86_PERFMON_INTEL_ARCH -+ tristate "Support for Intel architectural perfmon v1/v2" -+ depends on PERFMON -+ default n -+ help -+ Enables support for Intel architectural performance counters. -+ This feature was introduced with Intel Core Solo/Core Duo processors. 
-+ -+config X86_PERFMON_AMD64 -+ tristate "Support AMD Athlon64/Opteron64 hardware performance counters" -+ depends on PERFMON -+ default n -+ help -+ Enables support for Athlon64/Opteron64 hardware performance counters. -+ Support for family 6, 15 and 16(10H) processors. -+endmenu -diff --git a/arch/x86/perfmon/Makefile b/arch/x86/perfmon/Makefile -new file mode 100644 -index 0000000..1cbed3e ---- /dev/null -+++ b/arch/x86/perfmon/Makefile -@@ -0,0 +1,13 @@ -+# -+# Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P. -+# Contributed by Stephane Eranian -+# -+obj-$(CONFIG_PERFMON) += perfmon.o -+obj-$(CONFIG_X86_PERFMON_P6) += perfmon_p6.o -+obj-$(CONFIG_X86_PERFMON_P4) += perfmon_p4.o -+obj-$(CONFIG_X86_PERFMON_CORE) += perfmon_intel_core.o -+obj-$(CONFIG_X86_PERFMON_INTEL_ARCH) += perfmon_intel_arch.o -+obj-$(CONFIG_X86_PERFMON_PEBS_P4) += perfmon_pebs_p4_smpl.o -+obj-$(CONFIG_X86_PERFMON_PEBS_CORE) += perfmon_pebs_core_smpl.o -+obj-$(CONFIG_X86_PERFMON_AMD64) += perfmon_amd64.o -+obj-$(CONFIG_X86_PERFMON_INTEL_ATOM) += perfmon_intel_atom.o -diff --git a/arch/x86/perfmon/perfmon.c b/arch/x86/perfmon/perfmon.c -new file mode 100644 -index 0000000..e727fed ---- /dev/null -+++ b/arch/x86/perfmon/perfmon.c -@@ -0,0 +1,761 @@ -+/* -+ * This file implements the X86 specific support for the perfmon2 interface -+ * -+ * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * Copyright (c) 2007 Advanced Micro Devices, Inc. -+ * Contributed by Robert Richter -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+DEFINE_PER_CPU(unsigned long, real_iip); -+DEFINE_PER_CPU(int, pfm_using_nmi); -+DEFINE_PER_CPU(unsigned long, saved_lvtpc); -+ -+/** -+ * pfm_arch_ctxswin_thread - thread context switch in -+ * @task: task switched in -+ * @ctx: context for the task -+ * -+ * Called from pfm_ctxsw(). Task is guaranteed to be current. -+ * set cannot be NULL. Context is locked. Interrupts are masked. -+ * -+ * Caller has already restored all PMD and PMC registers, if -+ * necessary (i.e., lazy restore scheme). -+ * -+ * On x86, the only common code just needs to unsecure RDPMC if necessary -+ * -+ * On model-specific features, e.g., PEBS, IBS, are taken care of in the -+ * corresponding PMU description module -+ */ -+void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx) -+{ -+ struct pfm_arch_context *ctx_arch; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ -+ /* -+ * restore saved real iip -+ */ -+ if (ctx->active_set->npend_ovfls) -+ __get_cpu_var(real_iip) = ctx_arch->saved_real_iip; -+ -+ /* -+ * enable RDPMC on this CPU -+ */ -+ if (ctx_arch->flags.insecure) -+ set_in_cr4(X86_CR4_PCE); -+} -+ -+/** -+ * pfm_arch_ctxswout_thread - context switch out thread -+ * @task: task switched out -+ * @ctx : context switched out -+ * -+ * Called from pfm_ctxsw(). Task is guaranteed to be current. -+ * Context is locked. Interrupts are masked. Monitoring may be active. -+ * PMU access is guaranteed. PMC and PMD registers are live in PMU. 
-+ * -+ * Return: -+ * non-zero : did not save PMDs (as part of stopping the PMU) -+ * 0 : saved PMDs (no need to save them in caller) -+ */ -+int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx) -+{ -+ struct pfm_arch_context *ctx_arch; -+ struct pfm_arch_pmu_info *pmu_info; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ pmu_info = pfm_pmu_info(); -+ -+ /* -+ * disable lazy restore of PMCS on ctxswin because -+ * we modify some of them. -+ */ -+ ctx->active_set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS; -+ -+ if (ctx->active_set->npend_ovfls) -+ ctx_arch->saved_real_iip = __get_cpu_var(real_iip); -+ -+ /* -+ * disable RDPMC on this CPU -+ */ -+ if (ctx_arch->flags.insecure) -+ clear_in_cr4(X86_CR4_PCE); -+ -+ if (ctx->state == PFM_CTX_MASKED) -+ return 1; -+ -+ return pmu_info->stop_save(ctx, ctx->active_set); -+} -+ -+/** -+ * pfm_arch_stop - deactivate monitoring -+ * @task: task to stop -+ * @ctx: context to stop -+ * -+ * Called from pfm_stop() -+ * Interrupts are masked. Context is locked. Set is the active set. -+ * -+ * For per-thread: -+ * task is not necessarily current. If not current task, then -+ * task is guaranteed stopped and off any cpu. Access to PMU -+ * is not guaranteed. -+ * -+ * For system-wide: -+ * task is current -+ * -+ * must disable active monitoring. ctx cannot be NULL -+ */ -+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx) -+{ -+ struct pfm_arch_pmu_info *pmu_info; -+ -+ pmu_info = pfm_pmu_info(); -+ -+ /* -+ * no need to go through stop_save() -+ * if we are already stopped -+ */ -+ if (!ctx->flags.started || ctx->state == PFM_CTX_MASKED) -+ return; -+ -+ if (task != current) -+ return; -+ -+ pmu_info->stop_save(ctx, ctx->active_set); -+} -+ -+ -+/** -+ * pfm_arch_start - activate monitoring -+ * @task: task to start -+ * @ctx: context to stop -+ * -+ * Interrupts are masked. Context is locked. -+ * -+ * For per-thread: -+ * Task is not necessarily current. 
If not current task, then task -+ * is guaranteed stopped and off any cpu. No access to PMU if task -+ * is not current. -+ * -+ * For system-wide: -+ * task is always current -+ */ -+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx) -+{ -+ struct pfm_event_set *set; -+ -+ set = ctx->active_set; -+ -+ if (task != current) -+ return; -+ -+ /* -+ * cannot restore PMC if no access to PMU. Will be done -+ * when the thread is switched back in -+ */ -+ -+ pfm_arch_restore_pmcs(ctx, set); -+} -+ -+/** -+ * pfm_arch_restore_pmds - reload PMD registers -+ * @ctx: context to restore from -+ * @set: current event set -+ * -+ * function called from pfm_switch_sets(), pfm_context_load_thread(), -+ * pfm_context_load_sys(), pfm_ctxsw() -+ * -+ * Context is locked. Interrupts are masked. Set cannot be NULL. -+ * Access to the PMU is guaranteed. -+ */ -+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ struct pfm_arch_pmu_info *pmu_info; -+ u16 i, num; -+ -+ pmu_info = pfm_pmu_info(); -+ -+ num = set->nused_pmds; -+ -+ /* -+ * model-specific override -+ */ -+ if (pmu_info->restore_pmds) { -+ pmu_info->restore_pmds(ctx, set); -+ return; -+ } -+ -+ /* -+ * we can restore only the PMD we use because: -+ * -+ * - can only read with pfm_read_pmds() the registers -+ * declared used via pfm_write_pmds(), smpl_pmds, reset_pmds -+ * -+ * - if cr4.pce=1, only counters are exposed to user. 
RDPMC -+ * does not work with other types of PMU registers.Thus, no -+ * address is ever exposed by counters -+ * -+ * - there is never a dependency between one pmd register and -+ * another -+ */ -+ for (i = 0; num; i++) { -+ if (likely(test_bit(i, cast_ulp(set->used_pmds)))) { -+ pfm_write_pmd(ctx, i, set->pmds[i].value); -+ num--; -+ } -+ } -+} -+ -+/** -+ * pfm_arch_restore_pmcs - reload PMC registers -+ * @ctx: context to restore from -+ * @set: current event set -+ * -+ * function called from pfm_switch_sets(), pfm_context_load_thread(), -+ * pfm_context_load_sys(), pfm_ctxsw(). -+ * -+ * Context is locked. Interrupts are masked. set cannot be NULL. -+ * Access to the PMU is guaranteed. -+ * -+ * function must restore all PMC registers from set -+ */ -+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ struct pfm_arch_pmu_info *pmu_info; -+ u64 *mask; -+ u16 i, num; -+ -+ pmu_info = pfm_pmu_info(); -+ -+ /* -+ * we need to restore PMCs only when: -+ * - context is not masked -+ * - monitoring activated -+ * -+ * Masking monitoring after an overflow does not change the -+ * value of flags.started -+ */ -+ if (ctx->state == PFM_CTX_MASKED || !ctx->flags.started) -+ return; -+ -+ /* -+ * model-specific override -+ */ -+ if (pmu_info->restore_pmcs) { -+ pmu_info->restore_pmcs(ctx, set); -+ return; -+ } -+ /* -+ * restore all pmcs -+ * -+ * It is not possible to restore only the pmcs we used because -+ * certain PMU models (e.g. Pentium 4) have dependencies. Thus -+ * we do not want one application using stale PMC coming from -+ * another one. -+ * -+ * On PMU models where there is no dependencies between pmc, then -+ * it is possible to optimize by only restoring the registers that -+ * are used, and this can be done with the models-specific override -+ * for this function. 
-+ * -+ * The default code takes the safest approach, i.e., assume the worse -+ */ -+ mask = ctx->regs.pmcs; -+ num = ctx->regs.num_pmcs; -+ for (i = 0; num; i++) { -+ if (test_bit(i, cast_ulp(mask))) { -+ pfm_arch_write_pmc(ctx, i, set->pmcs[i]); -+ num--; -+ } -+ } -+} -+ -+/** -+ * smp_pmu_interrupt - lowest level PMU interrupt handler for X86 -+ * @regs: machine state -+ * -+ * The PMU interrupt is handled through an interrupt gate, therefore -+ * the CPU automatically clears the EFLAGS.IF, i.e., masking interrupts. -+ * -+ * The perfmon interrupt handler MUST run with interrupts disabled due -+ * to possible race with other, higher priority interrupts, such as timer -+ * or IPI function calls. -+ * -+ * See description in IA-32 architecture manual, Vol 3 section 5.8.1 -+ */ -+void smp_pmu_interrupt(struct pt_regs *regs) -+{ -+ struct pfm_arch_pmu_info *pmu_info; -+ struct pfm_context *ctx; -+ unsigned long iip; -+ int using_nmi; -+ -+ using_nmi = __get_cpu_var(pfm_using_nmi); -+ -+ ack_APIC_irq(); -+ -+ irq_enter(); -+ -+ /* -+ * when using NMI, pfm_handle_nmi() gets called -+ * first. It stops monitoring and record the -+ * iip into real_iip, then it repost the interrupt -+ * using the lower priority vector LOCAL_PERFMON_VECTOR -+ * -+ * On some processors, e.g., P4, it may be that some -+ * state is already recorded from pfm_handle_nmi() -+ * and it only needs to be copied back into the normal -+ * fields so it can be used transparently by higher level -+ * code. -+ */ -+ if (using_nmi) { -+ ctx = __get_cpu_var(pmu_ctx); -+ pmu_info = pfm_pmu_info(); -+ iip = __get_cpu_var(real_iip); -+ if (ctx && pmu_info->nmi_copy_state) -+ pmu_info->nmi_copy_state(ctx); -+ } else -+ iip = instruction_pointer(regs); -+ -+ pfm_interrupt_handler(iip, regs); -+ -+ /* -+ * On Intel P6, Pentium M, P4, Intel Core: -+ * - it is necessary to clear the MASK field for the LVTPC -+ * vector. Otherwise interrupts remain masked. 
See -+ * section 8.5.1 -+ * AMD X86-64: -+ * - the documentation does not stipulate the behavior. -+ * To be safe, we also rewrite the vector to clear the -+ * mask field -+ */ -+ if (!using_nmi && current_cpu_data.x86_vendor == X86_VENDOR_INTEL) -+ apic_write(APIC_LVTPC, LOCAL_PERFMON_VECTOR); -+ -+ irq_exit(); -+} -+ -+/** -+ * pfm_handle_nmi - PMU NMI handler notifier callback -+ * @nb: notifier block -+ * @val: type of die notifier -+ * @data: die notifier-specific data -+ * -+ * called from notify_die() notifier from a trap handler path. We only -+ * care about NMI related callbacks, and ignore everything else. -+ * -+ * Cannot grab any locks, including the perfmon context lock -+ * -+ * Must detect if NMI interrupt comes from perfmon, and if so it must -+ * stop the PMU and repost a lower-priority interrupt. The perfmon interrupt -+ * handler needs to grab the context lock, thus it cannot be run directly -+ * from the NMI interrupt call path. -+ */ -+static int __kprobes pfm_handle_nmi(struct notifier_block *nb, -+ unsigned long val, -+ void *data) -+{ -+ struct die_args *args = data; -+ struct pfm_context *ctx; -+ struct pfm_arch_pmu_info *pmu_info; -+ -+ /* -+ * only NMI related calls -+ */ -+ if (val != DIE_NMI_IPI) -+ return NOTIFY_DONE; -+ -+ /* -+ * perfmon not using NMI -+ */ -+ if (!__get_cpu_var(pfm_using_nmi)) -+ return NOTIFY_DONE; -+ -+ /* -+ * No context -+ */ -+ ctx = __get_cpu_var(pmu_ctx); -+ if (!ctx) { -+ PFM_DBG_ovfl("no ctx"); -+ return NOTIFY_DONE; -+ } -+ -+ /* -+ * Detect if we have overflows, i.e., NMI interrupt -+ * caused by PMU -+ */ -+ pmu_info = pfm_pmu_conf->pmu_info; -+ if (!pmu_info->has_ovfls(ctx)) { -+ PFM_DBG_ovfl("no ovfl"); -+ return NOTIFY_DONE; -+ } -+ -+ /* -+ * we stop the PMU to avoid further overflow before this -+ * one is treated by lower priority interrupt handler -+ */ -+ pmu_info->quiesce(); -+ -+ /* -+ * record actual instruction pointer -+ */ -+ __get_cpu_var(real_iip) = instruction_pointer(args->regs); -+ -+
/* -+ * post lower priority interrupt (LOCAL_PERFMON_VECTOR) -+ */ -+ pfm_arch_resend_irq(ctx); -+ -+ pfm_stats_inc(ovfl_intr_nmi_count); -+ -+ /* -+ * we need to rewrite the APIC vector on Intel -+ */ -+ if (current_cpu_data.x86_vendor == X86_VENDOR_INTEL) -+ apic_write(APIC_LVTPC, APIC_DM_NMI); -+ -+ /* -+ * the notification was for us -+ */ -+ return NOTIFY_STOP; -+} -+ -+static struct notifier_block pfm_nmi_nb = { -+ .notifier_call = pfm_handle_nmi -+}; -+ -+/** -+ * pfm_arch_get_pmu_module_name - get PMU description module name for autoload -+ * -+ * called from pfm_pmu_request_module -+ */ -+char *pfm_arch_get_pmu_module_name(void) -+{ -+ switch (current_cpu_data.x86) { -+ case 6: -+ switch (current_cpu_data.x86_model) { -+ case 3: /* Pentium II */ -+ case 7 ... 11: -+ case 13: -+ return "perfmon_p6"; -+ case 15: /* Merom */ -+ case 23: /* Penryn */ -+ return "perfmon_intel_core"; -+ case 28: /* Atom/Silverthorne */ -+ return "perfmon_intel_atom"; -+ case 29: /* Dunnington */ -+ return "perfmon_intel_core"; -+ default: -+ goto try_arch; -+ } -+ case 15: -+ case 16: -+ /* All Opteron processors */ -+ if (current_cpu_data.x86_vendor == X86_VENDOR_AMD) -+ return "perfmon_amd64"; -+ -+ switch (current_cpu_data.x86_model) { -+ case 0 ... 6: -+ return "perfmon_p4"; -+ } -+ /* FALL THROUGH */ -+ default: -+try_arch: -+ if (boot_cpu_has(X86_FEATURE_ARCH_PERFMON)) -+ return "perfmon_intel_arch"; -+ return NULL; -+ } -+ return NULL; -+} -+ -+/** -+ * pfm_arch_resend_irq - post perfmon interrupt on regular vector -+ * -+ * called from pfm_ctxswin_thread() and pfm_handle_nmi() -+ */ -+void pfm_arch_resend_irq(struct pfm_context *ctx) -+{ -+ unsigned long val, dest; -+ /* -+ * we cannot use hw_resend_irq() because it goes to -+ * the I/O APIC. We need to go to the Local APIC. -+ * -+ * The "int vec" is not the right solution either -+ * because it triggers a software intr. We need -+ * to regenerate the interrupt and have it pended -+ * until we unmask interrupts. 
-+ * -+ * Instead we send ourself an IPI on the perfmon -+ * vector. -+ */ -+ val = APIC_DEST_SELF|APIC_INT_ASSERT| -+ APIC_DM_FIXED|LOCAL_PERFMON_VECTOR; -+ -+ dest = apic_read(APIC_ID); -+ apic_write(APIC_ICR2, dest); -+ apic_write(APIC_ICR, val); -+} -+ -+/** -+ * pfm_arch_pmu_acquire_percpu - setup APIC per CPU -+ * @data: contains pmu flags -+ */ -+static void pfm_arch_pmu_acquire_percpu(void *data) -+{ -+ -+ struct pfm_arch_pmu_info *pmu_info; -+ unsigned int tmp, vec; -+ unsigned long flags = (unsigned long)data; -+ unsigned long lvtpc; -+ -+ pmu_info = pfm_pmu_conf->pmu_info; -+ -+ /* -+ * we only reprogram the LVTPC vector if we have detected -+ * no sharing, otherwise it means the APIC is already programmed -+ * and we use whatever vector (likely NMI) is there -+ */ -+ if (!(flags & PFM_X86_FL_SHARING)) { -+ if (flags & PFM_X86_FL_USE_NMI) -+ vec = APIC_DM_NMI; -+ else -+ vec = LOCAL_PERFMON_VECTOR; -+ -+ tmp = apic_read(APIC_LVTERR); -+ apic_write(APIC_LVTERR, tmp | APIC_LVT_MASKED); -+ apic_write(APIC_LVTPC, vec); -+ apic_write(APIC_LVTERR, tmp); -+ } -+ lvtpc = (unsigned long)apic_read(APIC_LVTPC); -+ -+ __get_cpu_var(pfm_using_nmi) = lvtpc == APIC_DM_NMI; -+ -+ PFM_DBG("LTVPC=0x%lx using_nmi=%d", lvtpc, __get_cpu_var(pfm_using_nmi)); -+ -+ /* -+ * invoke model specific acquire routine. 
May be used for -+ * model-specific initializations -+ */ -+ if (pmu_info->acquire_pmu_percpu) -+ pmu_info->acquire_pmu_percpu(); -+} -+ -+/** -+ * pfm_arch_pmu_acquire - acquire PMU resource from system -+ * @unavail_pmcs : bitmask to use to set unavailable pmcs -+ * @unavail_pmds : bitmask to use to set unavailable pmds -+ * -+ * interrupts are not masked -+ * -+ * Grab PMU registers from lower level MSR allocator -+ * -+ * Program the APIC according the possible interrupt vector -+ * either LOCAL_PERFMON_VECTOR or NMI -+ */ -+int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds) -+{ -+ struct pfm_arch_pmu_info *pmu_info; -+ struct pfm_regmap_desc *d; -+ u16 i, nlost; -+ -+ pmu_info = pfm_pmu_conf->pmu_info; -+ pmu_info->flags &= ~PFM_X86_FL_SHARING; -+ -+ nlost = 0; -+ -+ d = pfm_pmu_conf->pmc_desc; -+ for (i = 0; i < pfm_pmu_conf->num_pmc_entries; i++, d++) { -+ if (!(d->type & PFM_REG_I)) -+ continue; -+ -+ if (d->type & PFM_REG_V) -+ continue; -+ /* -+ * reserve register with lower-level allocator -+ */ -+ if (!reserve_evntsel_nmi(d->hw_addr)) { -+ PFM_DBG("pmc%d(%s) already used", i, d->desc); -+ __set_bit(i, cast_ulp(unavail_pmcs)); -+ nlost++; -+ continue; -+ } -+ } -+ PFM_DBG("nlost=%d info_flags=0x%x\n", nlost, pmu_info->flags); -+ /* -+ * some PMU models (e.g., P6) do not support sharing -+ * so check if we found less than the expected number of PMC registers -+ */ -+ if (nlost) { -+ if (pmu_info->flags & PFM_X86_FL_NO_SHARING) { -+ PFM_INFO("PMU already used by another subsystem, " -+ "PMU does not support sharing, " -+ "try disabling Oprofile or " -+ "reboot with nmi_watchdog=0"); -+ goto undo; -+ } -+ pmu_info->flags |= PFM_X86_FL_SHARING; -+ } -+ -+ d = pfm_pmu_conf->pmd_desc; -+ for (i = 0; i < pfm_pmu_conf->num_pmd_entries; i++, d++) { -+ if (!(d->type & PFM_REG_I)) -+ continue; -+ -+ if (d->type & PFM_REG_V) -+ continue; -+ -+ if (!reserve_perfctr_nmi(d->hw_addr)) { -+ PFM_DBG("pmd%d(%s) already used", i, d->desc); -+ __set_bit(i, 
cast_ulp(unavail_pmds)); -+ } -+ } -+ /* -+ * program APIC on each CPU -+ */ -+ on_each_cpu(pfm_arch_pmu_acquire_percpu, -+ (void *)(unsigned long)pmu_info->flags , 1); -+ -+ return 0; -+undo: -+ /* -+ * must undo reservation of pmcs in case of error -+ */ -+ d = pfm_pmu_conf->pmc_desc; -+ for (i = 0; i < pfm_pmu_conf->num_pmc_entries; i++, d++) { -+ if (!(d->type & (PFM_REG_I|PFM_REG_V))) -+ continue; -+ if (!test_bit(i, cast_ulp(unavail_pmcs))) -+ release_evntsel_nmi(d->hw_addr); -+ } -+ return -EBUSY; -+} -+/** -+ * pfm-arch_pmu_release_percpu - clear NMI state for one CPU -+ * -+ */ -+static void pfm_arch_pmu_release_percpu(void *data) -+{ -+ struct pfm_arch_pmu_info *pmu_info; -+ -+ pmu_info = pfm_pmu_conf->pmu_info; -+ -+ __get_cpu_var(pfm_using_nmi) = 0; -+ -+ /* -+ * invoke model specific release routine. -+ * May be used to undo certain initializations -+ * or free some model-specific ressources. -+ */ -+ if (pmu_info->release_pmu_percpu) -+ pmu_info->release_pmu_percpu(); -+} -+ -+/** -+ * pfm_arch_pmu_release - release PMU resource to system -+ * -+ * called from pfm_pmu_release() -+ * interrupts are not masked -+ * -+ * On x86, we return the PMU registers to the MSR allocator -+ */ -+void pfm_arch_pmu_release(void) -+{ -+ struct pfm_regmap_desc *d; -+ u16 i, n; -+ -+ d = pfm_pmu_conf->pmc_desc; -+ n = pfm_pmu_conf->regs_all.num_pmcs; -+ for (i = 0; n; i++, d++) { -+ if (!test_bit(i, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) -+ continue; -+ release_evntsel_nmi(d->hw_addr); -+ n--; -+ PFM_DBG("pmc%u released", i); -+ } -+ d = pfm_pmu_conf->pmd_desc; -+ n = pfm_pmu_conf->regs_all.num_pmds; -+ for (i = 0; n; i++, d++) { -+ if (!test_bit(i, cast_ulp(pfm_pmu_conf->regs_all.pmds))) -+ continue; -+ release_perfctr_nmi(d->hw_addr); -+ n--; -+ PFM_DBG("pmd%u released", i); -+ } -+ -+ /* clear NMI variable if used */ -+ if (__get_cpu_var(pfm_using_nmi)) -+ on_each_cpu(pfm_arch_pmu_release_percpu, NULL , 1); -+} -+ -+/** -+ * pfm_arch_pmu_config_init - validate PMU 
description structure -+ * @cfg: PMU description structure -+ * -+ * return: -+ * 0 if valid -+ * errno otherwise -+ * -+ * called from pfm_pmu_register() -+ */ -+int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg) -+{ -+ struct pfm_arch_pmu_info *pmu_info; -+ -+ pmu_info = pfm_pmu_info(); -+ if (!pmu_info) { -+ PFM_DBG("%s missing pmu_info", cfg->pmu_name); -+ return -EINVAL; -+ } -+ if (!pmu_info->has_ovfls) { -+ PFM_DBG("%s missing has_ovfls callback", cfg->pmu_name); -+ return -EINVAL; -+ } -+ if (!pmu_info->quiesce) { -+ PFM_DBG("%s missing quiesce callback", cfg->pmu_name); -+ return -EINVAL; -+ } -+ if (!pmu_info->stop_save) { -+ PFM_DBG("%s missing stop_save callback", cfg->pmu_name); -+ return -EINVAL; -+ } -+ return 0; -+} -+ -+/** -+ * pfm_arch_init - one time global arch-specific initialization -+ * -+ * called from pfm_init() -+ */ -+int __init pfm_arch_init(void) -+{ -+ /* -+ * we need to register our NMI handler when the kernels boots -+ * to avoid a deadlock condition with the NMI watchdog or Oprofile -+ * if we were to try and register/unregister on-demand. -+ */ -+ register_die_notifier(&pfm_nmi_nb); -+ return 0; -+} -diff --git a/arch/x86/perfmon/perfmon_amd64.c b/arch/x86/perfmon/perfmon_amd64.c -new file mode 100644 -index 0000000..f9b5f9c ---- /dev/null -+++ b/arch/x86/perfmon/perfmon_amd64.c -@@ -0,0 +1,754 @@ -+/* -+ * This file contains the PMU description for the Athlon64 and Opteron64 -+ * processors. It supports 32 and 64-bit modes. -+ * -+ * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * Copyright (c) 2007 Advanced Micro Devices, Inc. -+ * Contributed by Robert Richter -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. 
-+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+MODULE_AUTHOR("Stephane Eranian "); -+MODULE_AUTHOR("Robert Richter "); -+MODULE_DESCRIPTION("AMD64 PMU description table"); -+MODULE_LICENSE("GPL"); -+ -+#define PCI_DEVICE_ID_AMD_10H_NB_MISC 0x1203 -+ -+static int force_nmi; -+MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt"); -+module_param(force_nmi, bool, 0600); -+ -+#define HAS_IBS 0x01 /* has IBS support */ -+ -+static u8 ibs_eilvt_off, ibs_status; /* AMD: extended interrupt LVT offset */ -+ -+static void pfm_amd64_restore_pmcs(struct pfm_context *ctx, -+ struct pfm_event_set *set); -+static void __kprobes pfm_amd64_quiesce(void); -+static int pfm_amd64_has_ovfls(struct pfm_context *ctx); -+static int pfm_amd64_stop_save(struct pfm_context *ctx, -+ struct pfm_event_set *set); -+ -+#define IBSFETCHCTL_PMC 4 /* pmc4 */ -+#define IBSFETCHCTL_PMD 4 /* pmd4 */ -+#define IBSOPSCTL_PMC 5 /* pmc5 */ -+#define IBSOPSCTL_PMD 7 /* pmd7 */ -+ -+static u64 enable_mask[PFM_MAX_PMCS]; -+static u16 max_enable; -+ -+static struct pfm_arch_pmu_info pfm_amd64_pmu_info = { -+ .stop_save = pfm_amd64_stop_save, -+ .has_ovfls = pfm_amd64_has_ovfls, -+ .quiesce = pfm_amd64_quiesce, -+ .restore_pmcs = pfm_amd64_restore_pmcs -+}; -+ -+#define PFM_AMD64_IBSFETCHVAL (1ULL<<49) /* valid fetch sample */ -+#define PFM_AMD64_IBSFETCHEN (1ULL<<48) /* fetch sampling enabled */ -+#define PFM_AMD64_IBSOPVAL (1ULL<<18) /* valid execution sample */ 
-+#define PFM_AMD64_IBSOPEN (1ULL<<17) /* execution sampling enabled */ -+ -+/* -+ * force Local APIC interrupt on overflow -+ */ -+#define PFM_K8_VAL (1ULL<<20) -+#define PFM_K8_NO64 (1ULL<<20) -+ -+/* -+ * reserved bits must be 1 -+ * -+ * for family 15: -+ * - upper 32 bits are reserved -+ * - bit 20, bit 21 -+ * -+ * for family 16: -+ * - bits 36-39 are reserved -+ * - bits 42-63 are reserved -+ * - bit 20, bit 21 -+ * -+ * for IBS registers: -+ * IBSFETCHCTL: all bits are reserved except bits 57, 48, 15:0 -+ * IBSOPSCTL : all bits are reserved except bits 17, 15:0 -+ */ -+#define PFM_K8_RSVD ((~((1ULL<<32)-1)) | (1ULL<<20) | (1ULL<<21)) -+#define PFM_16_RSVD ((0x3fffffULL<<42) | (0xfULL<<36) | (1ULL<<20) | (1ULL<<21)) -+#define PFM_AMD64_IBSFETCHCTL_RSVD (~((1ULL<<48)|(1ULL<<57)|0xffffULL)) -+#define PFM_AMD64_IBSOPCTL_RSVD (~((1ULL<<17)|0xffffULL)) -+ -+static struct pfm_regmap_desc pfm_amd64_pmc_desc[] = { -+/* pmc0 */ PMC_D(PFM_REG_I64, "PERFSEL0", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL0), -+/* pmc1 */ PMC_D(PFM_REG_I64, "PERFSEL1", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL1), -+/* pmc2 */ PMC_D(PFM_REG_I64, "PERFSEL2", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL2), -+/* pmc3 */ PMC_D(PFM_REG_I64, "PERFSEL3", PFM_K8_VAL, PFM_K8_RSVD, PFM_K8_NO64, MSR_K7_EVNTSEL3), -+/* pmc4 */ PMC_D(PFM_REG_I, "IBSFETCHCTL", 0, PFM_AMD64_IBSFETCHCTL_RSVD, 0, MSR_AMD64_IBSFETCHCTL), -+/* pmc5 */ PMC_D(PFM_REG_I, "IBSOPCTL", 0, PFM_AMD64_IBSOPCTL_RSVD, 0, MSR_AMD64_IBSOPCTL), -+}; -+#define PFM_AMD_NUM_PMCS ARRAY_SIZE(pfm_amd64_pmc_desc) -+ -+#define PFM_REG_IBS (PFM_REG_I|PFM_REG_INTR) -+ -+/* -+ * AMD64 counters are 48 bits, upper bits are reserved -+ */ -+#define PFM_AMD64_CTR_RSVD (~((1ULL<<48)-1)) -+ -+#define PFM_AMD_D(n) \ -+ { .type = PFM_REG_C, \ -+ .desc = "PERFCTR"#n, \ -+ .hw_addr = MSR_K7_PERFCTR0+n, \ -+ .rsvd_msk = PFM_AMD64_CTR_RSVD, \ -+ .dep_pmcs[0] = 1ULL << n \ -+ } -+ -+#define PFM_AMD_IBSO(t, s, a) \ -+ { .type = t, \ -+ 
.desc = s, \ -+ .hw_addr = a, \ -+ .rsvd_msk = 0, \ -+ .dep_pmcs[0] = 1ULL << 5 \ -+ } -+ -+#define PFM_AMD_IBSF(t, s, a) \ -+ { .type = t, \ -+ .desc = s, \ -+ .hw_addr = a, \ -+ .rsvd_msk = 0, \ -+ .dep_pmcs[0] = 1ULL << 6 \ -+ } -+ -+static struct pfm_regmap_desc pfm_amd64_pmd_desc[] = { -+/* pmd0 */ PFM_AMD_D(0), -+/* pmd1 */ PFM_AMD_D(1), -+/* pmd2 */ PFM_AMD_D(2), -+/* pmd3 */ PFM_AMD_D(3), -+/* pmd4 */ PFM_AMD_IBSF(PFM_REG_IBS, "IBSFETCHCTL", MSR_AMD64_IBSFETCHCTL), -+/* pmd5 */ PFM_AMD_IBSF(PFM_REG_IRO, "IBSFETCHLINAD", MSR_AMD64_IBSFETCHLINAD), -+/* pmd6 */ PFM_AMD_IBSF(PFM_REG_IRO, "IBSFETCHPHYSAD", MSR_AMD64_IBSFETCHPHYSAD), -+/* pmd7 */ PFM_AMD_IBSO(PFM_REG_IBS, "IBSOPCTL", MSR_AMD64_IBSOPCTL), -+/* pmd8 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSOPRIP", MSR_AMD64_IBSOPRIP), -+/* pmd9 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSOPDATA", MSR_AMD64_IBSOPDATA), -+/* pmd10 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSOPDATA2", MSR_AMD64_IBSOPDATA2), -+/* pmd11 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSOPDATA3", MSR_AMD64_IBSOPDATA3), -+/* pmd12 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSDCLINAD", MSR_AMD64_IBSDCLINAD), -+/* pmd13 */ PFM_AMD_IBSO(PFM_REG_IRO, "IBSDCPHYSAD", MSR_AMD64_IBSDCPHYSAD), -+}; -+#define PFM_AMD_NUM_PMDS ARRAY_SIZE(pfm_amd64_pmd_desc) -+ -+static struct pfm_context **pfm_nb_sys_owners; -+static struct pfm_context *pfm_nb_task_owner; -+ -+static struct pfm_pmu_config pfm_amd64_pmu_conf; -+ -+#define is_ibs_pmc(x) (x == 4 || x == 5) -+ -+static void pfm_amd64_setup_eilvt_per_cpu(void *info) -+{ -+ u8 lvt_off; -+ -+ /* program the IBS vector to the perfmon vector */ -+ lvt_off = setup_APIC_eilvt_ibs(LOCAL_PERFMON_VECTOR, -+ APIC_EILVT_MSG_FIX, 0); -+ PFM_DBG("APIC_EILVT%d set to 0x%x", lvt_off, LOCAL_PERFMON_VECTOR); -+ ibs_eilvt_off = lvt_off; -+} -+ -+static int pfm_amd64_setup_eilvt(void) -+{ -+#define IBSCTL_LVTOFFSETVAL (1 << 8) -+#define IBSCTL 0x1cc -+ struct pci_dev *cpu_cfg; -+ int nodes; -+ u32 value = 0; -+ -+ /* per CPU setup */ -+ on_each_cpu(pfm_amd64_setup_eilvt_per_cpu, 
NULL, 1); -+ -+ nodes = 0; -+ cpu_cfg = NULL; -+ do { -+ cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD, -+ PCI_DEVICE_ID_AMD_10H_NB_MISC, -+ cpu_cfg); -+ if (!cpu_cfg) -+ break; -+ ++nodes; -+ pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off -+ | IBSCTL_LVTOFFSETVAL); -+ pci_read_config_dword(cpu_cfg, IBSCTL, &value); -+ if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) { -+ PFM_DBG("Failed to setup IBS LVT offset, " -+ "IBSCTL = 0x%08x", value); -+ return 1; -+ } -+ } while (1); -+ -+ if (!nodes) { -+ PFM_DBG("No CPU node configured for IBS"); -+ return 1; -+ } -+ -+#ifdef CONFIG_NUMA -+ /* Sanity check */ -+ /* Works only for 64bit with proper numa implementation. */ -+ if (nodes != num_possible_nodes()) { -+ PFM_DBG("Failed to setup CPU node(s) for IBS, " -+ "found: %d, expected %d", -+ nodes, num_possible_nodes()); -+ return 1; -+ } -+#endif -+ return 0; -+} -+ -+/* -+ * There can only be one user per socket for the Northbridge (NB) events, -+ * so we enforce mutual exclusion as follows: -+ * - per-thread : only one context machine-wide can use NB events -+ * - system-wide: only one context per processor socket -+ * -+ * Exclusion is enforced at: -+ * - pfm_load_context() -+ * - pfm_write_pmcs() for attached contexts -+ * -+ * Exclusion is released at: -+ * - pfm_unload_context() or any calls that implicitely uses it -+ * -+ * return: -+ * 0 : successfully acquire NB access -+ * < 0: errno, failed to acquire NB access -+ */ -+static int pfm_amd64_acquire_nb(struct pfm_context *ctx) -+{ -+ struct pfm_context **entry, *old; -+ int proc_id; -+ -+#ifdef CONFIG_SMP -+ proc_id = cpu_data(smp_processor_id()).phys_proc_id; -+#else -+ proc_id = 0; -+#endif -+ -+ if (ctx->flags.system) -+ entry = &pfm_nb_sys_owners[proc_id]; -+ else -+ entry = &pfm_nb_task_owner; -+ -+ old = cmpxchg(entry, NULL, ctx); -+ if (!old) { -+ if (ctx->flags.system) -+ PFM_DBG("acquired Northbridge event access on socket %u", proc_id); -+ else -+ PFM_DBG("acquired Northbridge event access 
globally"); -+ } else if (old != ctx) { -+ if (ctx->flags.system) -+ PFM_DBG("NorthBridge event conflict on socket %u", proc_id); -+ else -+ PFM_DBG("global NorthBridge event conflict"); -+ return -EBUSY; -+ } -+ return 0; -+} -+ -+/* -+ * invoked from pfm_write_pmcs() when pfm_nb_sys_owners is not NULL,i.e., -+ * when we have detected a multi-core processor. -+ * -+ * context is locked, interrupts are masked -+ */ -+static int pfm_amd64_pmc_write_check(struct pfm_context *ctx, -+ struct pfm_event_set *set, -+ struct pfarg_pmc *req) -+{ -+ unsigned int event; -+ -+ /* -+ * delay checking NB event until we load the context -+ */ -+ if (ctx->state == PFM_CTX_UNLOADED) -+ return 0; -+ -+ /* -+ * check event is NB event -+ */ -+ event = (unsigned int)(req->reg_value & 0xff); -+ if (event < 0xee) -+ return 0; -+ -+ return pfm_amd64_acquire_nb(ctx); -+} -+ -+/* -+ * invoked on pfm_load_context(). -+ * context is locked, interrupts are masked -+ */ -+static int pfm_amd64_load_context(struct pfm_context *ctx) -+{ -+ struct pfm_event_set *set; -+ unsigned int i, n; -+ -+ /* -+ * scan all sets for NB events -+ */ -+ list_for_each_entry(set, &ctx->set_list, list) { -+ n = set->nused_pmcs; -+ for (i = 0; n; i++) { -+ if (!test_bit(i, cast_ulp(set->used_pmcs))) -+ continue; -+ -+ if (!is_ibs_pmc(i) && (set->pmcs[i] & 0xff) >= 0xee) -+ goto found; -+ n--; -+ } -+ } -+ return 0; -+found: -+ return pfm_amd64_acquire_nb(ctx); -+} -+ -+/* -+ * invoked on pfm_unload_context() -+ */ -+static void pfm_amd64_unload_context(struct pfm_context *ctx) -+{ -+ struct pfm_context **entry, *old; -+ int proc_id; -+ -+#ifdef CONFIG_SMP -+ proc_id = cpu_data(smp_processor_id()).phys_proc_id; -+#else -+ proc_id = 0; -+#endif -+ -+ /* -+ * unload always happens on the monitored CPU in system-wide -+ */ -+ if (ctx->flags.system) -+ entry = &pfm_nb_sys_owners[proc_id]; -+ else -+ entry = &pfm_nb_task_owner; -+ -+ old = cmpxchg(entry, ctx, NULL); -+ if (old == ctx) { -+ if (ctx->flags.system) -+ 
PFM_DBG("released NorthBridge on socket %u", proc_id); -+ else -+ PFM_DBG("released NorthBridge events globally"); -+ } -+} -+ -+/* -+ * detect if we need to activate NorthBridge event access control -+ */ -+static int pfm_amd64_setup_nb_event_control(void) -+{ -+ unsigned int c, n = 0; -+ unsigned int max_phys = 0; -+ -+#ifdef CONFIG_SMP -+ for_each_possible_cpu(c) { -+ if (cpu_data(c).phys_proc_id > max_phys) -+ max_phys = cpu_data(c).phys_proc_id; -+ } -+#else -+ max_phys = 0; -+#endif -+ if (max_phys > 255) { -+ PFM_INFO("socket id %d is too big to handle", max_phys); -+ return -ENOMEM; -+ } -+ -+ n = max_phys + 1; -+ if (n < 2) -+ return 0; -+ -+ pfm_nb_sys_owners = vmalloc(n * sizeof(*pfm_nb_sys_owners)); -+ if (!pfm_nb_sys_owners) -+ return -ENOMEM; -+ -+ memset(pfm_nb_sys_owners, 0, n * sizeof(*pfm_nb_sys_owners)); -+ pfm_nb_task_owner = NULL; -+ -+ /* -+ * activate write-checker for PMC registers -+ */ -+ for (c = 0; c < PFM_AMD_NUM_PMCS; c++) { -+ if (!is_ibs_pmc(c)) -+ pfm_amd64_pmc_desc[c].type |= PFM_REG_WC; -+ } -+ -+ pfm_amd64_pmu_info.load_context = pfm_amd64_load_context; -+ pfm_amd64_pmu_info.unload_context = pfm_amd64_unload_context; -+ -+ pfm_amd64_pmu_conf.pmc_write_check = pfm_amd64_pmc_write_check; -+ -+ PFM_INFO("NorthBridge event access control enabled"); -+ -+ return 0; -+} -+ -+/* -+ * disable registers which are not available on -+ * the host (applies to IBS registers) -+ */ -+static void pfm_amd64_check_registers(void) -+{ -+ u16 i; -+ -+ PFM_DBG("has_ibs=%d", !!(ibs_status & HAS_IBS)); -+ -+ __set_bit(0, cast_ulp(enable_mask)); -+ __set_bit(1, cast_ulp(enable_mask)); -+ __set_bit(2, cast_ulp(enable_mask)); -+ __set_bit(3, cast_ulp(enable_mask)); -+ max_enable = 3+1; -+ -+ -+ /* -+ * remove IBS registers if feature not present -+ */ -+ if (!(ibs_status & HAS_IBS)) { -+ pfm_amd64_pmc_desc[4].type = PFM_REG_NA; -+ pfm_amd64_pmc_desc[5].type = PFM_REG_NA; -+ for (i = 4; i < 14; i++) -+ pfm_amd64_pmd_desc[i].type = PFM_REG_NA; -+ } else { 
-+ __set_bit(16, cast_ulp(enable_mask)); -+ __set_bit(17, cast_ulp(enable_mask)); -+ max_enable = 17 + 1; -+ } -+ -+ /* -+ * adjust reserved bit fields for family 16 -+ */ -+ if (current_cpu_data.x86 == 16) { -+ for (i = 0; i < PFM_AMD_NUM_PMCS; i++) -+ if (pfm_amd64_pmc_desc[i].rsvd_msk == PFM_K8_RSVD) -+ pfm_amd64_pmc_desc[i].rsvd_msk = PFM_16_RSVD; -+ } -+} -+ -+static int pfm_amd64_probe_pmu(void) -+{ -+ u64 val = 0; -+ if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) { -+ PFM_INFO("not an AMD processor"); -+ return -1; -+ } -+ -+ switch (current_cpu_data.x86) { -+ case 16: -+ case 15: -+ case 6: -+ break; -+ default: -+ PFM_INFO("unsupported family=%d", current_cpu_data.x86); -+ return -1; -+ } -+ -+ /* check for IBS */ -+ if (cpu_has(¤t_cpu_data, X86_FEATURE_IBS)) { -+ ibs_status |= HAS_IBS; -+ rdmsrl(MSR_AMD64_IBSCTL, val); -+ } -+ -+ PFM_INFO("found family=%d IBSCTL=0x%llx", current_cpu_data.x86, (unsigned long long)val); -+ -+ /* -+ * check for local APIC (required) -+ */ -+ if (!cpu_has_apic) { -+ PFM_INFO("no local APIC, unsupported"); -+ return -1; -+ } -+ -+ if (current_cpu_data.x86_max_cores > 1 -+ && pfm_amd64_setup_nb_event_control()) -+ return -1; -+ -+ if (force_nmi) -+ pfm_amd64_pmu_info.flags |= PFM_X86_FL_USE_NMI; -+ -+ if (ibs_status & HAS_IBS) { -+ /* Setup extended interrupt */ -+ if (pfm_amd64_setup_eilvt()) { -+ PFM_INFO("Failed to initialize extended interrupts " -+ "for IBS"); -+ ibs_status &= ~HAS_IBS; -+ PFM_INFO("Unable to use IBS"); -+ } else { -+ PFM_INFO("IBS supported"); -+ } -+ } -+ -+ pfm_amd64_check_registers(); -+ -+ return 0; -+} -+ -+/* -+ * detect is counters have overflowed. 
-+ * return: -+ * 0 : no overflow -+ * 1 : at least one overflow -+ */ -+static int __kprobes pfm_amd64_has_ovfls(struct pfm_context *ctx) -+{ -+ struct pfm_regmap_desc *xrd; -+ u64 *cnt_mask; -+ u64 wmask, val; -+ u16 i, num; -+ -+ /* -+ * Check for IBS events -+ */ -+ if (ibs_status & HAS_IBS) { -+ rdmsrl(MSR_AMD64_IBSFETCHCTL, val); -+ if (val & PFM_AMD64_IBSFETCHVAL) -+ return 1; -+ rdmsrl(MSR_AMD64_IBSOPCTL, val); -+ if (val & PFM_AMD64_IBSOPVAL) -+ return 1; -+ } -+ /* -+ * Check regular counters -+ */ -+ cnt_mask = ctx->regs.cnt_pmds; -+ num = ctx->regs.num_counters; -+ wmask = 1ULL << pfm_pmu_conf->counter_width; -+ xrd = pfm_amd64_pmd_desc; -+ -+ for (i = 0; num; i++) { -+ if (test_bit(i, cast_ulp(cnt_mask))) { -+ rdmsrl(xrd[i].hw_addr, val); -+ if (!(val & wmask)) -+ return 1; -+ num--; -+ } -+ } -+ return 0; -+} -+ -+/* -+ * Must check for IBS event BEFORE stop_save_p6 because -+ * stopping monitoring does destroy IBS state information -+ * in IBSFETCHCTL/IBSOPCTL because they are tagged as enable -+ * registers. -+ */ -+static int pfm_amd64_stop_save(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ struct pfm_arch_pmu_info *pmu_info; -+ u64 used_mask[PFM_PMC_BV]; -+ u64 *cnt_pmds; -+ u64 val, wmask, ovfl_mask; -+ u32 i, count, use_ibs; -+ -+ pmu_info = pfm_pmu_info(); -+ -+ /* -+ * IBS used if: -+ * - on family 10h processor with IBS -+ * - at least one of the IBS PMD registers is used -+ */ -+ use_ibs = (ibs_status & HAS_IBS) -+ && (test_bit(IBSFETCHCTL_PMD, cast_ulp(set->used_pmds)) -+ || test_bit(IBSOPSCTL_PMD, cast_ulp(set->used_pmds))); -+ -+ wmask = 1ULL << pfm_pmu_conf->counter_width; -+ -+ bitmap_and(cast_ulp(used_mask), -+ cast_ulp(set->used_pmcs), -+ cast_ulp(enable_mask), -+ max_enable); -+ -+ count = bitmap_weight(cast_ulp(used_mask), max_enable); -+ -+ /* -+ * stop monitoring -+ * Unfortunately, this is very expensive! -+ * wrmsrl() is serializing. 
-+ * -+ * With IBS, we need to do read-modify-write to preserve the content -+ * for OpsCTL and FetchCTL because they are also used as PMDs and saved -+ * below -+ */ -+ if (use_ibs) { -+ for (i = 0; count; i++) { -+ if (test_bit(i, cast_ulp(used_mask))) { -+ if (i == IBSFETCHCTL_PMC) { -+ rdmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, val); -+ val &= ~PFM_AMD64_IBSFETCHEN; -+ } else if (i == IBSOPSCTL_PMC) { -+ rdmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, val); -+ val &= ~PFM_AMD64_IBSOPEN; -+ } else -+ val = 0; -+ wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, val); -+ count--; -+ } -+ } -+ } else { -+ for (i = 0; count; i++) { -+ if (test_bit(i, cast_ulp(used_mask))) { -+ wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, 0); -+ count--; -+ } -+ } -+ } -+ -+ /* -+ * if we already having a pending overflow condition, we simply -+ * return to take care of this first. -+ */ -+ if (set->npend_ovfls) -+ return 1; -+ -+ ovfl_mask = pfm_pmu_conf->ovfl_mask; -+ cnt_pmds = ctx->regs.cnt_pmds; -+ -+ /* -+ * check for pending overflows and save PMDs (combo) -+ * we employ used_pmds because we also need to save -+ * and not just check for pending interrupts. 
-+ * -+ * Must check for counting PMDs because of virtual PMDs and IBS -+ */ -+ count = set->nused_pmds; -+ for (i = 0; count; i++) { -+ if (test_bit(i, cast_ulp(set->used_pmds))) { -+ val = pfm_arch_read_pmd(ctx, i); -+ if (likely(test_bit(i, cast_ulp(cnt_pmds)))) { -+ if (!(val & wmask)) { -+ __set_bit(i, cast_ulp(set->povfl_pmds)); -+ set->npend_ovfls++; -+ } -+ val = (set->pmds[i].value & ~ovfl_mask) | (val & ovfl_mask); -+ } -+ set->pmds[i].value = val; -+ count--; -+ } -+ } -+ -+ /* -+ * check if IBS contains valid data, and mark the corresponding -+ * PMD has overflowed -+ */ -+ if (use_ibs) { -+ if (set->pmds[IBSFETCHCTL_PMD].value & PFM_AMD64_IBSFETCHVAL) { -+ __set_bit(IBSFETCHCTL_PMD, cast_ulp(set->povfl_pmds)); -+ set->npend_ovfls++; -+ } -+ if (set->pmds[IBSOPSCTL_PMD].value & PFM_AMD64_IBSOPVAL) { -+ __set_bit(IBSOPSCTL_PMD, cast_ulp(set->povfl_pmds)); -+ set->npend_ovfls++; -+ } -+ } -+ /* 0 means: no need to save PMDs at upper level */ -+ return 0; -+} -+ -+/** -+ * pfm_amd64_quiesce_pmu -- stop monitoring without grabbing any lock -+ * -+ * called from NMI interrupt handler to immediately stop monitoring -+ * cannot grab any lock, including perfmon related locks -+ */ -+static void __kprobes pfm_amd64_quiesce(void) -+{ -+ /* -+ * quiesce PMU by clearing available registers that have -+ * the start/stop capability -+ */ -+ if (test_bit(0, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) -+ wrmsrl(MSR_K7_EVNTSEL0, 0); -+ if (test_bit(1, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) -+ wrmsrl(MSR_K7_EVNTSEL0+1, 0); -+ if (test_bit(2, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) -+ wrmsrl(MSR_K7_EVNTSEL0+2, 0); -+ if (test_bit(3, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) -+ wrmsrl(MSR_K7_EVNTSEL0+3, 0); -+ -+ if (test_bit(4, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) -+ wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); -+ if (test_bit(5, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) -+ wrmsrl(MSR_AMD64_IBSOPCTL, 0); -+} -+ -+/** -+ * pfm_amd64_restore_pmcs - reload PMC registers -+ * @ctx: context 
to restore from -+ * @set: current event set -+ * -+ * optimized version of pfm_arch_restore_pmcs(). On AMD64, we can -+ * afford to only restore the pmcs registers we use, because they are -+ * all independent from each other. -+ */ -+static void pfm_amd64_restore_pmcs(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ u64 *mask; -+ u16 i, num; -+ -+ mask = set->used_pmcs; -+ num = set->nused_pmcs; -+ for (i = 0; num; i++) { -+ if (test_bit(i, cast_ulp(mask))) { -+ wrmsrl(pfm_amd64_pmc_desc[i].hw_addr, set->pmcs[i]); -+ num--; -+ } -+ } -+} -+ -+static struct pfm_pmu_config pfm_amd64_pmu_conf = { -+ .pmu_name = "AMD64", -+ .counter_width = 47, -+ .pmd_desc = pfm_amd64_pmd_desc, -+ .pmc_desc = pfm_amd64_pmc_desc, -+ .num_pmc_entries = PFM_AMD_NUM_PMCS, -+ .num_pmd_entries = PFM_AMD_NUM_PMDS, -+ .probe_pmu = pfm_amd64_probe_pmu, -+ .version = "1.2", -+ .pmu_info = &pfm_amd64_pmu_info, -+ .flags = PFM_PMU_BUILTIN_FLAG, -+ .owner = THIS_MODULE, -+}; -+ -+static int __init pfm_amd64_pmu_init_module(void) -+{ -+ return pfm_pmu_register(&pfm_amd64_pmu_conf); -+} -+ -+static void __exit pfm_amd64_pmu_cleanup_module(void) -+{ -+ if (pfm_nb_sys_owners) -+ vfree(pfm_nb_sys_owners); -+ -+ pfm_pmu_unregister(&pfm_amd64_pmu_conf); -+} -+ -+module_init(pfm_amd64_pmu_init_module); -+module_exit(pfm_amd64_pmu_cleanup_module); -diff --git a/arch/x86/perfmon/perfmon_intel_arch.c b/arch/x86/perfmon/perfmon_intel_arch.c -new file mode 100644 -index 0000000..e27a732 ---- /dev/null -+++ b/arch/x86/perfmon/perfmon_intel_arch.c -@@ -0,0 +1,610 @@ -+/* -+ * This file contains the Intel architectural perfmon v1, v2, v3 -+ * description tables. -+ * -+ * Architectural perfmon was introduced with Intel Core Solo/Duo -+ * processors. -+ * -+ * Copyright (c) 2006-2007 Hewlett-Packard Development Company, L.P. 
-+ * Contributed by Stephane Eranian -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+MODULE_AUTHOR("Stephane Eranian "); -+MODULE_DESCRIPTION("Intel architectural perfmon v1"); -+MODULE_LICENSE("GPL"); -+ -+static int force, force_nmi; -+MODULE_PARM_DESC(force, "bool: force module to load succesfully"); -+MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt"); -+module_param(force, bool, 0600); -+module_param(force_nmi, bool, 0600); -+ -+static u64 enable_mask[PFM_MAX_PMCS]; -+static u16 max_enable; -+ -+/* -+ * - upper 32 bits are reserved -+ * - INT: APIC enable bit is reserved (forced to 1) -+ * - bit 21 is reserved -+ * -+ * RSVD: reserved bits are 1 -+ */ -+#define PFM_IA_PMC_RSVD ((~((1ULL<<32)-1)) \ -+ | (1ULL<<20) \ -+ | (1ULL<<21)) -+ -+/* -+ * force Local APIC interrupt on overflow -+ * disable with NO_EMUL64 -+ */ -+#define PFM_IA_PMC_VAL (1ULL<<20) -+#define PFM_IA_NO64 (1ULL<<20) -+ -+/* -+ * architectuture specifies that: -+ * IA32_PMCx MSR : starts at 0x0c1 & occupy a contiguous block of MSR -+ * IA32_PERFEVTSELx MSR : starts at 0x186 & occupy a contiguous block of MSR -+ * MSR_GEN_FIXED_CTR0 : starts at 0x309 & occupy a contiguous block of MSR -+ */ -+#define MSR_GEN_SEL_BASE MSR_P6_EVNTSEL0 -+#define MSR_GEN_PMC_BASE MSR_P6_PERFCTR0 
-+#define MSR_GEN_FIXED_PMC_BASE MSR_CORE_PERF_FIXED_CTR0 -+ -+/* -+ * layout of EAX for CPUID.0xa leaf function -+ */ -+struct pmu_eax { -+ unsigned int version:8; /* architectural perfmon version */ -+ unsigned int num_cnt:8; /* number of generic counters */ -+ unsigned int cnt_width:8; /* width of generic counters */ -+ unsigned int ebx_length:8; /* number of architected events */ -+}; -+ -+/* -+ * layout of EDX for CPUID.0xa leaf function when perfmon v2 is detected -+ */ -+struct pmu_edx { -+ unsigned int num_cnt:5; /* number of fixed counters */ -+ unsigned int cnt_width:8; /* width of fixed counters */ -+ unsigned int reserved:19; -+}; -+ -+static void pfm_intel_arch_restore_pmcs(struct pfm_context *ctx, -+ struct pfm_event_set *set); -+static int pfm_intel_arch_stop_save(struct pfm_context *ctx, -+ struct pfm_event_set *set); -+static int pfm_intel_arch_has_ovfls(struct pfm_context *ctx); -+static void __kprobes pfm_intel_arch_quiesce(void); -+ -+/* -+ * physical addresses of MSR controlling the perfevtsel and counter registers -+ */ -+struct pfm_arch_pmu_info pfm_intel_arch_pmu_info = { -+ .stop_save = pfm_intel_arch_stop_save, -+ .has_ovfls = pfm_intel_arch_has_ovfls, -+ .quiesce = pfm_intel_arch_quiesce, -+ .restore_pmcs = pfm_intel_arch_restore_pmcs -+}; -+ -+#define PFM_IA_C(n) { \ -+ .type = PFM_REG_I64, \ -+ .desc = "PERFEVTSEL"#n, \ -+ .dfl_val = PFM_IA_PMC_VAL, \ -+ .rsvd_msk = PFM_IA_PMC_RSVD, \ -+ .no_emul64_msk = PFM_IA_NO64, \ -+ .hw_addr = MSR_GEN_SEL_BASE+(n) \ -+ } -+ -+#define PFM_IA_D(n) \ -+ { .type = PFM_REG_C, \ -+ .desc = "PMC"#n, \ -+ .hw_addr = MSR_P6_PERFCTR0+n, \ -+ .dep_pmcs[0] = 1ULL << n \ -+ } -+ -+#define PFM_IA_FD(n) \ -+ { .type = PFM_REG_C, \ -+ .desc = "FIXED_CTR"#n, \ -+ .hw_addr = MSR_CORE_PERF_FIXED_CTR0+n,\ -+ .dep_pmcs[0] = 1ULL << 16 \ -+ } -+ -+static struct pfm_regmap_desc pfm_intel_arch_pmc_desc[] = { -+/* pmc0 */ PFM_IA_C(0), PFM_IA_C(1), PFM_IA_C(2), PFM_IA_C(3), -+/* pmc4 */ PFM_IA_C(4), PFM_IA_C(5), 
PFM_IA_C(6), PFM_IA_C(7), -+/* pmc8 */ PFM_IA_C(8), PFM_IA_C(9), PFM_IA_C(10), PFM_IA_C(11), -+/* pmc12 */ PFM_IA_C(12), PFM_IA_C(13), PFM_IA_C(14), PFM_IA_C(15), -+ -+/* pmc16 */ { .type = PFM_REG_I, -+ .desc = "FIXED_CTRL", -+ .dfl_val = 0x8888888888888888ULL, /* force PMI */ -+ .rsvd_msk = 0, /* set dynamically */ -+ .no_emul64_msk = 0, -+ .hw_addr = MSR_CORE_PERF_FIXED_CTR_CTRL -+ }, -+}; -+#define PFM_IA_MAX_PMCS ARRAY_SIZE(pfm_intel_arch_pmc_desc) -+ -+static struct pfm_regmap_desc pfm_intel_arch_pmd_desc[] = { -+/* pmd0 */ PFM_IA_D(0), PFM_IA_D(1), PFM_IA_D(2), PFM_IA_D(3), -+/* pmd4 */ PFM_IA_D(4), PFM_IA_D(5), PFM_IA_D(6), PFM_IA_D(7), -+/* pmd8 */ PFM_IA_D(8), PFM_IA_D(9), PFM_IA_D(10), PFM_IA_D(11), -+/* pmd12 */ PFM_IA_D(12), PFM_IA_D(13), PFM_IA_D(14), PFM_IA_D(15), -+ -+/* pmd16 */ PFM_IA_FD(0), PFM_IA_FD(1), PFM_IA_FD(2), PFM_IA_FD(3), -+/* pmd20 */ PFM_IA_FD(4), PFM_IA_FD(5), PFM_IA_FD(6), PFM_IA_FD(7), -+/* pmd24 */ PFM_IA_FD(8), PFM_IA_FD(9), PFM_IA_FD(10), PFM_IA_FD(11), -+/* pmd28 */ PFM_IA_FD(16), PFM_IA_FD(17), PFM_IA_FD(18), PFM_IA_FD(19) -+}; -+#define PFM_IA_MAX_PMDS ARRAY_SIZE(pfm_intel_arch_pmd_desc) -+ -+#define PFM_IA_MAX_CNT 16 /* # generic counters in mapping table */ -+#define PFM_IA_MAX_FCNT 16 /* # of fixed counters in mapping table */ -+#define PFM_IA_FCNT_BASE 16 /* base index of fixed counters PMD */ -+ -+static struct pfm_pmu_config pfm_intel_arch_pmu_conf; -+ -+static void pfm_intel_arch_check_errata(void) -+{ -+ /* -+ * Core Duo errata AE49 (no fix). 
Both counters share a single -+ * enable bit in PERFEVTSEL0 -+ */ -+ if (current_cpu_data.x86 == 6 && current_cpu_data.x86_model == 14) -+ pfm_intel_arch_pmu_info.flags |= PFM_X86_FL_NO_SHARING; -+} -+ -+static inline void set_enable_mask(unsigned int i) -+{ -+ __set_bit(i, cast_ulp(enable_mask)); -+ -+ /* max_enable = highest + 1 */ -+ if ((i+1) > max_enable) -+ max_enable = i+ 1; -+} -+ -+static void pfm_intel_arch_setup_generic(unsigned int version, -+ unsigned int width, -+ unsigned int count) -+{ -+ u64 rsvd; -+ unsigned int i; -+ -+ /* -+ * first we handle the generic counters: -+ * -+ * - ensure HW does not have more registers than hardcoded in the tables -+ * - adjust rsvd_msk to actual counter width -+ * - initialize enable_mask (list of PMC with start/stop capability) -+ * - mark unused hardcoded generic counters as unimplemented -+ */ -+ -+ /* -+ * min of number of Hw counters and hardcoded in the tables -+ */ -+ if (count >= PFM_IA_MAX_CNT) { -+ printk(KERN_INFO "perfmon: Limiting number of generic counters" -+ " to %u, HW supports %u", -+ PFM_IA_MAX_CNT, count); -+ count = PFM_IA_MAX_CNT; -+ } -+ -+ /* -+ * adjust rsvd_msk for generic counters based on actual width -+ * initialize enable_mask (1 per pmd) -+ */ -+ rsvd = ~((1ULL << width)-1); -+ for (i = 0; i < count; i++) { -+ pfm_intel_arch_pmd_desc[i].rsvd_msk = rsvd; -+ set_enable_mask(i); -+ } -+ -+ /* -+ * handle version 3 new anythread bit (21) -+ */ -+ if (version == 3) { -+ for (i = 0; i < count; i++) -+ pfm_intel_arch_pmc_desc[i].rsvd_msk &= ~(1ULL << 21); -+ } -+ -+ -+ /* -+ * mark unused generic counters as not available -+ */ -+ for (i = count ; i < PFM_IA_MAX_CNT; i++) { -+ pfm_intel_arch_pmd_desc[i].type = PFM_REG_NA; -+ pfm_intel_arch_pmc_desc[i].type = PFM_REG_NA; -+ } -+} -+ -+static void pfm_intel_arch_setup_fixed(unsigned int version, -+ unsigned int width, -+ unsigned int count) -+{ -+ u64 rsvd, dfl; -+ unsigned int i; -+ -+ /* -+ * handle the fixed counters (if any): -+ * -+ * - 
ensure HW does not have more registers than hardcoded in the tables -+ * - adjust rsvd_msk to actual counter width -+ * - initialize enable_mask (list of PMC with start/stop capability) -+ * - mark unused hardcoded generic counters as unimplemented -+ */ -+ if (count >= PFM_IA_MAX_FCNT) { -+ printk(KERN_INFO "perfmon: Limiting number of fixed counters" -+ " to %u, HW supports %u", -+ PFM_IA_MAX_FCNT, count); -+ count = PFM_IA_MAX_FCNT; -+ } -+ /* -+ * adjust rsvd_msk for fixed counters based on actual width -+ */ -+ rsvd = ~((1ULL << width)-1); -+ for (i = 0; i < count; i++) -+ pfm_intel_arch_pmd_desc[PFM_IA_FCNT_BASE+i].rsvd_msk = rsvd; -+ -+ /* -+ * handle version new anythread bit (bit 2) -+ */ -+ if (version == 3) -+ rsvd = 1ULL << 3; -+ else -+ rsvd = 3ULL << 2; -+ -+ pfm_intel_arch_pmc_desc[16].rsvd_msk = 0; -+ for (i = 0; i < count; i++) -+ pfm_intel_arch_pmc_desc[16].rsvd_msk |= rsvd << (i<<2); -+ -+ /* -+ * mark unused fixed counters as unimplemented -+ * -+ * update the rsvd_msk, dfl_val in FIXED_CTRL: -+ * - rsvd_msk: set all 4 bits -+ * - dfl_val : clear all 4 bits -+ */ -+ dfl = pfm_intel_arch_pmc_desc[16].dfl_val; -+ rsvd = pfm_intel_arch_pmc_desc[16].rsvd_msk; -+ -+ for (i = count ; i < PFM_IA_MAX_FCNT; i++) { -+ pfm_intel_arch_pmd_desc[PFM_IA_FCNT_BASE+i].type = PFM_REG_NA; -+ rsvd |= 0xfULL << (i<<2); -+ dfl &= ~(0xfULL << (i<<2)); -+ } -+ -+ /* -+ * FIXED_CTR_CTRL unavailable when no fixed counters are defined -+ */ -+ if (!count) { -+ pfm_intel_arch_pmc_desc[16].type = PFM_REG_NA; -+ } else { -+ /* update rsvd_mask and dfl_val */ -+ pfm_intel_arch_pmc_desc[16].rsvd_msk = rsvd; -+ pfm_intel_arch_pmc_desc[16].dfl_val = dfl; -+ set_enable_mask(16); -+ } -+} -+ -+static int pfm_intel_arch_probe_pmu(void) -+{ -+ union { -+ unsigned int val; -+ struct pmu_eax eax; -+ struct pmu_edx edx; -+ } eax, edx; -+ unsigned int ebx, ecx; -+ unsigned int width = 0; -+ -+ edx.val = 0; -+ -+ if (!(cpu_has_arch_perfmon || force)) { -+ PFM_INFO("no support for Intel 
architectural PMU"); -+ return -1; -+ } -+ -+ if (!cpu_has_apic) { -+ PFM_INFO("no Local APIC, try rebooting with lapic option"); -+ return -1; -+ } -+ -+ /* cpuid() call protected by cpu_has_arch_perfmon */ -+ cpuid(0xa, &eax.val, &ebx, &ecx, &edx.val); -+ -+ /* -+ * reject processors supported by perfmon_intel_core -+ * -+ * We need to do this explicitely to avoid depending -+ * on the link order in case, the modules are compiled as -+ * builtin. -+ * -+ * non Intel processors are rejected by cpu_has_arch_perfmon -+ */ -+ if (current_cpu_data.x86 == 6 && !force) { -+ switch (current_cpu_data.x86_model) { -+ case 15: /* Merom: use perfmon_intel_core */ -+ case 23: /* Penryn: use perfmon_intel_core */ -+ return -1; -+ default: -+ break; -+ } -+ } -+ -+ /* -+ * some 6/15 models have buggy BIOS -+ */ -+ if (eax.eax.version == 0 -+ && current_cpu_data.x86 == 6 && current_cpu_data.x86_model == 15) { -+ PFM_INFO("buggy v2 BIOS, adjusting for 2 generic counters"); -+ eax.eax.version = 2; -+ eax.eax.num_cnt = 2; -+ eax.eax.cnt_width = 40; -+ } -+ -+ /* -+ * Intel Atom processors have a buggy firmware which does not report -+ * the correct number of fixed counters -+ */ -+ if (eax.eax.version == 3 && edx.edx.num_cnt < 3 -+ && current_cpu_data.x86 == 6 && current_cpu_data.x86_model == 28) { -+ PFM_INFO("buggy v3 BIOS, adjusting for 3 fixed counters"); -+ edx.edx.num_cnt = 3; -+ } -+ -+ /* -+ * some v2 BIOSes are incomplete -+ */ -+ if (eax.eax.version == 2 && !edx.edx.num_cnt) { -+ PFM_INFO("buggy v2 BIOS, adjusting for 3 fixed counters"); -+ edx.edx.num_cnt = 3; -+ edx.edx.cnt_width = 40; -+ } -+ -+ /* -+ * no fixed counters on earlier versions -+ */ -+ if (eax.eax.version < 2) { -+ edx.val = 0; -+ } else { -+ /* -+ * use the min value of both widths until we support -+ * variable width counters -+ */ -+ width = eax.eax.cnt_width < edx.edx.cnt_width ? 
-+ eax.eax.cnt_width : edx.edx.cnt_width; -+ } -+ -+ PFM_INFO("detected architecural perfmon v%d", eax.eax.version); -+ PFM_INFO("num_gen=%d width=%d num_fixed=%d width=%d", -+ eax.eax.num_cnt, -+ eax.eax.cnt_width, -+ edx.edx.num_cnt, -+ edx.edx.cnt_width); -+ -+ -+ pfm_intel_arch_setup_generic(eax.eax.version, -+ width, -+ eax.eax.num_cnt); -+ -+ pfm_intel_arch_setup_fixed(eax.eax.version, -+ width, -+ edx.edx.num_cnt); -+ -+ if (force_nmi) -+ pfm_intel_arch_pmu_info.flags |= PFM_X86_FL_USE_NMI; -+ -+ pfm_intel_arch_check_errata(); -+ -+ return 0; -+} -+ -+/** -+ * pfm_intel_arch_has_ovfls - check for pending overflow condition -+ * @ctx: context to work on -+ * -+ * detect if counters have overflowed. -+ * return: -+ * 0 : no overflow -+ * 1 : at least one overflow -+ */ -+static int __kprobes pfm_intel_arch_has_ovfls(struct pfm_context *ctx) -+{ -+ u64 *cnt_mask; -+ u64 wmask, val; -+ u16 i, num; -+ -+ cnt_mask = ctx->regs.cnt_pmds; -+ num = ctx->regs.num_counters; -+ wmask = 1ULL << pfm_pmu_conf->counter_width; -+ -+ /* -+ * we can leverage the fact that we know the mapping -+ * to hardcode the MSR address and avoid accessing -+ * more cachelines -+ * -+ * We need to check cnt_mask because not all registers -+ * may be available. -+ */ -+ for (i = 0; num; i++) { -+ if (test_bit(i, cast_ulp(cnt_mask))) { -+ rdmsrl(pfm_intel_arch_pmd_desc[i].hw_addr, val); -+ if (!(val & wmask)) -+ return 1; -+ num--; -+ } -+ } -+ return 0; -+} -+ -+static int pfm_intel_arch_stop_save(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ u64 used_mask[PFM_PMC_BV]; -+ u64 *cnt_pmds; -+ u64 val, wmask, ovfl_mask; -+ u32 i, count; -+ -+ wmask = 1ULL << pfm_pmu_conf->counter_width; -+ -+ bitmap_and(cast_ulp(used_mask), -+ cast_ulp(set->used_pmcs), -+ cast_ulp(enable_mask), -+ max_enable); -+ -+ count = bitmap_weight(cast_ulp(used_mask), max_enable); -+ -+ /* -+ * stop monitoring -+ * Unfortunately, this is very expensive! -+ * wrmsrl() is serializing. 
-+ */ -+ for (i = 0; count; i++) { -+ if (test_bit(i, cast_ulp(used_mask))) { -+ wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, 0); -+ count--; -+ } -+ } -+ -+ /* -+ * if we already having a pending overflow condition, we simply -+ * return to take care of this first. -+ */ -+ if (set->npend_ovfls) -+ return 1; -+ -+ ovfl_mask = pfm_pmu_conf->ovfl_mask; -+ cnt_pmds = ctx->regs.cnt_pmds; -+ -+ /* -+ * check for pending overflows and save PMDs (combo) -+ * we employ used_pmds because we also need to save -+ * and not just check for pending interrupts. -+ * -+ * Must check for counting PMDs because of virtual PMDs -+ */ -+ count = set->nused_pmds; -+ for (i = 0; count; i++) { -+ if (test_bit(i, cast_ulp(set->used_pmds))) { -+ val = pfm_arch_read_pmd(ctx, i); -+ if (likely(test_bit(i, cast_ulp(cnt_pmds)))) { -+ if (!(val & wmask)) { -+ __set_bit(i, cast_ulp(set->povfl_pmds)); -+ set->npend_ovfls++; -+ } -+ val = (set->pmds[i].value & ~ovfl_mask) -+ | (val & ovfl_mask); -+ } -+ set->pmds[i].value = val; -+ count--; -+ } -+ } -+ /* 0 means: no need to save PMDs at upper level */ -+ return 0; -+} -+ -+/** -+ * pfm_intel_arch_quiesce - stop monitoring without grabbing any lock -+ * -+ * called from NMI interrupt handler to immediately stop monitoring -+ * cannot grab any lock, including perfmon related locks -+ */ -+static void __kprobes pfm_intel_arch_quiesce(void) -+{ -+ u16 i; -+ -+ /* -+ * PMC16 is the fixed control control register so it has a -+ * distinct MSR address -+ * -+ * We do not use the hw_addr field in the table to avoid touching -+ * too many cachelines -+ */ -+ for (i = 0; i < pfm_pmu_conf->regs_all.max_pmc; i++) { -+ if (test_bit(i, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) { -+ if (i == 16) -+ wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, 0); -+ else -+ wrmsrl(MSR_P6_EVNTSEL0+i, 0); -+ } -+ } -+} -+ -+/** -+ * pfm_intel_arch_restore_pmcs - reload PMC registers -+ * @ctx: context to restore from -+ * @set: current event set -+ * -+ * optimized version of 
pfm_arch_restore_pmcs(). On architectural perfmon, -+ * we can afford to only restore the pmcs registers we use, because they -+ * are all independent from each other. -+ */ -+static void pfm_intel_arch_restore_pmcs(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ u64 *mask; -+ u16 i, num; -+ -+ mask = set->used_pmcs; -+ num = set->nused_pmcs; -+ for (i = 0; num; i++) { -+ if (test_bit(i, cast_ulp(mask))) { -+ wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, set->pmcs[i]); -+ num--; -+ } -+ } -+} -+/* -+ * Counters may have model-specific width. Yet the documentation says -+ * that only the lower 32 bits can be written to due to the specification -+ * of wrmsr. bits [32-(w-1)] are sign extensions of bit 31. Bits [w-63] must -+ * not be set (see rsvd_msk for PMDs). As such the effective width of a -+ * counter is 31 bits only regardless of what CPUID.0xa returns. -+ * -+ * See IA-32 Intel Architecture Software developer manual Vol 3B chapter 18 -+ */ -+static struct pfm_pmu_config pfm_intel_arch_pmu_conf = { -+ .pmu_name = "Intel architectural", -+ .pmd_desc = pfm_intel_arch_pmd_desc, -+ .counter_width = 31, -+ .num_pmc_entries = PFM_IA_MAX_PMCS, -+ .num_pmd_entries = PFM_IA_MAX_PMDS, -+ .pmc_desc = pfm_intel_arch_pmc_desc, -+ .probe_pmu = pfm_intel_arch_probe_pmu, -+ .version = "1.0", -+ .flags = PFM_PMU_BUILTIN_FLAG, -+ .owner = THIS_MODULE, -+ .pmu_info = &pfm_intel_arch_pmu_info -+}; -+ -+static int __init pfm_intel_arch_pmu_init_module(void) -+{ -+ return pfm_pmu_register(&pfm_intel_arch_pmu_conf); -+} -+ -+static void __exit pfm_intel_arch_pmu_cleanup_module(void) -+{ -+ pfm_pmu_unregister(&pfm_intel_arch_pmu_conf); -+} -+ -+module_init(pfm_intel_arch_pmu_init_module); -+module_exit(pfm_intel_arch_pmu_cleanup_module); -diff --git a/arch/x86/perfmon/perfmon_intel_atom.c b/arch/x86/perfmon/perfmon_intel_atom.c -new file mode 100644 -index 0000000..9b94863 ---- /dev/null -+++ b/arch/x86/perfmon/perfmon_intel_atom.c -@@ -0,0 +1,541 @@ -+/* -+ * perfmon 
support for Intel Atom (architectural perfmon v3 + PEBS) -+ * -+ * Copyright (c) 2008 Google,Inc -+ * Contributed by Stephane Eranian -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+#include -+ -+MODULE_AUTHOR("Stephane Eranian "); -+MODULE_DESCRIPTION("Intel Atom"); -+MODULE_LICENSE("GPL"); -+ -+static int force, force_nmi; -+MODULE_PARM_DESC(force, "bool: force module to load succesfully"); -+MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt"); -+module_param(force, bool, 0600); -+module_param(force_nmi, bool, 0600); -+ -+/* -+ * - upper 32 bits are reserved -+ * - INT: APIC enable bit is reserved (forced to 1) -+ * -+ * RSVD: reserved bits are 1 -+ */ -+#define PFM_ATOM_PMC_RSVD ((~((1ULL<<32)-1)) | (1ULL<<20)) -+ -+/* -+ * force Local APIC interrupt on overflow -+ * disable with NO_EMUL64 -+ */ -+#define PFM_ATOM_PMC_VAL (1ULL<<20) -+#define PFM_ATOM_NO64 (1ULL<<20) -+ -+/* -+ * Atom counters are 40-bits. 40-bits can be read but ony 31 can be written -+ * to due to a limitation of wrmsr. Bits [[63-32] are sign extensions of bit 31. 
-+ * Bits [63-40] must not be set -+ * -+ * See IA-32 Intel Architecture Software developer manual Vol 3B chapter 18 -+ */ -+#define PFM_ATOM_PMD_WIDTH 31 -+#define PFM_ATOM_PMD_RSVD ~((1ULL << 40)-1) -+ -+static void pfm_intel_atom_acquire_pmu_percpu(void); -+static void pfm_intel_atom_release_pmu_percpu(void); -+static void pfm_intel_atom_restore_pmcs(struct pfm_context *ctx, -+ struct pfm_event_set *set); -+static int pfm_intel_atom_stop_save(struct pfm_context *ctx, -+ struct pfm_event_set *set); -+static int pfm_intel_atom_has_ovfls(struct pfm_context *ctx); -+static void __kprobes pfm_intel_atom_quiesce(void); -+ -+struct pfm_arch_pmu_info pfm_intel_atom_pmu_info = { -+ .stop_save = pfm_intel_atom_stop_save, -+ .has_ovfls = pfm_intel_atom_has_ovfls, -+ .quiesce = pfm_intel_atom_quiesce, -+ .restore_pmcs = pfm_intel_atom_restore_pmcs, -+ .acquire_pmu_percpu = pfm_intel_atom_acquire_pmu_percpu, -+ .release_pmu_percpu = pfm_intel_atom_release_pmu_percpu -+ -+}; -+ -+#define PFM_ATOM_C(n) { \ -+ .type = PFM_REG_I64, \ -+ .desc = "PERFEVTSEL"#n, \ -+ .dfl_val = PFM_ATOM_PMC_VAL, \ -+ .rsvd_msk = PFM_ATOM_PMC_RSVD, \ -+ .no_emul64_msk = PFM_ATOM_NO64, \ -+ .hw_addr = MSR_P6_EVNTSEL0 + (n) \ -+ } -+ -+ -+static struct pfm_regmap_desc pfm_intel_atom_pmc_desc[] = { -+/* pmc0 */ PFM_ATOM_C(0), -+/* pmc1 */ PFM_ATOM_C(1), -+/* pmc2 */ PMX_NA, PMX_NA, -+/* pmc4 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc8 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc12 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc16 */ { .type = PFM_REG_I, -+ .desc = "FIXED_CTRL", -+ .dfl_val = 0x0000000000000888ULL, /* force PMI */ -+ .rsvd_msk = 0xfffffffffffffcccULL, /* 3 fixed counters defined */ -+ .no_emul64_msk = 0, -+ .hw_addr = MSR_CORE_PERF_FIXED_CTR_CTRL -+ }, -+/* pmc17 */{ .type = PFM_REG_W, -+ .desc = "PEBS_ENABLE", -+ .dfl_val = 0, -+ .rsvd_msk = 0xfffffffffffffffeULL, -+ .no_emul64_msk = 0, -+ .hw_addr = MSR_IA32_PEBS_ENABLE -+ } -+}; -+#define PFM_ATOM_MAX_PMCS 
ARRAY_SIZE(pfm_intel_atom_pmc_desc) -+ -+#define PFM_ATOM_D(n) \ -+ { .type = PFM_REG_C, \ -+ .desc = "PMC"#n, \ -+ .rsvd_msk = PFM_ATOM_PMD_RSVD, \ -+ .hw_addr = MSR_P6_PERFCTR0+n, \ -+ .dep_pmcs[0] = 1ULL << n \ -+ } -+ -+#define PFM_ATOM_FD(n) \ -+ { .type = PFM_REG_C, \ -+ .desc = "FIXED_CTR"#n, \ -+ .rsvd_msk = PFM_ATOM_PMD_RSVD, \ -+ .hw_addr = MSR_CORE_PERF_FIXED_CTR0+n,\ -+ .dep_pmcs[0] = 1ULL << 16 \ -+ } -+ -+static struct pfm_regmap_desc pfm_intel_atom_pmd_desc[] = { -+/* pmd0 */ PFM_ATOM_D(0), -+/* pmd1 */ PFM_ATOM_D(1), -+/* pmd2 */ PMX_NA, -+/* pmd3 */ PMX_NA, -+/* pmd4 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmd8 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmd12 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmd16 */ PFM_ATOM_FD(0), -+/* pmd17 */ PFM_ATOM_FD(1), -+/* pmd18 */ PFM_ATOM_FD(2) -+}; -+#define PFM_ATOM_MAX_PMDS ARRAY_SIZE(pfm_intel_atom_pmd_desc) -+ -+static struct pfm_pmu_config pfm_intel_atom_pmu_conf; -+ -+static int pfm_intel_atom_probe_pmu(void) -+{ -+ if (force) -+ goto doit; -+ -+ if (current_cpu_data.x86_vendor != X86_VENDOR_INTEL) -+ return -1; -+ -+ if (current_cpu_data.x86 != 6) -+ return -1; -+ -+ if (current_cpu_data.x86_model != 28) -+ return -1; -+doit: -+ /* -+ * having APIC is mandatory, so disregard force option -+ */ -+ if (!cpu_has_apic) { -+ PFM_INFO("no Local APIC, try rebooting with lapic option"); -+ return -1; -+ } -+ -+ PFM_INFO("detected Intel Atom PMU"); -+ -+ if (force_nmi) -+ pfm_intel_atom_pmu_info.flags |= PFM_X86_FL_USE_NMI; -+ -+ return 0; -+} -+ -+/** -+ * pfm_intel_atom_has_ovfls - check for pending overflow condition -+ * @ctx: context to work on -+ * -+ * detect if counters have overflowed. 
-+ * return: -+ * 0 : no overflow -+ * 1 : at least one overflow -+ */ -+static int __kprobes pfm_intel_atom_has_ovfls(struct pfm_context *ctx) -+{ -+ struct pfm_regmap_desc *d; -+ u64 ovf; -+ -+ d = pfm_pmu_conf->pmd_desc; -+ /* -+ * read global overflow status register -+ * if sharing PMU, then not all bit are ours so must -+ * check only the ones we actually use -+ */ -+ rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, ovf); -+ -+ /* -+ * for pmd0, we also check PEBS overflow on bit 62 -+ */ -+ if ((d[0].type & PFM_REG_I) && (ovf & ((1ull << 62) | 1ull))) -+ return 1; -+ -+ if ((d[1].type & PFM_REG_I) && (ovf & 2ull)) -+ return 1; -+ -+ if ((d[16].type & PFM_REG_I) && (ovf & (1ull << 32))) -+ return 1; -+ -+ if ((d[17].type & PFM_REG_I) && (ovf & (2ull << 32))) -+ return 1; -+ -+ if ((d[18].type & PFM_REG_I) && (ovf & (4ull << 32))) -+ return 1; -+ -+ return 0; -+} -+ -+/** -+ * pfm_intel_atom_stop_save - stop monitoring, collect pending overflow, save pmds -+ * @ctx: context to work on -+ * @set: active set -+ * -+ * return: -+ * 1: caller needs to save pmds -+ * 0: caller does not need to save pmds, they have been saved by this call -+ */ -+static int pfm_intel_atom_stop_save(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+#define PFM_ATOM_WMASK (1ULL << 31) -+#define PFM_ATOM_OMASK ((1ULL << 31)-1) -+ u64 clear_ovf = 0; -+ u64 ovf, ovf2, val; -+ -+ /* -+ * read global overflow status register -+ * if sharing PMU, then not all bit are ours so must -+ * check only the ones we actually use. -+ * -+ * XXX: Atom seems to have a bug with the stickyness of -+ * GLOBAL_STATUS. If we read GLOBAL_STATUS after we -+ * clear the generic counters, then their bits in -+ * GLOBAL_STATUS are cleared. This should not be the -+ * case accoding to architected PMU. To workaround -+ * the problem, we read GLOBAL_STATUS BEFORE we stop -+ * all monitoring. 
-+ */ -+ rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, ovf); -+ -+ /* -+ * stop monitoring -+ */ -+ if (test_bit(0, cast_ulp(set->used_pmcs))) -+ wrmsrl(MSR_P6_EVNTSEL0, 0); -+ -+ if (test_bit(1, cast_ulp(set->used_pmcs))) -+ wrmsrl(MSR_P6_EVNTSEL1, 0); -+ -+ if (test_bit(16, cast_ulp(set->used_pmcs))) -+ wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, 0); -+ -+ if (test_bit(17, cast_ulp(set->used_pmcs))) -+ wrmsrl(MSR_IA32_PEBS_ENABLE, 0); -+ -+ /* -+ * XXX: related to bug mentioned above -+ * -+ * read GLOBAL_STATUS again to avoid race condition -+ * with overflows happening after first read and -+ * before stop. That avoids missing overflows on -+ * the fixed counters and PEBS -+ */ -+ rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, ovf2); -+ ovf |= ovf2; -+ -+ /* -+ * if we already have a pending overflow condition, we simply -+ * return to take care of it first. -+ */ -+ if (set->npend_ovfls) -+ return 1; -+ -+ /* -+ * check PMD 0,1,16,17,18 for overflow and save their value -+ */ -+ if (test_bit(0, cast_ulp(set->used_pmds))) { -+ rdmsrl(MSR_P6_PERFCTR0, val); -+ if (ovf & ((1ull<<62)|1ull)) { -+ __set_bit(0, cast_ulp(set->povfl_pmds)); -+ set->npend_ovfls++; -+ clear_ovf = (1ull << 62) | 1ull; -+ } -+ val = (set->pmds[0].value & ~PFM_ATOM_OMASK) -+ | (val & PFM_ATOM_OMASK); -+ set->pmds[0].value = val; -+ } -+ -+ if (test_bit(1, cast_ulp(set->used_pmds))) { -+ rdmsrl(MSR_P6_PERFCTR1, val); -+ if (ovf & 2ull) { -+ __set_bit(1, cast_ulp(set->povfl_pmds)); -+ set->npend_ovfls++; -+ clear_ovf |= 2ull; -+ } -+ val = (set->pmds[1].value & ~PFM_ATOM_OMASK) -+ | (val & PFM_ATOM_OMASK); -+ set->pmds[1].value = val; -+ } -+ -+ if (test_bit(16, cast_ulp(set->used_pmds))) { -+ rdmsrl(MSR_CORE_PERF_FIXED_CTR0, val); -+ if (ovf & (1ull << 32)) { -+ __set_bit(16, cast_ulp(set->povfl_pmds)); -+ set->npend_ovfls++; -+ clear_ovf |= 1ull << 32; -+ } -+ val = (set->pmds[16].value & ~PFM_ATOM_OMASK) -+ | (val & PFM_ATOM_OMASK); -+ set->pmds[16].value = val; -+ } -+ -+ if (test_bit(17, cast_ulp(set->used_pmds))) 
{ -+ rdmsrl(MSR_CORE_PERF_FIXED_CTR0+1, val); -+ if (ovf & (2ull << 32)) { -+ __set_bit(17, cast_ulp(set->povfl_pmds)); -+ set->npend_ovfls++; -+ clear_ovf |= 2ull << 32; -+ } -+ val = (set->pmds[17].value & ~PFM_ATOM_OMASK) -+ | (val & PFM_ATOM_OMASK); -+ set->pmds[17].value = val; -+ } -+ -+ if (test_bit(18, cast_ulp(set->used_pmds))) { -+ rdmsrl(MSR_CORE_PERF_FIXED_CTR0+2, val); -+ if (ovf & (4ull << 32)) { -+ __set_bit(18, cast_ulp(set->povfl_pmds)); -+ set->npend_ovfls++; -+ clear_ovf |= 4ull << 32; -+ } -+ val = (set->pmds[18].value & ~PFM_ATOM_OMASK) -+ | (val & PFM_ATOM_OMASK); -+ set->pmds[18].value = val; -+ } -+ -+ if (clear_ovf) -+ wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, clear_ovf); -+ -+ /* 0 means: no need to save PMDs at upper level */ -+ return 0; -+} -+ -+/** -+ * pfm_intel_atom_quiesce - stop monitoring without grabbing any lock -+ * -+ * called from NMI interrupt handler to immediately stop monitoring -+ * cannot grab any lock, including perfmon related locks -+ */ -+static void __kprobes pfm_intel_atom_quiesce(void) -+{ -+ /* -+ * quiesce PMU by clearing available registers that have -+ * the start/stop capability -+ */ -+ if (test_bit(0, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) -+ wrmsrl(MSR_P6_EVNTSEL0, 0); -+ -+ if (test_bit(1, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) -+ wrmsrl(MSR_P6_EVNTSEL1, 0); -+ -+ if (test_bit(16, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) -+ wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, 0); -+ -+ if (test_bit(17, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) -+ wrmsrl(MSR_IA32_PEBS_ENABLE, 0); -+} -+ -+/** -+ * pfm_intel_atom_restore_pmcs - reload PMC registers -+ * @ctx: context to restore from -+ * @set: current event set -+ * -+ * restores pmcs and also PEBS Data Save area pointer -+ */ -+static void pfm_intel_atom_restore_pmcs(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ struct pfm_arch_context *ctx_arch; -+ u64 clear_ovf = 0; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ /* -+ * must restore DS pointer before restoring PMCs 
-+ * as this can potentially reactivate monitoring -+ */ -+ if (ctx_arch->flags.use_ds) -+ wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ctx_arch->ds_area); -+ -+ if (test_bit(0, cast_ulp(set->used_pmcs))) { -+ wrmsrl(MSR_P6_EVNTSEL0, set->pmcs[0]); -+ clear_ovf = 1ull; -+ } -+ -+ if (test_bit(1, cast_ulp(set->used_pmcs))) { -+ wrmsrl(MSR_P6_EVNTSEL1, set->pmcs[1]); -+ clear_ovf |= 2ull; -+ } -+ -+ if (test_bit(16, cast_ulp(set->used_pmcs))) { -+ wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, set->pmcs[16]); -+ clear_ovf |= 7ull << 32; -+ } -+ -+ if (test_bit(17, cast_ulp(set->used_pmcs))) { -+ wrmsrl(MSR_IA32_PEBS_ENABLE, set->pmcs[17]); -+ clear_ovf |= 1ull << 62; -+ } -+ -+ if (clear_ovf) -+ wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, clear_ovf); -+} -+ -+static int pfm_intel_atom_pmc17_check(struct pfm_context *ctx, -+ struct pfm_event_set *set, -+ struct pfarg_pmc *req) -+{ -+ struct pfm_arch_context *ctx_arch; -+ ctx_arch = pfm_ctx_arch(ctx); -+ -+ /* -+ * if user activates PEBS_ENABLE, then we need to have a valid -+ * DS Area setup. This only happens when the PEBS sampling format is -+ * used in which case PFM_X86_USE_PEBS is set. We must reject all other -+ * requests. -+ * -+ * Otherwise we may pickup stale MSR_IA32_DS_AREA values. It appears -+ * that a value of 0 for this MSR does crash the system with -+ * PEBS_ENABLE=1. -+ */ -+ if (!ctx_arch->flags.use_pebs && req->reg_value) { -+ PFM_DBG("pmc17 useable only with a PEBS sampling format"); -+ return -EINVAL; -+ } -+ return 0; -+} -+ -+DEFINE_PER_CPU(u64, saved_global_ctrl); -+ -+/** -+ * pfm_intel_atom_acquire_pmu_percpu - acquire PMU resource per CPU -+ * -+ * For Atom, it is necessary to enable all available -+ * registers. 
The firmware rightfully has the fixed counters -+ * disabled for backward compatibility with architectural perfmon -+ * v1 -+ * -+ * This function is invoked on each online CPU -+ */ -+static void pfm_intel_atom_acquire_pmu_percpu(void) -+{ -+ struct pfm_regmap_desc *d; -+ u64 mask = 0; -+ unsigned int i; -+ -+ /* -+ * build bitmask of registers that are available to -+ * us. In some cases, there may be fewer registers than -+ * what Atom supports due to sharing with other kernel -+ * subsystems, such as NMI -+ */ -+ d = pfm_pmu_conf->pmd_desc; -+ for (i=0; i < 16; i++) { -+ if ((d[i].type & PFM_REG_I) == 0) -+ continue; -+ mask |= 1ull << i; -+ } -+ for (i=16; i < PFM_ATOM_MAX_PMDS; i++) { -+ if ((d[i].type & PFM_REG_I) == 0) -+ continue; -+ mask |= 1ull << (32+i-16); -+ } -+ -+ /* -+ * keep a local copy of the current MSR_CORE_PERF_GLOBAL_CTRL -+ */ -+ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, __get_cpu_var(saved_global_ctrl)); -+ -+ PFM_DBG("global=0x%llx set to 0x%llx", -+ __get_cpu_var(saved_global_ctrl), -+ mask); -+ -+ /* -+ * enable all registers -+ * -+ * No need to quiesce PMU. 
If there is a overflow, it will be -+ * treated as spurious by the handler -+ */ -+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, mask); -+} -+ -+/** -+ * pfm_intel_atom_release_pmu_percpu - release PMU resource per CPU -+ * -+ * For Atom, we restore MSR_CORE_PERF_GLOBAL_CTRL to its orginal value -+ */ -+static void pfm_intel_atom_release_pmu_percpu(void) -+{ -+ PFM_DBG("global_ctrl restored to 0x%llx\n", -+ __get_cpu_var(saved_global_ctrl)); -+ -+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, __get_cpu_var(saved_global_ctrl)); -+} -+ -+static struct pfm_pmu_config pfm_intel_atom_pmu_conf = { -+ .pmu_name = "Intel Atom", -+ .pmd_desc = pfm_intel_atom_pmd_desc, -+ .counter_width = PFM_ATOM_PMD_WIDTH, -+ .num_pmc_entries = PFM_ATOM_MAX_PMCS, -+ .num_pmd_entries = PFM_ATOM_MAX_PMDS, -+ .pmc_desc = pfm_intel_atom_pmc_desc, -+ .probe_pmu = pfm_intel_atom_probe_pmu, -+ .version = "1.0", -+ .flags = PFM_PMU_BUILTIN_FLAG, -+ .owner = THIS_MODULE, -+ .pmc_write_check = pfm_intel_atom_pmc17_check, -+ .pmu_info = &pfm_intel_atom_pmu_info -+}; -+ -+static int __init pfm_intel_atom_pmu_init_module(void) -+{ -+ return pfm_pmu_register(&pfm_intel_atom_pmu_conf); -+} -+ -+static void __exit pfm_intel_atom_pmu_cleanup_module(void) -+{ -+ pfm_pmu_unregister(&pfm_intel_atom_pmu_conf); -+} -+ -+module_init(pfm_intel_atom_pmu_init_module); -+module_exit(pfm_intel_atom_pmu_cleanup_module); -diff --git a/arch/x86/perfmon/perfmon_intel_core.c b/arch/x86/perfmon/perfmon_intel_core.c -new file mode 100644 -index 0000000..fddc436 ---- /dev/null -+++ b/arch/x86/perfmon/perfmon_intel_core.c -@@ -0,0 +1,449 @@ -+/* -+ * This file contains the Intel Core PMU registers description tables. -+ * Intel Core-based processors support architectural perfmon v2 + PEBS -+ * -+ * Copyright (c) 2006-2007 Hewlett-Packard Development Company, L.P. 
-+ * Contributed by Stephane Eranian -+ */ -+#include -+#include -+#include -+#include -+ -+MODULE_AUTHOR("Stephane Eranian "); -+MODULE_DESCRIPTION("Intel Core"); -+MODULE_LICENSE("GPL"); -+ -+static int force_nmi; -+MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt"); -+module_param(force_nmi, bool, 0600); -+ -+/* -+ * - upper 32 bits are reserved -+ * - INT: APIC enable bit is reserved (forced to 1) -+ * - bit 21 is reserved -+ * -+ * RSVD: reserved bits must be 1 -+ */ -+#define PFM_CORE_PMC_RSVD ((~((1ULL<<32)-1)) \ -+ | (1ULL<<20) \ -+ | (1ULL<<21)) -+ -+/* -+ * Core counters are 40-bits -+ */ -+#define PFM_CORE_CTR_RSVD (~((1ULL<<40)-1)) -+ -+/* -+ * force Local APIC interrupt on overflow -+ * disable with NO_EMUL64 -+ */ -+#define PFM_CORE_PMC_VAL (1ULL<<20) -+#define PFM_CORE_NO64 (1ULL<<20) -+ -+#define PFM_CORE_NA { .reg_type = PFM_REGT_NA} -+ -+#define PFM_CORE_CA(m, c, t) \ -+ { \ -+ .addrs[0] = m, \ -+ .ctr = c, \ -+ .reg_type = t \ -+ } -+ -+struct pfm_ds_area_intel_core { -+ u64 bts_buf_base; -+ u64 bts_index; -+ u64 bts_abs_max; -+ u64 bts_intr_thres; -+ u64 pebs_buf_base; -+ u64 pebs_index; -+ u64 pebs_abs_max; -+ u64 pebs_intr_thres; -+ u64 pebs_cnt_reset; -+}; -+ -+static void pfm_core_restore_pmcs(struct pfm_context *ctx, -+ struct pfm_event_set *set); -+static int pfm_core_has_ovfls(struct pfm_context *ctx); -+static int pfm_core_stop_save(struct pfm_context *ctx, -+ struct pfm_event_set *set); -+static void __kprobes pfm_core_quiesce(void); -+ -+static u64 enable_mask[PFM_MAX_PMCS]; -+static u16 max_enable; -+ -+struct pfm_arch_pmu_info pfm_core_pmu_info = { -+ .stop_save = pfm_core_stop_save, -+ .has_ovfls = pfm_core_has_ovfls, -+ .quiesce = pfm_core_quiesce, -+ .restore_pmcs = pfm_core_restore_pmcs -+}; -+ -+static struct pfm_regmap_desc pfm_core_pmc_desc[] = { -+/* pmc0 */ { -+ .type = PFM_REG_I64, -+ .desc = "PERFEVTSEL0", -+ .dfl_val = PFM_CORE_PMC_VAL, -+ .rsvd_msk = PFM_CORE_PMC_RSVD, -+ .no_emul64_msk = 
PFM_CORE_NO64, -+ .hw_addr = MSR_P6_EVNTSEL0 -+ }, -+/* pmc1 */ { -+ .type = PFM_REG_I64, -+ .desc = "PERFEVTSEL1", -+ .dfl_val = PFM_CORE_PMC_VAL, -+ .rsvd_msk = PFM_CORE_PMC_RSVD, -+ .no_emul64_msk = PFM_CORE_NO64, -+ .hw_addr = MSR_P6_EVNTSEL1 -+ }, -+/* pmc2 */ PMX_NA, PMX_NA, -+/* pmc4 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc8 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc12 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmc16 */ { .type = PFM_REG_I, -+ .desc = "FIXED_CTRL", -+ .dfl_val = 0x888ULL, -+ .rsvd_msk = 0xfffffffffffffcccULL, -+ .no_emul64_msk = 0, -+ .hw_addr = MSR_CORE_PERF_FIXED_CTR_CTRL -+ }, -+/* pmc17 */ { .type = PFM_REG_W, -+ .desc = "PEBS_ENABLE", -+ .dfl_val = 0, -+ .rsvd_msk = 0xfffffffffffffffeULL, -+ .no_emul64_msk = 0, -+ .hw_addr = MSR_IA32_PEBS_ENABLE -+ } -+}; -+ -+#define PFM_CORE_D(n) \ -+ { .type = PFM_REG_C, \ -+ .desc = "PMC"#n, \ -+ .rsvd_msk = PFM_CORE_CTR_RSVD, \ -+ .hw_addr = MSR_P6_PERFCTR0+n, \ -+ .dep_pmcs[0] = 1ULL << n \ -+ } -+ -+#define PFM_CORE_FD(n) \ -+ { .type = PFM_REG_C, \ -+ .desc = "FIXED_CTR"#n, \ -+ .rsvd_msk = PFM_CORE_CTR_RSVD, \ -+ .hw_addr = MSR_CORE_PERF_FIXED_CTR0+n,\ -+ .dep_pmcs[0] = 1ULL << 16 \ -+ } -+ -+static struct pfm_regmap_desc pfm_core_pmd_desc[] = { -+/* pmd0 */ PFM_CORE_D(0), -+/* pmd1 */ PFM_CORE_D(1), -+/* pmd2 */ PMX_NA, PMX_NA, -+/* pmd4 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmd8 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmd12 */ PMX_NA, PMX_NA, PMX_NA, PMX_NA, -+/* pmd16 */ PFM_CORE_FD(0), -+/* pmd17 */ PFM_CORE_FD(1), -+/* pmd18 */ PFM_CORE_FD(2) -+}; -+#define PFM_CORE_NUM_PMCS ARRAY_SIZE(pfm_core_pmc_desc) -+#define PFM_CORE_NUM_PMDS ARRAY_SIZE(pfm_core_pmd_desc) -+ -+static struct pfm_pmu_config pfm_core_pmu_conf; -+ -+static int pfm_core_probe_pmu(void) -+{ -+ /* -+ * Check for Intel Core processor explicitely -+ * Checking for cpu_has_perfmon is not enough as this -+ * matches intel Core Duo/Core Solo but none supports -+ * PEBS. 
-+ * -+ * Intel Core = arch perfmon v2 + PEBS -+ */ -+ if (current_cpu_data.x86_vendor != X86_VENDOR_INTEL) { -+ PFM_INFO("not an AMD processor"); -+ return -1; -+ } -+ -+ if (current_cpu_data.x86 != 6) -+ return -1; -+ -+ switch (current_cpu_data.x86_model) { -+ case 15: /* Merom */ -+ break; -+ case 23: /* Penryn */ -+ break; -+ case 29: /* Dunnington */ -+ break; -+ default: -+ return -1; -+ } -+ -+ if (!cpu_has_apic) { -+ PFM_INFO("no Local APIC, unsupported"); -+ return -1; -+ } -+ -+ PFM_INFO("nmi_watchdog=%d nmi_active=%d force_nmi=%d", -+ nmi_watchdog, atomic_read(&nmi_active), force_nmi); -+ -+ /* -+ * Intel Core processors implement DS and PEBS, no need to check -+ */ -+ if (cpu_has_pebs) -+ PFM_INFO("PEBS supported, enabled"); -+ -+ /* -+ * initialize bitmask of register with enable capability, i.e., -+ * startstop. This is used to restrict the number of registers to -+ * touch on start/stop -+ * max_enable: number of bits to scan in enable_mask = highest + 1 -+ * -+ * may be adjusted in pfm_arch_pmu_acquire() -+ */ -+ __set_bit(0, cast_ulp(enable_mask)); -+ __set_bit(1, cast_ulp(enable_mask)); -+ __set_bit(16, cast_ulp(enable_mask)); -+ __set_bit(17, cast_ulp(enable_mask)); -+ max_enable = 17+1; -+ -+ if (force_nmi) -+ pfm_core_pmu_info.flags |= PFM_X86_FL_USE_NMI; -+ -+ return 0; -+} -+ -+static int pfm_core_pmc17_check(struct pfm_context *ctx, -+ struct pfm_event_set *set, -+ struct pfarg_pmc *req) -+{ -+ struct pfm_arch_context *ctx_arch; -+ ctx_arch = pfm_ctx_arch(ctx); -+ -+ /* -+ * if user activates PEBS_ENABLE, then we need to have a valid -+ * DS Area setup. This only happens when the PEBS sampling format is -+ * used in which case PFM_X86_USE_PEBS is set. We must reject all other -+ * requests. -+ * -+ * Otherwise we may pickup stale MSR_IA32_DS_AREA values. It appears -+ * that a value of 0 for this MSR does crash the system with -+ * PEBS_ENABLE=1. 
-+ */ -+ if (!ctx_arch->flags.use_pebs && req->reg_value) { -+ PFM_DBG("pmc17 useable only with a PEBS sampling format"); -+ return -EINVAL; -+ } -+ return 0; -+} -+ -+/* -+ * detect is counters have overflowed. -+ * return: -+ * 0 : no overflow -+ * 1 : at least one overflow -+ * -+ * used by Intel Core-based processors -+ */ -+static int __kprobes pfm_core_has_ovfls(struct pfm_context *ctx) -+{ -+ struct pfm_arch_pmu_info *pmu_info; -+ u64 *cnt_mask; -+ u64 wmask, val; -+ u16 i, num; -+ -+ pmu_info = &pfm_core_pmu_info; -+ cnt_mask = ctx->regs.cnt_pmds; -+ num = ctx->regs.num_counters; -+ wmask = 1ULL << pfm_pmu_conf->counter_width; -+ -+ for (i = 0; num; i++) { -+ if (test_bit(i, cast_ulp(cnt_mask))) { -+ rdmsrl(pfm_core_pmd_desc[i].hw_addr, val); -+ if (!(val & wmask)) -+ return 1; -+ num--; -+ } -+ } -+ return 0; -+} -+ -+static int pfm_core_stop_save(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ struct pfm_arch_context *ctx_arch; -+ struct pfm_ds_area_intel_core *ds = NULL; -+ u64 used_mask[PFM_PMC_BV]; -+ u64 *cnt_mask; -+ u64 val, wmask, ovfl_mask; -+ u16 count, has_ovfl; -+ u16 i, pebs_idx = ~0; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ -+ wmask = 1ULL << pfm_pmu_conf->counter_width; -+ -+ /* -+ * used enable pmc bitmask -+ */ -+ bitmap_and(cast_ulp(used_mask), -+ cast_ulp(set->used_pmcs), -+ cast_ulp(enable_mask), -+ max_enable); -+ -+ count = bitmap_weight(cast_ulp(used_mask), max_enable); -+ /* -+ * stop monitoring -+ * Unfortunately, this is very expensive! -+ * wrmsrl() is serializing. -+ */ -+ for (i = 0; count; i++) { -+ if (test_bit(i, cast_ulp(used_mask))) { -+ wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, 0); -+ count--; -+ } -+ } -+ /* -+ * if we already having a pending overflow condition, we simply -+ * return to take care of this first. 
-+ */ -+ if (set->npend_ovfls) -+ return 1; -+ -+ ovfl_mask = pfm_pmu_conf->ovfl_mask; -+ cnt_mask = ctx->regs.cnt_pmds; -+ -+ if (ctx_arch->flags.use_pebs) { -+ ds = ctx_arch->ds_area; -+ pebs_idx = 0; /* PMC0/PMD0 */ -+ PFM_DBG("ds=%p pebs_idx=0x%llx thres=0x%llx", -+ ds, -+ (unsigned long long)ds->pebs_index, -+ (unsigned long long)ds->pebs_intr_thres); -+ } -+ -+ /* -+ * Check for pending overflows and save PMDs (combo) -+ * We employ used_pmds and not intr_pmds because we must -+ * also saved on PMD registers. -+ * Must check for counting PMDs because of virtual PMDs -+ * -+ * XXX: should use the ovf_status register instead, yet -+ * we would have to check if NMI is used and fallback -+ * to individual pmd inspection. -+ */ -+ count = set->nused_pmds; -+ -+ for (i = 0; count; i++) { -+ if (test_bit(i, cast_ulp(set->used_pmds))) { -+ val = pfm_arch_read_pmd(ctx, i); -+ if (likely(test_bit(i, cast_ulp(cnt_mask)))) { -+ if (i == pebs_idx) -+ has_ovfl = (ds->pebs_index >= -+ ds->pebs_intr_thres); -+ else -+ has_ovfl = !(val & wmask); -+ if (has_ovfl) { -+ __set_bit(i, cast_ulp(set->povfl_pmds)); -+ set->npend_ovfls++; -+ } -+ val = (set->pmds[i].value & ~ovfl_mask) -+ | (val & ovfl_mask); -+ } -+ set->pmds[i].value = val; -+ count--; -+ } -+ } -+ /* 0 means: no need to save PMDs at upper level */ -+ return 0; -+} -+ -+/** -+ * pfm_core_quiesce - stop monitoring without grabbing any lock -+ * -+ * called from NMI interrupt handler to immediately stop monitoring -+ * cannot grab any lock, including perfmon related locks -+ */ -+static void __kprobes pfm_core_quiesce(void) -+{ -+ /* -+ * quiesce PMU by clearing available registers that have -+ * the start/stop capability -+ */ -+ if (test_bit(0, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) -+ wrmsrl(MSR_P6_EVNTSEL0, 0); -+ if (test_bit(1, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) -+ wrmsrl(MSR_P6_EVNTSEL1, 0); -+ if (test_bit(16, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) -+ wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, 0); -+ if 
(test_bit(17, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) -+ wrmsrl(MSR_IA32_PEBS_ENABLE, 0); -+} -+/** -+ * pfm_core_restore_pmcs - reload PMC registers -+ * @ctx: context to restore from -+ * @set: current event set -+ * -+ * optimized version of pfm_arch_restore_pmcs(). On Core, we can -+ * afford to only restore the pmcs registers we use, because they are -+ * all independent from each other. -+ */ -+static void pfm_core_restore_pmcs(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ struct pfm_arch_context *ctx_arch; -+ u64 *mask; -+ u16 i, num; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ -+ /* -+ * must restore DS pointer before restoring PMCs -+ * as this can potentially reactivate monitoring -+ */ -+ if (ctx_arch->flags.use_ds) -+ wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ctx_arch->ds_area); -+ -+ mask = set->used_pmcs; -+ num = set->nused_pmcs; -+ for (i = 0; num; i++) { -+ if (test_bit(i, cast_ulp(mask))) { -+ wrmsrl(pfm_pmu_conf->pmc_desc[i].hw_addr, set->pmcs[i]); -+ num--; -+ } -+ } -+} -+ -+/* -+ * Counters may have model-specific width which can be probed using -+ * the CPUID.0xa leaf. Yet, the documentation says: " -+ * In the initial implementation, only the read bit width is reported -+ * by CPUID, write operations are limited to the low 32 bits. -+ * Bits [w-32] are sign extensions of bit 31. As such the effective width -+ * of a counter is 31 bits only. 
-+ */ -+static struct pfm_pmu_config pfm_core_pmu_conf = { -+ .pmu_name = "Intel Core", -+ .pmd_desc = pfm_core_pmd_desc, -+ .counter_width = 31, -+ .num_pmc_entries = PFM_CORE_NUM_PMCS, -+ .num_pmd_entries = PFM_CORE_NUM_PMDS, -+ .pmc_desc = pfm_core_pmc_desc, -+ .probe_pmu = pfm_core_probe_pmu, -+ .version = "1.2", -+ .flags = PFM_PMU_BUILTIN_FLAG, -+ .owner = THIS_MODULE, -+ .pmu_info = &pfm_core_pmu_info, -+ .pmc_write_check = pfm_core_pmc17_check -+}; -+ -+static int __init pfm_core_pmu_init_module(void) -+{ -+ return pfm_pmu_register(&pfm_core_pmu_conf); -+} -+ -+static void __exit pfm_core_pmu_cleanup_module(void) -+{ -+ pfm_pmu_unregister(&pfm_core_pmu_conf); -+} -+ -+module_init(pfm_core_pmu_init_module); -+module_exit(pfm_core_pmu_cleanup_module); -diff --git a/arch/x86/perfmon/perfmon_p4.c b/arch/x86/perfmon/perfmon_p4.c -new file mode 100644 -index 0000000..1ffcf3c ---- /dev/null -+++ b/arch/x86/perfmon/perfmon_p4.c -@@ -0,0 +1,913 @@ -+/* -+ * This file contains the P4/Xeon PMU register description tables -+ * for both 32 and 64 bit modes. -+ * -+ * Copyright (c) 2005 Intel Corporation -+ * Contributed by Bryan Wilkerson -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+MODULE_AUTHOR("Bryan Wilkerson "); -+MODULE_DESCRIPTION("P4/Xeon/EM64T PMU description table"); -+MODULE_LICENSE("GPL"); -+ -+static int force; -+MODULE_PARM_DESC(force, "bool: force module to load succesfully"); -+module_param(force, bool, 0600); -+ -+static int force_nmi; -+MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt"); -+module_param(force_nmi, bool, 0600); -+ -+/* -+ * For extended register information in addition to address that is used -+ * at runtime to figure out the mapping of reg addresses to logical procs -+ * and association of registers to hardware specific features -+ */ -+struct pfm_p4_regmap { -+ /* -+ * one each for the logical CPUs. Index 0 corresponds to T0 and -+ * index 1 corresponds to T1. Index 1 can be zero if no T1 -+ * complement reg exists. -+ */ -+ unsigned long addrs[2]; /* 2 = number of threads */ -+ unsigned int ctr; /* for CCCR/PERFEVTSEL, associated counter */ -+ unsigned int reg_type; -+}; -+ -+/* -+ * bitmask for pfm_p4_regmap.reg_type -+ */ -+#define PFM_REGT_NA 0x0000 /* not available */ -+#define PFM_REGT_EN 0x0001 /* has enable bit (cleared on ctxsw) */ -+#define PFM_REGT_ESCR 0x0002 /* P4: ESCR */ -+#define PFM_REGT_CCCR 0x0004 /* P4: CCCR */ -+#define PFM_REGT_PEBS 0x0010 /* PEBS related */ -+#define PFM_REGT_NOHT 0x0020 /* unavailable with HT */ -+#define PFM_REGT_CTR 0x0040 /* counter */ -+ -+/* -+ * architecture specific context extension. 
-+ * located at: (struct pfm_arch_context *)(ctx+1) -+ */ -+struct pfm_arch_p4_context { -+ u32 npend_ovfls; /* P4 NMI #pending ovfls */ -+ u32 reserved; -+ u64 povfl_pmds[PFM_PMD_BV]; /* P4 NMI overflowed counters */ -+ u64 saved_cccrs[PFM_MAX_PMCS]; -+}; -+ -+/* -+ * ESCR reserved bitmask: -+ * - bits 31 - 63 reserved -+ * - T1_OS and T1_USR bits are reserved - set depending on logical proc -+ * user mode application should use T0_OS and T0_USR to indicate -+ * RSVD: reserved bits must be 1 -+ */ -+#define PFM_ESCR_RSVD ~0x000000007ffffffcULL -+ -+/* -+ * CCCR default value: -+ * - OVF_PMI_T0=1 (bit 26) -+ * - OVF_PMI_T1=0 (bit 27) (set if necessary in pfm_write_reg()) -+ * - all other bits are zero -+ * -+ * OVF_PMI is forced to zero if PFM_REGFL_NO_EMUL64 is set on CCCR -+ */ -+#define PFM_CCCR_DFL (1ULL<<26) | (3ULL<<16) -+ -+/* -+ * CCCR reserved fields: -+ * - bits 0-11, 25-29, 31-63 -+ * - OVF_PMI (26-27), override with REGFL_NO_EMUL64 -+ * -+ * RSVD: reserved bits must be 1 -+ */ -+#define PFM_CCCR_RSVD ~((0xfull<<12) \ -+ | (0x7full<<18) \ -+ | (0x1ull<<30)) -+ -+#define PFM_P4_NO64 (3ULL<<26) /* use 3 even in non HT mode */ -+ -+#define PEBS_PMD 8 /* thread0: IQ_CTR4, thread1: IQ_CTR5 */ -+ -+/* -+ * With HyperThreading enabled: -+ * -+ * The ESCRs and CCCRs are divided in half with the top half -+ * belonging to logical processor 0 and the bottom half going to -+ * logical processor 1. Thus only half of the PMU resources are -+ * accessible to applications. -+ * -+ * PEBS is not available due to the fact that: -+ * - MSR_PEBS_MATRIX_VERT is shared between the threads -+ * - IA32_PEBS_ENABLE is shared between the threads -+ * -+ * With HyperThreading disabled: -+ * -+ * The full set of PMU resources is exposed to applications. -+ * -+ * The mapping is chosen such that PMCxx -> MSR is the same -+ * in HT and non HT mode, if register is present in HT mode. 
-+ * -+ */ -+#define PFM_REGT_NHTESCR (PFM_REGT_ESCR|PFM_REGT_NOHT) -+#define PFM_REGT_NHTCCCR (PFM_REGT_CCCR|PFM_REGT_NOHT|PFM_REGT_EN) -+#define PFM_REGT_NHTPEBS (PFM_REGT_PEBS|PFM_REGT_NOHT|PFM_REGT_EN) -+#define PFM_REGT_NHTCTR (PFM_REGT_CTR|PFM_REGT_NOHT) -+#define PFM_REGT_ENAC (PFM_REGT_CCCR|PFM_REGT_EN) -+ -+static void pfm_p4_write_pmc(struct pfm_context *ctx, unsigned int cnum, u64 value); -+static void pfm_p4_write_pmd(struct pfm_context *ctx, unsigned int cnum, u64 value); -+static u64 pfm_p4_read_pmd(struct pfm_context *ctx, unsigned int cnum); -+static u64 pfm_p4_read_pmc(struct pfm_context *ctx, unsigned int cnum); -+static int pfm_p4_create_context(struct pfm_context *ctx, u32 ctx_flags); -+static void pfm_p4_free_context(struct pfm_context *ctx); -+static int pfm_p4_has_ovfls(struct pfm_context *ctx); -+static int pfm_p4_stop_save(struct pfm_context *ctx, struct pfm_event_set *set); -+static void pfm_p4_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set); -+static void pfm_p4_nmi_copy_state(struct pfm_context *ctx); -+static void __kprobes pfm_p4_quiesce(void); -+ -+static u64 enable_mask[PFM_MAX_PMCS]; -+static u16 max_enable; -+ -+static struct pfm_p4_regmap pmc_addrs[PFM_MAX_PMCS] = { -+ /*pmc 0 */ {{MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1}, 0, PFM_REGT_ESCR}, /* BPU_ESCR0,1 */ -+ /*pmc 1 */ {{MSR_P4_IS_ESCR0, MSR_P4_IS_ESCR1}, 0, PFM_REGT_ESCR}, /* IS_ESCR0,1 */ -+ /*pmc 2 */ {{MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1}, 0, PFM_REGT_ESCR}, /* MOB_ESCR0,1 */ -+ /*pmc 3 */ {{MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1}, 0, PFM_REGT_ESCR}, /* ITLB_ESCR0,1 */ -+ /*pmc 4 */ {{MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1}, 0, PFM_REGT_ESCR}, /* PMH_ESCR0,1 */ -+ /*pmc 5 */ {{MSR_P4_IX_ESCR0, MSR_P4_IX_ESCR1}, 0, PFM_REGT_ESCR}, /* IX_ESCR0,1 */ -+ /*pmc 6 */ {{MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1}, 0, PFM_REGT_ESCR}, /* FSB_ESCR0,1 */ -+ /*pmc 7 */ {{MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1}, 0, PFM_REGT_ESCR}, /* BSU_ESCR0,1 */ -+ /*pmc 8 */ {{MSR_P4_MS_ESCR0, 
MSR_P4_MS_ESCR1}, 0, PFM_REGT_ESCR}, /* MS_ESCR0,1 */ -+ /*pmc 9 */ {{MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1}, 0, PFM_REGT_ESCR}, /* TC_ESCR0,1 */ -+ /*pmc 10*/ {{MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1}, 0, PFM_REGT_ESCR}, /* TBPU_ESCR0,1 */ -+ /*pmc 11*/ {{MSR_P4_FLAME_ESCR0, MSR_P4_FLAME_ESCR1}, 0, PFM_REGT_ESCR}, /* FLAME_ESCR0,1 */ -+ /*pmc 12*/ {{MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1}, 0, PFM_REGT_ESCR}, /* FIRM_ESCR0,1 */ -+ /*pmc 13*/ {{MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1}, 0, PFM_REGT_ESCR}, /* SAAT_ESCR0,1 */ -+ /*pmc 14*/ {{MSR_P4_U2L_ESCR0, MSR_P4_U2L_ESCR1}, 0, PFM_REGT_ESCR}, /* U2L_ESCR0,1 */ -+ /*pmc 15*/ {{MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1}, 0, PFM_REGT_ESCR}, /* DAC_ESCR0,1 */ -+ /*pmc 16*/ {{MSR_P4_IQ_ESCR0, MSR_P4_IQ_ESCR1}, 0, PFM_REGT_ESCR}, /* IQ_ESCR0,1 (only model 1 and 2) */ -+ /*pmc 17*/ {{MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1}, 0, PFM_REGT_ESCR}, /* ALF_ESCR0,1 */ -+ /*pmc 18*/ {{MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1}, 0, PFM_REGT_ESCR}, /* RAT_ESCR0,1 */ -+ /*pmc 19*/ {{MSR_P4_SSU_ESCR0, 0}, 0, PFM_REGT_ESCR}, /* SSU_ESCR0 */ -+ /*pmc 20*/ {{MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1}, 0, PFM_REGT_ESCR}, /* CRU_ESCR0,1 */ -+ /*pmc 21*/ {{MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3}, 0, PFM_REGT_ESCR}, /* CRU_ESCR2,3 */ -+ /*pmc 22*/ {{MSR_P4_CRU_ESCR4, MSR_P4_CRU_ESCR5}, 0, PFM_REGT_ESCR}, /* CRU_ESCR4,5 */ -+ -+ /*pmc 23*/ {{MSR_P4_BPU_CCCR0, MSR_P4_BPU_CCCR2}, 0, PFM_REGT_ENAC}, /* BPU_CCCR0,2 */ -+ /*pmc 24*/ {{MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3}, 1, PFM_REGT_ENAC}, /* BPU_CCCR1,3 */ -+ /*pmc 25*/ {{MSR_P4_MS_CCCR0, MSR_P4_MS_CCCR2}, 2, PFM_REGT_ENAC}, /* MS_CCCR0,2 */ -+ /*pmc 26*/ {{MSR_P4_MS_CCCR1, MSR_P4_MS_CCCR3}, 3, PFM_REGT_ENAC}, /* MS_CCCR1,3 */ -+ /*pmc 27*/ {{MSR_P4_FLAME_CCCR0, MSR_P4_FLAME_CCCR2}, 4, PFM_REGT_ENAC}, /* FLAME_CCCR0,2 */ -+ /*pmc 28*/ {{MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3}, 5, PFM_REGT_ENAC}, /* FLAME_CCCR1,3 */ -+ /*pmc 29*/ {{MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR2}, 6, PFM_REGT_ENAC}, /* IQ_CCCR0,2 */ -+ /*pmc 30*/ {{MSR_P4_IQ_CCCR1, 
MSR_P4_IQ_CCCR3}, 7, PFM_REGT_ENAC}, /* IQ_CCCR1,3 */ -+ /*pmc 31*/ {{MSR_P4_IQ_CCCR4, MSR_P4_IQ_CCCR5}, 8, PFM_REGT_ENAC}, /* IQ_CCCR4,5 */ -+ /* non HT extensions */ -+ /*pmc 32*/ {{MSR_P4_BPU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* BPU_ESCR1 */ -+ /*pmc 33*/ {{MSR_P4_IS_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* IS_ESCR1 */ -+ /*pmc 34*/ {{MSR_P4_MOB_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* MOB_ESCR1 */ -+ /*pmc 35*/ {{MSR_P4_ITLB_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* ITLB_ESCR1 */ -+ /*pmc 36*/ {{MSR_P4_PMH_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* PMH_ESCR1 */ -+ /*pmc 37*/ {{MSR_P4_IX_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* IX_ESCR1 */ -+ /*pmc 38*/ {{MSR_P4_FSB_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* FSB_ESCR1 */ -+ /*pmc 39*/ {{MSR_P4_BSU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* BSU_ESCR1 */ -+ /*pmc 40*/ {{MSR_P4_MS_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* MS_ESCR1 */ -+ /*pmc 41*/ {{MSR_P4_TC_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* TC_ESCR1 */ -+ /*pmc 42*/ {{MSR_P4_TBPU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* TBPU_ESCR1 */ -+ /*pmc 43*/ {{MSR_P4_FLAME_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* FLAME_ESCR1 */ -+ /*pmc 44*/ {{MSR_P4_FIRM_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* FIRM_ESCR1 */ -+ /*pmc 45*/ {{MSR_P4_SAAT_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* SAAT_ESCR1 */ -+ /*pmc 46*/ {{MSR_P4_U2L_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* U2L_ESCR1 */ -+ /*pmc 47*/ {{MSR_P4_DAC_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* DAC_ESCR1 */ -+ /*pmc 48*/ {{MSR_P4_IQ_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* IQ_ESCR1 (only model 1 and 2) */ -+ /*pmc 49*/ {{MSR_P4_ALF_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* ALF_ESCR1 */ -+ /*pmc 50*/ {{MSR_P4_RAT_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* RAT_ESCR1 */ -+ /*pmc 51*/ {{MSR_P4_CRU_ESCR1, 0}, 0, PFM_REGT_NHTESCR}, /* CRU_ESCR1 */ -+ /*pmc 52*/ {{MSR_P4_CRU_ESCR3, 0}, 0, PFM_REGT_NHTESCR}, /* CRU_ESCR3 */ -+ /*pmc 53*/ {{MSR_P4_CRU_ESCR5, 0}, 0, PFM_REGT_NHTESCR}, /* CRU_ESCR5 */ -+ /*pmc 54*/ {{MSR_P4_BPU_CCCR1, 0}, 9, PFM_REGT_NHTCCCR}, /* BPU_CCCR1 */ -+ /*pmc 55*/ {{MSR_P4_BPU_CCCR3, 0}, 10, PFM_REGT_NHTCCCR}, 
/* BPU_CCCR3 */ -+ /*pmc 56*/ {{MSR_P4_MS_CCCR1, 0}, 11, PFM_REGT_NHTCCCR}, /* MS_CCCR1 */ -+ /*pmc 57*/ {{MSR_P4_MS_CCCR3, 0}, 12, PFM_REGT_NHTCCCR}, /* MS_CCCR3 */ -+ /*pmc 58*/ {{MSR_P4_FLAME_CCCR1, 0}, 13, PFM_REGT_NHTCCCR}, /* FLAME_CCCR1 */ -+ /*pmc 59*/ {{MSR_P4_FLAME_CCCR3, 0}, 14, PFM_REGT_NHTCCCR}, /* FLAME_CCCR3 */ -+ /*pmc 60*/ {{MSR_P4_IQ_CCCR2, 0}, 15, PFM_REGT_NHTCCCR}, /* IQ_CCCR2 */ -+ /*pmc 61*/ {{MSR_P4_IQ_CCCR3, 0}, 16, PFM_REGT_NHTCCCR}, /* IQ_CCCR3 */ -+ /*pmc 62*/ {{MSR_P4_IQ_CCCR5, 0}, 17, PFM_REGT_NHTCCCR}, /* IQ_CCCR5 */ -+ /*pmc 63*/ {{0x3f2, 0}, 0, PFM_REGT_NHTPEBS},/* PEBS_MATRIX_VERT */ -+ /*pmc 64*/ {{0x3f1, 0}, 0, PFM_REGT_NHTPEBS} /* PEBS_ENABLE */ -+}; -+ -+static struct pfm_p4_regmap pmd_addrs[PFM_MAX_PMDS] = { -+ /*pmd 0 */ {{MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_PERFCTR2}, 0, PFM_REGT_CTR}, /* BPU_CTR0,2 */ -+ /*pmd 1 */ {{MSR_P4_BPU_PERFCTR1, MSR_P4_BPU_PERFCTR3}, 0, PFM_REGT_CTR}, /* BPU_CTR1,3 */ -+ /*pmd 2 */ {{MSR_P4_MS_PERFCTR0, MSR_P4_MS_PERFCTR2}, 0, PFM_REGT_CTR}, /* MS_CTR0,2 */ -+ /*pmd 3 */ {{MSR_P4_MS_PERFCTR1, MSR_P4_MS_PERFCTR3}, 0, PFM_REGT_CTR}, /* MS_CTR1,3 */ -+ /*pmd 4 */ {{MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_PERFCTR2}, 0, PFM_REGT_CTR}, /* FLAME_CTR0,2 */ -+ /*pmd 5 */ {{MSR_P4_FLAME_PERFCTR1, MSR_P4_FLAME_PERFCTR3}, 0, PFM_REGT_CTR}, /* FLAME_CTR1,3 */ -+ /*pmd 6 */ {{MSR_P4_IQ_PERFCTR0, MSR_P4_IQ_PERFCTR2}, 0, PFM_REGT_CTR}, /* IQ_CTR0,2 */ -+ /*pmd 7 */ {{MSR_P4_IQ_PERFCTR1, MSR_P4_IQ_PERFCTR3}, 0, PFM_REGT_CTR}, /* IQ_CTR1,3 */ -+ /*pmd 8 */ {{MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_PERFCTR5}, 0, PFM_REGT_CTR}, /* IQ_CTR4,5 */ -+ /* -+ * non HT extensions -+ */ -+ /*pmd 9 */ {{MSR_P4_BPU_PERFCTR2, 0}, 0, PFM_REGT_NHTCTR}, /* BPU_CTR2 */ -+ /*pmd 10*/ {{MSR_P4_BPU_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* BPU_CTR3 */ -+ /*pmd 11*/ {{MSR_P4_MS_PERFCTR2, 0}, 0, PFM_REGT_NHTCTR}, /* MS_CTR2 */ -+ /*pmd 12*/ {{MSR_P4_MS_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* MS_CTR3 */ -+ /*pmd 13*/ {{MSR_P4_FLAME_PERFCTR2, 0}, 0, 
PFM_REGT_NHTCTR}, /* FLAME_CTR2 */ -+ /*pmd 14*/ {{MSR_P4_FLAME_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* FLAME_CTR3 */ -+ /*pmd 15*/ {{MSR_P4_IQ_PERFCTR2, 0}, 0, PFM_REGT_NHTCTR}, /* IQ_CTR2 */ -+ /*pmd 16*/ {{MSR_P4_IQ_PERFCTR3, 0}, 0, PFM_REGT_NHTCTR}, /* IQ_CTR3 */ -+ /*pmd 17*/ {{MSR_P4_IQ_PERFCTR5, 0}, 0, PFM_REGT_NHTCTR}, /* IQ_CTR5 */ -+}; -+ -+static struct pfm_arch_pmu_info pfm_p4_pmu_info = { -+ .write_pmc = pfm_p4_write_pmc, -+ .write_pmd = pfm_p4_write_pmd, -+ .read_pmc = pfm_p4_read_pmc, -+ .read_pmd = pfm_p4_read_pmd, -+ .create_context = pfm_p4_create_context, -+ .free_context = pfm_p4_free_context, -+ .has_ovfls = pfm_p4_has_ovfls, -+ .stop_save = pfm_p4_stop_save, -+ .restore_pmcs = pfm_p4_restore_pmcs, -+ .nmi_copy_state = pfm_p4_nmi_copy_state, -+ .quiesce = pfm_p4_quiesce -+}; -+ -+static struct pfm_regmap_desc pfm_p4_pmc_desc[] = { -+/* pmc0 */ PMC_D(PFM_REG_I, "BPU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BPU_ESCR0), -+/* pmc1 */ PMC_D(PFM_REG_I, "IS_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IQ_ESCR0), -+/* pmc2 */ PMC_D(PFM_REG_I, "MOB_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MOB_ESCR0), -+/* pmc3 */ PMC_D(PFM_REG_I, "ITLB_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ITLB_ESCR0), -+/* pmc4 */ PMC_D(PFM_REG_I, "PMH_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_PMH_ESCR0), -+/* pmc5 */ PMC_D(PFM_REG_I, "IX_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IX_ESCR0), -+/* pmc6 */ PMC_D(PFM_REG_I, "FSB_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FSB_ESCR0), -+/* pmc7 */ PMC_D(PFM_REG_I, "BSU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BSU_ESCR0), -+/* pmc8 */ PMC_D(PFM_REG_I, "MS_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MS_ESCR0), -+/* pmc9 */ PMC_D(PFM_REG_I, "TC_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TC_ESCR0), -+/* pmc10 */ PMC_D(PFM_REG_I, "TBPU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TBPU_ESCR0), -+/* pmc11 */ PMC_D(PFM_REG_I, "FLAME_ESCR0", 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FLAME_ESCR0), -+/* pmc12 */ PMC_D(PFM_REG_I, "FIRM_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FIRM_ESCR0), -+/* pmc13 */ 
PMC_D(PFM_REG_I, "SAAT_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_SAAT_ESCR0), -+/* pmc14 */ PMC_D(PFM_REG_I, "U2L_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_U2L_ESCR0), -+/* pmc15 */ PMC_D(PFM_REG_I, "DAC_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_DAC_ESCR0), -+/* pmc16 */ PMC_D(PFM_REG_I, "IQ_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IQ_ESCR0), /* only model 1 and 2*/ -+/* pmc17 */ PMC_D(PFM_REG_I, "ALF_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ALF_ESCR0), -+/* pmc18 */ PMC_D(PFM_REG_I, "RAT_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_RAT_ESCR0), -+/* pmc19 */ PMC_D(PFM_REG_I, "SSU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_SSU_ESCR0), -+/* pmc20 */ PMC_D(PFM_REG_I, "CRU_ESCR0" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR0), -+/* pmc21 */ PMC_D(PFM_REG_I, "CRU_ESCR2" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR2), -+/* pmc22 */ PMC_D(PFM_REG_I, "CRU_ESCR4" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR4), -+/* pmc23 */ PMC_D(PFM_REG_I64, "BPU_CCCR0" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR0), -+/* pmc24 */ PMC_D(PFM_REG_I64, "BPU_CCCR1" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR1), -+/* pmc25 */ PMC_D(PFM_REG_I64, "MS_CCCR0" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR0), -+/* pmc26 */ PMC_D(PFM_REG_I64, "MS_CCCR1" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR1), -+/* pmc27 */ PMC_D(PFM_REG_I64, "FLAME_CCCR0", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_FLAME_CCCR0), -+/* pmc28 */ PMC_D(PFM_REG_I64, "FLAME_CCCR1", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_FLAME_CCCR1), -+/* pmc29 */ PMC_D(PFM_REG_I64, "IQ_CCCR0" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR0), -+/* pmc30 */ PMC_D(PFM_REG_I64, "IQ_CCCR1" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR1), -+/* pmc31 */ PMC_D(PFM_REG_I64, "IQ_CCCR4" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR4), -+ /* No HT extension */ -+/* pmc32 */ PMC_D(PFM_REG_I, "BPU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BPU_ESCR1), -+/* pmc33 */ PMC_D(PFM_REG_I, 
"IS_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IS_ESCR1), -+/* pmc34 */ PMC_D(PFM_REG_I, "MOB_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MOB_ESCR1), -+/* pmc35 */ PMC_D(PFM_REG_I, "ITLB_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ITLB_ESCR1), -+/* pmc36 */ PMC_D(PFM_REG_I, "PMH_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_PMH_ESCR1), -+/* pmc37 */ PMC_D(PFM_REG_I, "IX_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IX_ESCR1), -+/* pmc38 */ PMC_D(PFM_REG_I, "FSB_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FSB_ESCR1), -+/* pmc39 */ PMC_D(PFM_REG_I, "BSU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_BSU_ESCR1), -+/* pmc40 */ PMC_D(PFM_REG_I, "MS_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_MS_ESCR1), -+/* pmc41 */ PMC_D(PFM_REG_I, "TC_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TC_ESCR1), -+/* pmc42 */ PMC_D(PFM_REG_I, "TBPU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_TBPU_ESCR1), -+/* pmc43 */ PMC_D(PFM_REG_I, "FLAME_ESCR1", 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FLAME_ESCR1), -+/* pmc44 */ PMC_D(PFM_REG_I, "FIRM_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_FIRM_ESCR1), -+/* pmc45 */ PMC_D(PFM_REG_I, "SAAT_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_SAAT_ESCR1), -+/* pmc46 */ PMC_D(PFM_REG_I, "U2L_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_U2L_ESCR1), -+/* pmc47 */ PMC_D(PFM_REG_I, "DAC_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_DAC_ESCR1), -+/* pmc48 */ PMC_D(PFM_REG_I, "IQ_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_IQ_ESCR1), /* only model 1 and 2 */ -+/* pmc49 */ PMC_D(PFM_REG_I, "ALF_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_ALF_ESCR1), -+/* pmc50 */ PMC_D(PFM_REG_I, "RAT_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_RAT_ESCR1), -+/* pmc51 */ PMC_D(PFM_REG_I, "CRU_ESCR1" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR1), -+/* pmc52 */ PMC_D(PFM_REG_I, "CRU_ESCR3" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR3), -+/* pmc53 */ PMC_D(PFM_REG_I, "CRU_ESCR5" , 0x0, PFM_ESCR_RSVD, 0, MSR_P4_CRU_ESCR5), -+/* pmc54 */ PMC_D(PFM_REG_I64, "BPU_CCCR2" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR2), -+/* pmc55 */ PMC_D(PFM_REG_I64, "BPU_CCCR3" , PFM_CCCR_DFL, 
PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_BPU_CCCR3), -+/* pmc56 */ PMC_D(PFM_REG_I64, "MS_CCCR2" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR2), -+/* pmc57 */ PMC_D(PFM_REG_I64, "MS_CCCR3" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_MS_CCCR3), -+/* pmc58 */ PMC_D(PFM_REG_I64, "FLAME_CCCR2", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_FLAME_CCCR2), -+/* pmc59 */ PMC_D(PFM_REG_I64, "FLAME_CCCR3", PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_FLAME_CCCR3), -+/* pmc60 */ PMC_D(PFM_REG_I64, "IQ_CCCR2" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR2), -+/* pmc61 */ PMC_D(PFM_REG_I64, "IQ_CCCR3" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR3), -+/* pmc62 */ PMC_D(PFM_REG_I64, "IQ_CCCR5" , PFM_CCCR_DFL, PFM_CCCR_RSVD, PFM_P4_NO64, MSR_P4_IQ_CCCR5), -+/* pmc63 */ PMC_D(PFM_REG_I, "PEBS_MATRIX_VERT", 0, 0xffffffffffffffecULL, 0, 0x3f2), -+/* pmc64 */ PMC_D(PFM_REG_I, "PEBS_ENABLE", 0, 0xfffffffff8ffe000ULL, 0, 0x3f1) -+}; -+#define PFM_P4_NUM_PMCS ARRAY_SIZE(pfm_p4_pmc_desc) -+ -+/* -+ * See section 15.10.6.6 for details about the IQ block -+ */ -+static struct pfm_regmap_desc pfm_p4_pmd_desc[] = { -+/* pmd0 */ PMD_D(PFM_REG_C, "BPU_CTR0", MSR_P4_BPU_PERFCTR0), -+/* pmd1 */ PMD_D(PFM_REG_C, "BPU_CTR1", MSR_P4_BPU_PERFCTR1), -+/* pmd2 */ PMD_D(PFM_REG_C, "MS_CTR0", MSR_P4_MS_PERFCTR0), -+/* pmd3 */ PMD_D(PFM_REG_C, "MS_CTR1", MSR_P4_MS_PERFCTR1), -+/* pmd4 */ PMD_D(PFM_REG_C, "FLAME_CTR0", MSR_P4_FLAME_PERFCTR0), -+/* pmd5 */ PMD_D(PFM_REG_C, "FLAME_CTR1", MSR_P4_FLAME_PERFCTR1), -+/* pmd6 */ PMD_D(PFM_REG_C, "IQ_CTR0", MSR_P4_IQ_PERFCTR0), -+/* pmd7 */ PMD_D(PFM_REG_C, "IQ_CTR1", MSR_P4_IQ_PERFCTR1), -+/* pmd8 */ PMD_D(PFM_REG_C, "IQ_CTR4", MSR_P4_IQ_PERFCTR4), -+ /* no HT extension */ -+/* pmd9 */ PMD_D(PFM_REG_C, "BPU_CTR2", MSR_P4_BPU_PERFCTR2), -+/* pmd10 */ PMD_D(PFM_REG_C, "BPU_CTR3", MSR_P4_BPU_PERFCTR3), -+/* pmd11 */ PMD_D(PFM_REG_C, "MS_CTR2", MSR_P4_MS_PERFCTR2), -+/* pmd12 */ PMD_D(PFM_REG_C, "MS_CTR3", 
MSR_P4_MS_PERFCTR3), -+/* pmd13 */ PMD_D(PFM_REG_C, "FLAME_CTR2", MSR_P4_FLAME_PERFCTR2), -+/* pmd14 */ PMD_D(PFM_REG_C, "FLAME_CTR3", MSR_P4_FLAME_PERFCTR3), -+/* pmd15 */ PMD_D(PFM_REG_C, "IQ_CTR2", MSR_P4_IQ_PERFCTR2), -+/* pmd16 */ PMD_D(PFM_REG_C, "IQ_CTR3", MSR_P4_IQ_PERFCTR3), -+/* pmd17 */ PMD_D(PFM_REG_C, "IQ_CTR5", MSR_P4_IQ_PERFCTR5) -+}; -+#define PFM_P4_NUM_PMDS ARRAY_SIZE(pfm_p4_pmd_desc) -+ -+/* -+ * Due to hotplug CPU support, threads may not necessarily -+ * be activated at the time the module is inserted. We need -+ * to check whether they could be activated by looking at -+ * the present CPU (present != online). -+ */ -+static int pfm_p4_probe_pmu(void) -+{ -+ unsigned int i; -+ int ht_enabled; -+ -+ /* -+ * only works on Intel processors -+ */ -+ if (current_cpu_data.x86_vendor != X86_VENDOR_INTEL) { -+ PFM_INFO("not running on Intel processor"); -+ return -1; -+ } -+ -+ if (current_cpu_data.x86 != 15) { -+ PFM_INFO("unsupported family=%d", current_cpu_data.x86); -+ return -1; -+ } -+ -+ switch (current_cpu_data.x86_model) { -+ case 0 ... 2: -+ break; -+ case 3 ... 6: -+ /* -+ * IQ_ESCR0, IQ_ESCR1 only present on model 1, 2 -+ */ -+ pfm_p4_pmc_desc[16].type = PFM_REG_NA; -+ pfm_p4_pmc_desc[48].type = PFM_REG_NA; -+ break; -+ default: -+ /* -+ * do not know if they all work the same, so reject -+ * for now -+ */ -+ if (!force) { -+ PFM_INFO("unsupported model %d", -+ current_cpu_data.x86_model); -+ return -1; -+ } -+ } -+ -+ /* -+ * check for local APIC (required) -+ */ -+ if (!cpu_has_apic) { -+ PFM_INFO("no local APIC, unsupported"); -+ return -1; -+ } -+#ifdef CONFIG_SMP -+ ht_enabled = (cpus_weight(__get_cpu_var(cpu_core_map)) -+ / current_cpu_data.x86_max_cores) > 1; -+#else -+ ht_enabled = 0; -+#endif -+ if (cpu_has_ht) { -+ -+ PFM_INFO("HyperThreading supported, status %s", -+ ht_enabled ? 
"on": "off"); -+ /* -+ * disable registers not supporting HT -+ */ -+ if (ht_enabled) { -+ PFM_INFO("disabling half the registers for HT"); -+ for (i = 0; i < PFM_P4_NUM_PMCS; i++) { -+ if (pmc_addrs[(i)].reg_type & PFM_REGT_NOHT) -+ pfm_p4_pmc_desc[i].type = PFM_REG_NA; -+ } -+ for (i = 0; i < PFM_P4_NUM_PMDS; i++) { -+ if (pmd_addrs[(i)].reg_type & PFM_REGT_NOHT) -+ pfm_p4_pmd_desc[i].type = PFM_REG_NA; -+ } -+ } -+ } -+ -+ if (cpu_has_ds) { -+ PFM_INFO("Data Save Area (DS) supported"); -+ -+ if (cpu_has_pebs) { -+ /* -+ * PEBS does not work with HyperThreading enabled -+ */ -+ if (ht_enabled) -+ PFM_INFO("PEBS supported, status off (because of HT)"); -+ else -+ PFM_INFO("PEBS supported, status on"); -+ } -+ } -+ -+ /* -+ * build enable mask -+ */ -+ for (i = 0; i < PFM_P4_NUM_PMCS; i++) { -+ if (pmc_addrs[(i)].reg_type & PFM_REGT_EN) { -+ __set_bit(i, cast_ulp(enable_mask)); -+ max_enable = i + 1; -+ } -+ } -+ -+ if (force_nmi) -+ pfm_p4_pmu_info.flags |= PFM_X86_FL_USE_NMI; -+ return 0; -+} -+static inline int get_smt_id(void) -+{ -+#ifdef CONFIG_SMP -+ int cpu = smp_processor_id(); -+ return (cpu != first_cpu(__get_cpu_var(cpu_sibling_map))); -+#else -+ return 0; -+#endif -+} -+ -+static void __pfm_write_reg_p4(const struct pfm_p4_regmap *xreg, u64 val) -+{ -+ u64 pmi; -+ int smt_id; -+ -+ smt_id = get_smt_id(); -+ /* -+ * HT is only supported by P4-style PMU -+ * -+ * Adjust for T1 if necessary: -+ * -+ * - move the T0_OS/T0_USR bits into T1 slots -+ * - move the OVF_PMI_T0 bits into T1 slot -+ * -+ * The P4/EM64T T1 is cleared by description table. -+ * User only works with T0. 
-+ */ -+ if (smt_id) { -+ if (xreg->reg_type & PFM_REGT_ESCR) { -+ -+ /* copy T0_USR & T0_OS to T1 */ -+ val |= ((val & 0xc) >> 2); -+ -+ /* clear bits T0_USR & T0_OS */ -+ val &= ~0xc; -+ -+ } else if (xreg->reg_type & PFM_REGT_CCCR) { -+ pmi = (val >> 26) & 0x1; -+ if (pmi) { -+ val &= ~(1UL<<26); -+ val |= 1UL<<27; -+ } -+ } -+ } -+ if (xreg->addrs[smt_id]) -+ wrmsrl(xreg->addrs[smt_id], val); -+} -+ -+void __pfm_read_reg_p4(const struct pfm_p4_regmap *xreg, u64 *val) -+{ -+ int smt_id; -+ -+ smt_id = get_smt_id(); -+ -+ if (likely(xreg->addrs[smt_id])) { -+ rdmsrl(xreg->addrs[smt_id], *val); -+ /* -+ * HT is only supported by P4-style PMU -+ * -+ * move the Tx_OS and Tx_USR bits into -+ * T0 slots setting the T1 slots to zero -+ */ -+ if (xreg->reg_type & PFM_REGT_ESCR) { -+ if (smt_id) -+ *val |= (((*val) & 0x3) << 2); -+ -+ /* -+ * zero out bits that are reserved -+ * (including T1_OS and T1_USR) -+ */ -+ *val &= PFM_ESCR_RSVD; -+ } -+ } else { -+ *val = 0; -+ } -+} -+static void pfm_p4_write_pmc(struct pfm_context *ctx, unsigned int cnum, u64 value) -+{ -+ __pfm_write_reg_p4(&pmc_addrs[cnum], value); -+} -+ -+static void pfm_p4_write_pmd(struct pfm_context *ctx, unsigned int cnum, u64 value) -+{ -+ __pfm_write_reg_p4(&pmd_addrs[cnum], value); -+} -+ -+static u64 pfm_p4_read_pmd(struct pfm_context *ctx, unsigned int cnum) -+{ -+ u64 tmp; -+ __pfm_read_reg_p4(&pmd_addrs[cnum], &tmp); -+ return tmp; -+} -+ -+static u64 pfm_p4_read_pmc(struct pfm_context *ctx, unsigned int cnum) -+{ -+ u64 tmp; -+ __pfm_read_reg_p4(&pmc_addrs[cnum], &tmp); -+ return tmp; -+} -+ -+struct pfm_ds_area_p4 { -+ unsigned long bts_buf_base; -+ unsigned long bts_index; -+ unsigned long bts_abs_max; -+ unsigned long bts_intr_thres; -+ unsigned long pebs_buf_base; -+ unsigned long pebs_index; -+ unsigned long pebs_abs_max; -+ unsigned long pebs_intr_thres; -+ u64 pebs_cnt_reset; -+}; -+ -+ -+static int pfm_p4_stop_save(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ struct 
pfm_arch_pmu_info *pmu_info; -+ struct pfm_arch_context *ctx_arch; -+ struct pfm_ds_area_p4 *ds = NULL; -+ u64 used_mask[PFM_PMC_BV]; -+ u16 i, j, count, pebs_idx = ~0; -+ u16 max_pmc; -+ u64 cccr, ctr1, ctr2, ovfl_mask; -+ -+ pmu_info = &pfm_p4_pmu_info; -+ ctx_arch = pfm_ctx_arch(ctx); -+ max_pmc = ctx->regs.max_pmc; -+ ovfl_mask = pfm_pmu_conf->ovfl_mask; -+ -+ /* -+ * build used enable PMC bitmask -+ * if user did not set any CCCR, then mask is -+ * empty and there is nothing to do because nothing -+ * was started -+ */ -+ bitmap_and(cast_ulp(used_mask), -+ cast_ulp(set->used_pmcs), -+ cast_ulp(enable_mask), -+ max_enable); -+ -+ count = bitmap_weight(cast_ulp(used_mask), max_enable); -+ -+ PFM_DBG_ovfl("npend=%u ena_mask=0x%llx u_pmcs=0x%llx count=%u num=%u", -+ set->npend_ovfls, -+ (unsigned long long)enable_mask[0], -+ (unsigned long long)set->used_pmcs[0], -+ count, max_enable); -+ -+ /* -+ * ensures we do not destroy pending overflow -+ * information. If pended interrupts are already -+ * known, then we just stop monitoring. -+ */ -+ if (set->npend_ovfls) { -+ /* -+ * clear enable bit -+ * unfortunately, this is very expensive! -+ */ -+ for (i = 0; count; i++) { -+ if (test_bit(i, cast_ulp(used_mask))) { -+ __pfm_write_reg_p4(pmc_addrs+i, 0); -+ count--; -+ } -+ } -+ /* need save PMDs at upper level */ -+ return 1; -+ } -+ -+ if (ctx_arch->flags.use_pebs) { -+ ds = ctx_arch->ds_area; -+ pebs_idx = PEBS_PMD; -+ PFM_DBG("ds=%p pebs_idx=0x%llx thres=0x%llx", -+ ds, -+ (unsigned long long)ds->pebs_index, -+ (unsigned long long)ds->pebs_intr_thres); -+ } -+ -+ /* -+ * stop monitoring AND collect pending overflow information AND -+ * save pmds. -+ * -+ * We need to access the CCCR twice, once to get overflow info -+ * and a second to stop monitoring (which destroys the OVF flag) -+ * Similarly, we need to read the counter twice to check whether -+ * it did overflow between the CCR read and the CCCR write. 
-+ */ -+ for (i = 0; count; i++) { -+ if (i != pebs_idx && test_bit(i, cast_ulp(used_mask))) { -+ /* -+ * controlled counter -+ */ -+ j = pmc_addrs[i].ctr; -+ -+ /* read CCCR (PMC) value */ -+ __pfm_read_reg_p4(pmc_addrs+i, &cccr); -+ -+ /* read counter (PMD) controlled by PMC */ -+ __pfm_read_reg_p4(pmd_addrs+j, &ctr1); -+ -+ /* clear CCCR value: stop counter but destroy OVF */ -+ __pfm_write_reg_p4(pmc_addrs+i, 0); -+ -+ /* read counter controlled by CCCR again */ -+ __pfm_read_reg_p4(pmd_addrs+j, &ctr2); -+ -+ /* -+ * there is an overflow if either: -+ * - CCCR.ovf is set (and we just cleared it) -+ * - ctr2 < ctr1 -+ * in that case we set the bit corresponding to the -+ * overflowed PMD in povfl_pmds. -+ */ -+ if ((cccr & (1ULL<<31)) || (ctr2 < ctr1)) { -+ __set_bit(j, cast_ulp(set->povfl_pmds)); -+ set->npend_ovfls++; -+ } -+ ctr2 = (set->pmds[j].value & ~ovfl_mask) | (ctr2 & ovfl_mask); -+ set->pmds[j].value = ctr2; -+ count--; -+ } -+ } -+ /* -+ * check for PEBS buffer full and set the corresponding PMD overflow -+ */ -+ if (ctx_arch->flags.use_pebs) { -+ PFM_DBG("ds=%p pebs_idx=0x%lx thres=0x%lx", ds, ds->pebs_index, ds->pebs_intr_thres); -+ if (ds->pebs_index >= ds->pebs_intr_thres -+ && test_bit(PEBS_PMD, cast_ulp(set->used_pmds))) { -+ __set_bit(PEBS_PMD, cast_ulp(set->povfl_pmds)); -+ set->npend_ovfls++; -+ } -+ } -+ /* 0 means: no need to save the PMD at higher level */ -+ return 0; -+} -+ -+static int pfm_p4_create_context(struct pfm_context *ctx, u32 ctx_flags) -+{ -+ struct pfm_arch_context *ctx_arch; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ -+ ctx_arch->data = kzalloc(sizeof(struct pfm_arch_p4_context), GFP_KERNEL); -+ if (!ctx_arch->data) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+static void pfm_p4_free_context(struct pfm_context *ctx) -+{ -+ struct pfm_arch_context *ctx_arch; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ /* -+ * we do not check if P4, because it would be NULL and -+ * kfree can deal with NULL -+ */ -+ kfree(ctx_arch->data); -+} -+ -+/* 
-+ * detect is counters have overflowed. -+ * return: -+ * 0 : no overflow -+ * 1 : at least one overflow -+ * -+ * used by Intel P4 -+ */ -+static int __kprobes pfm_p4_has_ovfls(struct pfm_context *ctx) -+{ -+ struct pfm_arch_pmu_info *pmu_info; -+ struct pfm_p4_regmap *xrc, *xrd; -+ struct pfm_arch_context *ctx_arch; -+ struct pfm_arch_p4_context *p4; -+ u64 ena_mask[PFM_PMC_BV]; -+ u64 cccr, ctr1, ctr2; -+ int n, i, j; -+ -+ pmu_info = &pfm_p4_pmu_info; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ xrc = pmc_addrs; -+ xrd = pmd_addrs; -+ p4 = ctx_arch->data; -+ -+ bitmap_and(cast_ulp(ena_mask), -+ cast_ulp(ctx->regs.pmcs), -+ cast_ulp(enable_mask), -+ max_enable); -+ -+ n = bitmap_weight(cast_ulp(ena_mask), max_enable); -+ -+ for (i = 0; n; i++) { -+ if (!test_bit(i, cast_ulp(ena_mask))) -+ continue; -+ /* -+ * controlled counter -+ */ -+ j = xrc[i].ctr; -+ -+ /* read CCCR (PMC) value */ -+ __pfm_read_reg_p4(xrc+i, &cccr); -+ -+ /* read counter (PMD) controlled by PMC */ -+ __pfm_read_reg_p4(xrd+j, &ctr1); -+ -+ /* clear CCCR value: stop counter but destroy OVF */ -+ __pfm_write_reg_p4(xrc+i, 0); -+ -+ /* read counter controlled by CCCR again */ -+ __pfm_read_reg_p4(xrd+j, &ctr2); -+ -+ /* -+ * there is an overflow if either: -+ * - CCCR.ovf is set (and we just cleared it) -+ * - ctr2 < ctr1 -+ * in that case we set the bit corresponding to the -+ * overflowed PMD in povfl_pmds. 
-+ */ -+ if ((cccr & (1ULL<<31)) || (ctr2 < ctr1)) { -+ __set_bit(j, cast_ulp(p4->povfl_pmds)); -+ p4->npend_ovfls++; -+ } -+ p4->saved_cccrs[i] = cccr; -+ n--; -+ } -+ /* -+ * if there was no overflow, then it means the NMI was not really -+ * for us, so we have to resume monitoring -+ */ -+ if (unlikely(!p4->npend_ovfls)) { -+ for (i = 0; n; i++) { -+ if (!test_bit(i, cast_ulp(ena_mask))) -+ continue; -+ __pfm_write_reg_p4(xrc+i, p4->saved_cccrs[i]); -+ } -+ } -+ return 0; -+} -+ -+void pfm_p4_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ struct pfm_arch_pmu_info *pmu_info; -+ struct pfm_arch_context *ctx_arch; -+ u64 *mask; -+ u16 i, num; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ pmu_info = pfm_pmu_info(); -+ -+ /* -+ * must restore DS pointer before restoring PMCs -+ * as this can potentially reactivate monitoring -+ */ -+ if (ctx_arch->flags.use_ds) -+ wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ctx_arch->ds_area); -+ -+ /* -+ * must restore everything because there are some dependencies -+ * (e.g., ESCR and CCCR) -+ */ -+ num = ctx->regs.num_pmcs; -+ mask = ctx->regs.pmcs; -+ for (i = 0; num; i++) { -+ if (test_bit(i, cast_ulp(mask))) { -+ pfm_arch_write_pmc(ctx, i, set->pmcs[i]); -+ num--; -+ } -+ } -+} -+ -+/* -+ * invoked only when NMI is used. Called from the LOCAL_PERFMON_VECTOR -+ * handler to copy P4 overflow state captured when the NMI triggered. -+ * Given that on P4, stopping monitoring destroy the overflow information -+ * we save it in pfm_has_ovfl_p4() where monitoring is also stopped. -+ * -+ * Here we propagate the overflow state to current active set. The -+ * freeze_pmu() call we not overwrite this state because npend_ovfls -+ * is non-zero. 
-+ */ -+static void pfm_p4_nmi_copy_state(struct pfm_context *ctx) -+{ -+ struct pfm_arch_context *ctx_arch; -+ struct pfm_event_set *set; -+ struct pfm_arch_p4_context *p4; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ p4 = ctx_arch->data; -+ set = ctx->active_set; -+ -+ if (p4->npend_ovfls) { -+ set->npend_ovfls = p4->npend_ovfls; -+ -+ bitmap_copy(cast_ulp(set->povfl_pmds), -+ cast_ulp(p4->povfl_pmds), -+ ctx->regs.max_pmd); -+ -+ p4->npend_ovfls = 0; -+ } -+} -+ -+/** -+ * pfm_p4_quiesce - stop monitoring without grabbing any lock -+ * -+ * called from NMI interrupt handler to immediately stop monitoring -+ * cannot grab any lock, including perfmon related locks -+ */ -+static void __kprobes pfm_p4_quiesce(void) -+{ -+ u16 i; -+ /* -+ * quiesce PMU by clearing available registers that have -+ * the start/stop capability -+ */ -+ for (i = 0; i < pfm_pmu_conf->regs_all.max_pmc; i++) { -+ if (test_bit(i, cast_ulp(pfm_pmu_conf->regs_all.pmcs)) -+ && test_bit(i, cast_ulp(enable_mask))) -+ __pfm_write_reg_p4(pmc_addrs+i, 0); -+ } -+} -+ -+ -+static struct pfm_pmu_config pfm_p4_pmu_conf = { -+ .pmu_name = "Intel P4", -+ .counter_width = 40, -+ .pmd_desc = pfm_p4_pmd_desc, -+ .pmc_desc = pfm_p4_pmc_desc, -+ .num_pmc_entries = PFM_P4_NUM_PMCS, -+ .num_pmd_entries = PFM_P4_NUM_PMDS, -+ .probe_pmu = pfm_p4_probe_pmu, -+ .version = "1.0", -+ .flags = PFM_PMU_BUILTIN_FLAG, -+ .owner = THIS_MODULE, -+ .pmu_info = &pfm_p4_pmu_info -+}; -+ -+static int __init pfm_p4_pmu_init_module(void) -+{ -+ return pfm_pmu_register(&pfm_p4_pmu_conf); -+} -+ -+static void __exit pfm_p4_pmu_cleanup_module(void) -+{ -+ pfm_pmu_unregister(&pfm_p4_pmu_conf); -+} -+ -+module_init(pfm_p4_pmu_init_module); -+module_exit(pfm_p4_pmu_cleanup_module); -diff --git a/arch/x86/perfmon/perfmon_p6.c b/arch/x86/perfmon/perfmon_p6.c -new file mode 100644 -index 0000000..47c0a46 ---- /dev/null -+++ b/arch/x86/perfmon/perfmon_p6.c -@@ -0,0 +1,310 @@ -+/* -+ * This file contains the P6 family processor PMU register 
description tables -+ * -+ * This module supports original P6 processors -+ * (Pentium II, Pentium Pro, Pentium III) and Pentium M. -+ * -+ * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+#include -+#include -+ -+MODULE_AUTHOR("Stephane Eranian "); -+MODULE_DESCRIPTION("P6 PMU description table"); -+MODULE_LICENSE("GPL"); -+ -+static int force_nmi; -+MODULE_PARM_DESC(force_nmi, "bool: force use of NMI for PMU interrupt"); -+module_param(force_nmi, bool, 0600); -+ -+/* -+ * - upper 32 bits are reserved -+ * - INT: APIC enable bit is reserved (forced to 1) -+ * - bit 21 is reserved -+ * - bit 22 is reserved on PEREVNTSEL1 -+ * -+ * RSVD: reserved bits are 1 -+ */ -+#define PFM_P6_PMC0_RSVD ((~((1ULL<<32)-1)) | (1ULL<<20) | (1ULL<<21)) -+#define PFM_P6_PMC1_RSVD ((~((1ULL<<32)-1)) | (1ULL<<20) | (3ULL<<21)) -+ -+/* -+ * force Local APIC interrupt on overflow -+ * disable with NO_EMUL64 -+ */ -+#define PFM_P6_PMC_VAL (1ULL<<20) -+#define PFM_P6_NO64 (1ULL<<20) -+ -+ -+static void __kprobes pfm_p6_quiesce(void); -+static int pfm_p6_has_ovfls(struct pfm_context *ctx); -+static int pfm_p6_stop_save(struct pfm_context *ctx, -+ struct pfm_event_set *set); -+ -+static u64 enable_mask[PFM_MAX_PMCS]; -+static 
u16 max_enable; -+ -+/* -+ * PFM_X86_FL_NO_SHARING: because of the single enable bit on MSR_P6_EVNTSEL0 -+ * the PMU cannot be shared with NMI watchdog or Oprofile -+ */ -+struct pfm_arch_pmu_info pfm_p6_pmu_info = { -+ .stop_save = pfm_p6_stop_save, -+ .has_ovfls = pfm_p6_has_ovfls, -+ .quiesce = pfm_p6_quiesce, -+ .flags = PFM_X86_FL_NO_SHARING, -+}; -+ -+static struct pfm_regmap_desc pfm_p6_pmc_desc[] = { -+/* pmc0 */ PMC_D(PFM_REG_I64, "PERFEVTSEL0", PFM_P6_PMC_VAL, PFM_P6_PMC0_RSVD, PFM_P6_NO64, MSR_P6_EVNTSEL0), -+/* pmc1 */ PMC_D(PFM_REG_I64, "PERFEVTSEL1", PFM_P6_PMC_VAL, PFM_P6_PMC1_RSVD, PFM_P6_NO64, MSR_P6_EVNTSEL1) -+}; -+#define PFM_P6_NUM_PMCS ARRAY_SIZE(pfm_p6_pmc_desc) -+ -+#define PFM_P6_D(n) \ -+ { .type = PFM_REG_C, \ -+ .desc = "PERFCTR"#n, \ -+ .hw_addr = MSR_P6_PERFCTR0+n, \ -+ .rsvd_msk = 0, \ -+ .dep_pmcs[0] = 1ULL << n \ -+ } -+ -+static struct pfm_regmap_desc pfm_p6_pmd_desc[] = { -+/* pmd0 */ PFM_P6_D(0), -+/* pmd1 */ PFM_P6_D(1) -+}; -+#define PFM_P6_NUM_PMDS ARRAY_SIZE(pfm_p6_pmd_desc) -+ -+static int pfm_p6_probe_pmu(void) -+{ -+ int high, low; -+ -+ if (current_cpu_data.x86_vendor != X86_VENDOR_INTEL) { -+ PFM_INFO("not an Intel processor"); -+ return -1; -+ } -+ -+ /* -+ * check for P6 processor family -+ */ -+ if (current_cpu_data.x86 != 6) { -+ PFM_INFO("unsupported family=%d", current_cpu_data.x86); -+ return -1; -+ } -+ -+ switch (current_cpu_data.x86_model) { -+ case 1: /* Pentium Pro */ -+ case 3: -+ case 5: /* Pentium II Deschutes */ -+ case 7 ... 
11: -+ break; -+ case 13: -+ /* for Pentium M, we need to check if PMU exist */ -+ rdmsr(MSR_IA32_MISC_ENABLE, low, high); -+ if (low & (1U << 7)) -+ break; -+ default: -+ PFM_INFO("unsupported CPU model %d", -+ current_cpu_data.x86_model); -+ return -1; -+ -+ } -+ -+ if (!cpu_has_apic) { -+ PFM_INFO("no Local APIC, try rebooting with lapic"); -+ return -1; -+ } -+ __set_bit(0, cast_ulp(enable_mask)); -+ __set_bit(1, cast_ulp(enable_mask)); -+ max_enable = 1 + 1; -+ /* -+ * force NMI interrupt? -+ */ -+ if (force_nmi) -+ pfm_p6_pmu_info.flags |= PFM_X86_FL_USE_NMI; -+ -+ return 0; -+} -+ -+/** -+ * pfm_p6_has_ovfls - check for pending overflow condition -+ * @ctx: context to work on -+ * -+ * detect if counters have overflowed. -+ * return: -+ * 0 : no overflow -+ * 1 : at least one overflow -+ */ -+static int __kprobes pfm_p6_has_ovfls(struct pfm_context *ctx) -+{ -+ u64 *cnt_mask; -+ u64 wmask, val; -+ u16 i, num; -+ -+ cnt_mask = ctx->regs.cnt_pmds; -+ num = ctx->regs.num_counters; -+ wmask = 1ULL << pfm_pmu_conf->counter_width; -+ -+ /* -+ * we can leverage the fact that we know the mapping -+ * to hardcode the MSR address and avoid accessing -+ * more cachelines -+ * -+ * We need to check cnt_mask because not all registers -+ * may be available. 
-+ */ -+ for (i = 0; num; i++) { -+ if (test_bit(i, cast_ulp(cnt_mask))) { -+ rdmsrl(MSR_P6_PERFCTR0+i, val); -+ if (!(val & wmask)) -+ return 1; -+ num--; -+ } -+ } -+ return 0; -+} -+ -+/** -+ * pfm_p6_stop_save -- stop monitoring and save PMD values -+ * @ctx: context to work on -+ * @set: current event set -+ * -+ * return value: -+ * 0 - no need to save PMDs in caller -+ * 1 - need to save PMDs in caller -+ */ -+static int pfm_p6_stop_save(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ struct pfm_arch_pmu_info *pmu_info; -+ u64 used_mask[PFM_PMC_BV]; -+ u64 *cnt_pmds; -+ u64 val, wmask, ovfl_mask; -+ u32 i, count; -+ -+ pmu_info = pfm_pmu_info(); -+ -+ wmask = 1ULL << pfm_pmu_conf->counter_width; -+ bitmap_and(cast_ulp(used_mask), -+ cast_ulp(set->used_pmcs), -+ cast_ulp(enable_mask), -+ max_enable); -+ -+ count = bitmap_weight(cast_ulp(used_mask), ctx->regs.max_pmc); -+ -+ /* -+ * stop monitoring -+ * Unfortunately, this is very expensive! -+ * wrmsrl() is serializing. -+ */ -+ for (i = 0; count; i++) { -+ if (test_bit(i, cast_ulp(used_mask))) { -+ wrmsrl(MSR_P6_EVNTSEL0+i, 0); -+ count--; -+ } -+ } -+ -+ /* -+ * if we already having a pending overflow condition, we simply -+ * return to take care of this first. -+ */ -+ if (set->npend_ovfls) -+ return 1; -+ -+ ovfl_mask = pfm_pmu_conf->ovfl_mask; -+ cnt_pmds = ctx->regs.cnt_pmds; -+ -+ /* -+ * check for pending overflows and save PMDs (combo) -+ * we employ used_pmds because we also need to save -+ * and not just check for pending interrupts. 
-+ * -+ * Must check for counting PMDs because of virtual PMDs -+ */ -+ count = set->nused_pmds; -+ for (i = 0; count; i++) { -+ if (test_bit(i, cast_ulp(set->used_pmds))) { -+ val = pfm_arch_read_pmd(ctx, i); -+ if (likely(test_bit(i, cast_ulp(cnt_pmds)))) { -+ if (!(val & wmask)) { -+ __set_bit(i, cast_ulp(set->povfl_pmds)); -+ set->npend_ovfls++; -+ } -+ val = (set->pmds[i].value & ~ovfl_mask) | (val & ovfl_mask); -+ } -+ set->pmds[i].value = val; -+ count--; -+ } -+ } -+ /* 0 means: no need to save PMDs at upper level */ -+ return 0; -+} -+ -+/** -+ * pfm_p6_quiesce_pmu -- stop monitoring without grabbing any lock -+ * -+ * called from NMI interrupt handler to immediately stop monitoring -+ * cannot grab any lock, including perfmon related locks -+ */ -+static void __kprobes pfm_p6_quiesce(void) -+{ -+ /* -+ * quiesce PMU by clearing available registers that have -+ * the start/stop capability -+ * -+ * P6 processors only have enable bit on PERFEVTSEL0 -+ */ -+ if (test_bit(0, cast_ulp(pfm_pmu_conf->regs_all.pmcs))) -+ wrmsrl(MSR_P6_EVNTSEL0, 0); -+} -+ -+/* -+ * Counters have 40 bits implemented. However they are designed such -+ * that bits [32-39] are sign extensions of bit 31. As such the -+ * effective width of a counter for P6-like PMU is 31 bits only. 
-+ * -+ * See IA-32 Intel Architecture Software developer manual Vol 3B -+ */ -+static struct pfm_pmu_config pfm_p6_pmu_conf = { -+ .pmu_name = "Intel P6 processor Family", -+ .counter_width = 31, -+ .pmd_desc = pfm_p6_pmd_desc, -+ .pmc_desc = pfm_p6_pmc_desc, -+ .num_pmc_entries = PFM_P6_NUM_PMCS, -+ .num_pmd_entries = PFM_P6_NUM_PMDS, -+ .probe_pmu = pfm_p6_probe_pmu, -+ .version = "1.0", -+ .flags = PFM_PMU_BUILTIN_FLAG, -+ .owner = THIS_MODULE, -+ .pmu_info = &pfm_p6_pmu_info -+}; -+ -+static int __init pfm_p6_pmu_init_module(void) -+{ -+ return pfm_pmu_register(&pfm_p6_pmu_conf); -+} -+ -+static void __exit pfm_p6_pmu_cleanup_module(void) -+{ -+ pfm_pmu_unregister(&pfm_p6_pmu_conf); -+} -+ -+module_init(pfm_p6_pmu_init_module); -+module_exit(pfm_p6_pmu_cleanup_module); -diff --git a/arch/x86/perfmon/perfmon_pebs_core_smpl.c b/arch/x86/perfmon/perfmon_pebs_core_smpl.c -new file mode 100644 -index 0000000..eeb9174 ---- /dev/null -+++ b/arch/x86/perfmon/perfmon_pebs_core_smpl.c -@@ -0,0 +1,256 @@ -+/* -+ * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This file implements the Precise Event Based Sampling (PEBS) -+ * sampling format for Intel Core and Atom processors. -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+MODULE_AUTHOR("Stephane Eranian "); -+MODULE_DESCRIPTION("Intel Core Precise Event-Based Sampling (PEBS)"); -+MODULE_LICENSE("GPL"); -+ -+#define ALIGN_PEBS(a, order) \ -+ ((a)+(1UL<<(order))-1) & ~((1UL<<(order))-1) -+ -+#define PEBS_PADDING_ORDER 8 /* log2(256) padding for PEBS alignment constraint */ -+ -+static int pfm_pebs_core_fmt_validate(u32 flags, u16 npmds, void *data) -+{ -+ struct pfm_pebs_core_smpl_arg *arg = data; -+ size_t min_buf_size; -+ -+ /* -+ * need to define at least the size of the buffer -+ */ -+ if (data == NULL) { -+ PFM_DBG("no argument passed"); -+ return -EINVAL; -+ } -+ -+ /* -+ * compute min buf size. npmds is the maximum number -+ * of implemented PMD registers. 
-+ */ -+ min_buf_size = sizeof(struct pfm_pebs_core_smpl_hdr) -+ + sizeof(struct pfm_pebs_core_smpl_entry) -+ + (1UL<buf_size); -+ -+ /* -+ * must hold at least the buffer header + one minimally sized entry -+ */ -+ if (arg->buf_size < min_buf_size) -+ return -EINVAL; -+ -+ return 0; -+} -+ -+static int pfm_pebs_core_fmt_get_size(unsigned int flags, void *data, size_t *size) -+{ -+ struct pfm_pebs_core_smpl_arg *arg = data; -+ -+ /* -+ * size has been validated in pfm_pebs_core_fmt_validate() -+ */ -+ *size = arg->buf_size + (1UL<ds; -+ -+ /* -+ * align PEBS buffer base -+ */ -+ pebs_start = ALIGN_PEBS((unsigned long)(hdr+1), PEBS_PADDING_ORDER); -+ pebs_end = pebs_start + arg->buf_size + 1; -+ -+ hdr->version = PFM_PEBS_CORE_SMPL_VERSION; -+ hdr->buf_size = arg->buf_size; -+ hdr->overflows = 0; -+ -+ /* -+ * express PEBS buffer base as offset from the end of the header -+ */ -+ hdr->start_offs = pebs_start - (unsigned long)(hdr+1); -+ -+ /* -+ * PEBS buffer boundaries -+ */ -+ ds->pebs_buf_base = pebs_start; -+ ds->pebs_abs_max = pebs_end; -+ -+ /* -+ * PEBS starting position -+ */ -+ ds->pebs_index = pebs_start; -+ -+ /* -+ * PEBS interrupt threshold -+ */ -+ ds->pebs_intr_thres = pebs_start -+ + arg->intr_thres -+ * sizeof(struct pfm_pebs_core_smpl_entry); -+ -+ /* -+ * save counter reset value for PEBS counter -+ */ -+ ds->pebs_cnt_reset = arg->cnt_reset; -+ -+ /* -+ * keep track of DS AREA -+ */ -+ ctx_arch->ds_area = ds; -+ ctx_arch->flags.use_ds = 1; -+ ctx_arch->flags.use_pebs = 1; -+ -+ PFM_DBG("buffer=%p buf_size=%llu offs=%llu pebs_start=0x%llx " -+ "pebs_end=0x%llx ds=%p pebs_thres=0x%llx cnt_reset=0x%llx", -+ buf, -+ (unsigned long long)hdr->buf_size, -+ (unsigned long long)hdr->start_offs, -+ (unsigned long long)pebs_start, -+ (unsigned long long)pebs_end, -+ ds, -+ (unsigned long long)ds->pebs_intr_thres, -+ (unsigned long long)ds->pebs_cnt_reset); -+ -+ return 0; -+} -+ -+static int pfm_pebs_core_fmt_handler(struct pfm_context *ctx, -+ unsigned long 
ip, u64 tstamp, void *data) -+{ -+ struct pfm_pebs_core_smpl_hdr *hdr; -+ struct pfm_ovfl_arg *arg; -+ -+ hdr = ctx->smpl_addr; -+ arg = &ctx->ovfl_arg; -+ -+ PFM_DBG_ovfl("buffer full"); -+ /* -+ * increment number of buffer overflows. -+ * important to detect duplicate set of samples. -+ */ -+ hdr->overflows++; -+ -+ /* -+ * request notification and masking of monitoring. -+ * Notification is still subject to the overflowed -+ * register having the FL_NOTIFY flag set. -+ */ -+ arg->ovfl_ctrl = PFM_OVFL_CTRL_NOTIFY | PFM_OVFL_CTRL_MASK; -+ -+ return -ENOBUFS; /* we are full, sorry */ -+} -+ -+static int pfm_pebs_core_fmt_restart(int is_active, u32 *ovfl_ctrl, -+ void *buf) -+{ -+ struct pfm_pebs_core_smpl_hdr *hdr = buf; -+ -+ /* -+ * reset index to base of buffer -+ */ -+ hdr->ds.pebs_index = hdr->ds.pebs_buf_base; -+ -+ *ovfl_ctrl = PFM_OVFL_CTRL_RESET; -+ -+ return 0; -+} -+ -+static int pfm_pebs_core_fmt_exit(void *buf) -+{ -+ return 0; -+} -+ -+static struct pfm_smpl_fmt pebs_core_fmt = { -+ .fmt_name = PFM_PEBS_CORE_SMPL_NAME, -+ .fmt_version = 0x1, -+ .fmt_arg_size = sizeof(struct pfm_pebs_core_smpl_arg), -+ .fmt_validate = pfm_pebs_core_fmt_validate, -+ .fmt_getsize = pfm_pebs_core_fmt_get_size, -+ .fmt_init = pfm_pebs_core_fmt_init, -+ .fmt_handler = pfm_pebs_core_fmt_handler, -+ .fmt_restart = pfm_pebs_core_fmt_restart, -+ .fmt_exit = pfm_pebs_core_fmt_exit, -+ .fmt_flags = PFM_FMT_BUILTIN_FLAG, -+ .owner = THIS_MODULE, -+}; -+ -+static int __init pfm_pebs_core_fmt_init_module(void) -+{ -+ if (!cpu_has_pebs) { -+ PFM_INFO("processor does not have PEBS support"); -+ return -1; -+ } -+ /* -+ * cpu_has_pebs is not enough to identify Intel Core PEBS -+ * which is different fro Pentium 4 PEBS. 
Therefore we do -+ * a more detailed check here -+ */ -+ if (current_cpu_data.x86 != 6) { -+ PFM_INFO("not a supported Intel processor"); -+ return -1; -+ } -+ -+ switch (current_cpu_data.x86_model) { -+ case 15: /* Merom */ -+ case 23: /* Penryn */ -+ case 28: /* Atom (Silverthorne) */ -+ case 29: /* Dunnington */ -+ break; -+ default: -+ PFM_INFO("not a supported Intel processor"); -+ return -1; -+ } -+ return pfm_fmt_register(&pebs_core_fmt); -+} -+ -+static void __exit pfm_pebs_core_fmt_cleanup_module(void) -+{ -+ pfm_fmt_unregister(&pebs_core_fmt); -+} -+ -+module_init(pfm_pebs_core_fmt_init_module); -+module_exit(pfm_pebs_core_fmt_cleanup_module); -diff --git a/arch/x86/perfmon/perfmon_pebs_p4_smpl.c b/arch/x86/perfmon/perfmon_pebs_p4_smpl.c -new file mode 100644 -index 0000000..f4e9fd2 ---- /dev/null -+++ b/arch/x86/perfmon/perfmon_pebs_p4_smpl.c -@@ -0,0 +1,253 @@ -+/* -+ * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This file implements the Precise Event Based Sampling (PEBS) -+ * sampling format. It supports the following processors: -+ * - 32-bit Pentium 4 or other Netburst-based processors -+ * - 64-bit Pentium 4 or other Netburst-based processors -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+MODULE_AUTHOR("Stephane Eranian "); -+MODULE_DESCRIPTION("Intel P4 Precise Event-Based Sampling (PEBS)"); -+MODULE_LICENSE("GPL"); -+ -+#define ALIGN_PEBS(a, order) \ -+ ((a)+(1UL<<(order))-1) & ~((1UL<<(order))-1) -+ -+#define PEBS_PADDING_ORDER 8 /* log2(256) padding for PEBS alignment constraint */ -+ -+static int pfm_pebs_p4_fmt_validate(u32 flags, u16 npmds, void *data) -+{ -+ struct pfm_pebs_p4_smpl_arg *arg = data; -+ size_t min_buf_size; -+ -+ /* -+ * need to define at least the size of the buffer -+ */ -+ if (data == NULL) { -+ PFM_DBG("no argument passed"); -+ return -EINVAL; -+ } -+ -+ /* -+ * compute min buf size. npmds is the maximum number -+ * of implemented PMD registers. 
-+ */ -+ min_buf_size = sizeof(struct pfm_pebs_p4_smpl_hdr) -+ + sizeof(struct pfm_pebs_p4_smpl_entry) -+ + (1UL<buf_size); -+ -+ /* -+ * must hold at least the buffer header + one minimally sized entry -+ */ -+ if (arg->buf_size < min_buf_size) -+ return -EINVAL; -+ -+ return 0; -+} -+ -+static int pfm_pebs_p4_fmt_get_size(unsigned int flags, void *data, size_t *size) -+{ -+ struct pfm_pebs_p4_smpl_arg *arg = data; -+ -+ /* -+ * size has been validated in pfm_pebs_p4_fmt_validate() -+ */ -+ *size = arg->buf_size + (1UL<ds; -+ -+ /* -+ * align PEBS buffer base -+ */ -+ pebs_start = ALIGN_PEBS((unsigned long)(hdr+1), PEBS_PADDING_ORDER); -+ pebs_end = pebs_start + arg->buf_size + 1; -+ -+ hdr->version = PFM_PEBS_P4_SMPL_VERSION; -+ hdr->buf_size = arg->buf_size; -+ hdr->overflows = 0; -+ -+ /* -+ * express PEBS buffer base as offset from the end of the header -+ */ -+ hdr->start_offs = pebs_start - (unsigned long)(hdr+1); -+ -+ /* -+ * PEBS buffer boundaries -+ */ -+ ds->pebs_buf_base = pebs_start; -+ ds->pebs_abs_max = pebs_end; -+ -+ /* -+ * PEBS starting position -+ */ -+ ds->pebs_index = pebs_start; -+ -+ /* -+ * PEBS interrupt threshold -+ */ -+ ds->pebs_intr_thres = pebs_start -+ + arg->intr_thres * sizeof(struct pfm_pebs_p4_smpl_entry); -+ -+ /* -+ * save counter reset value for PEBS counter -+ */ -+ ds->pebs_cnt_reset = arg->cnt_reset; -+ -+ /* -+ * keep track of DS AREA -+ */ -+ ctx_arch->ds_area = ds; -+ ctx_arch->flags.use_pebs = 1; -+ ctx_arch->flags.use_ds = 1; -+ -+ PFM_DBG("buffer=%p buf_size=%llu offs=%llu pebs_start=0x%lx " -+ "pebs_end=0x%lx ds=%p pebs_thres=0x%lx cnt_reset=0x%llx", -+ buf, -+ (unsigned long long)hdr->buf_size, -+ (unsigned long long)hdr->start_offs, -+ pebs_start, -+ pebs_end, -+ ds, -+ ds->pebs_intr_thres, -+ (unsigned long long)ds->pebs_cnt_reset); -+ -+ return 0; -+} -+ -+static int pfm_pebs_p4_fmt_handler(struct pfm_context *ctx, -+ unsigned long ip, u64 tstamp, void *data) -+{ -+ struct pfm_pebs_p4_smpl_hdr *hdr; -+ struct 
pfm_ovfl_arg *arg; -+ -+ hdr = ctx->smpl_addr; -+ arg = &ctx->ovfl_arg; -+ -+ PFM_DBG_ovfl("buffer full"); -+ /* -+ * increment number of buffer overflows. -+ * important to detect duplicate set of samples. -+ */ -+ hdr->overflows++; -+ -+ /* -+ * request notification and masking of monitoring. -+ * Notification is still subject to the overflowed -+ * register having the FL_NOTIFY flag set. -+ */ -+ arg->ovfl_ctrl = PFM_OVFL_CTRL_NOTIFY | PFM_OVFL_CTRL_MASK; -+ -+ return -ENOBUFS; /* we are full, sorry */ -+} -+ -+static int pfm_pebs_p4_fmt_restart(int is_active, u32 *ovfl_ctrl, -+ void *buf) -+{ -+ struct pfm_pebs_p4_smpl_hdr *hdr = buf; -+ -+ /* -+ * reset index to base of buffer -+ */ -+ hdr->ds.pebs_index = hdr->ds.pebs_buf_base; -+ -+ *ovfl_ctrl = PFM_OVFL_CTRL_RESET; -+ -+ return 0; -+} -+ -+static int pfm_pebs_p4_fmt_exit(void *buf) -+{ -+ return 0; -+} -+ -+static struct pfm_smpl_fmt pebs_p4_fmt = { -+ .fmt_name = PFM_PEBS_P4_SMPL_NAME, -+ .fmt_version = 0x1, -+ .fmt_arg_size = sizeof(struct pfm_pebs_p4_smpl_arg), -+ .fmt_validate = pfm_pebs_p4_fmt_validate, -+ .fmt_getsize = pfm_pebs_p4_fmt_get_size, -+ .fmt_init = pfm_pebs_p4_fmt_init, -+ .fmt_handler = pfm_pebs_p4_fmt_handler, -+ .fmt_restart = pfm_pebs_p4_fmt_restart, -+ .fmt_exit = pfm_pebs_p4_fmt_exit, -+ .fmt_flags = PFM_FMT_BUILTIN_FLAG, -+ .owner = THIS_MODULE, -+}; -+ -+static int __init pfm_pebs_p4_fmt_init_module(void) -+{ -+ int ht_enabled; -+ -+ if (!cpu_has_pebs) { -+ PFM_INFO("processor does not have PEBS support"); -+ return -1; -+ } -+ if (current_cpu_data.x86 != 15) { -+ PFM_INFO("not an Intel Pentium 4"); -+ return -1; -+ } -+#ifdef CONFIG_SMP -+ ht_enabled = (cpus_weight(__get_cpu_var(cpu_core_map)) -+ / current_cpu_data.x86_max_cores) > 1; -+#else -+ ht_enabled = 0; -+#endif -+ if (ht_enabled) { -+ PFM_INFO("PEBS not available because HyperThreading is on"); -+ return -1; -+ } -+ return pfm_fmt_register(&pebs_p4_fmt); -+} -+ -+static void __exit pfm_pebs_p4_fmt_cleanup_module(void) -+{ 
-+ pfm_fmt_unregister(&pebs_p4_fmt); -+} -+ -+module_init(pfm_pebs_p4_fmt_init_module); -+module_exit(pfm_pebs_p4_fmt_cleanup_module); -diff --git a/include/asm-mips/Kbuild b/include/asm-mips/Kbuild -index 7897f05..7ed16fc 100644 ---- a/include/asm-mips/Kbuild -+++ b/include/asm-mips/Kbuild -@@ -1,3 +1,4 @@ - include include/asm-generic/Kbuild.asm - - header-y += cachectl.h sgidefs.h sysmips.h -+header-y += perfmon.h -diff --git a/include/asm-mips/perfmon.h b/include/asm-mips/perfmon.h -new file mode 100644 -index 0000000..7915c17 ---- /dev/null -+++ b/include/asm-mips/perfmon.h -@@ -0,0 +1,34 @@ -+/* -+ * Copyright (c) 2007 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This file contains mips64 specific definitions for the perfmon -+ * interface. -+ * -+ * This file MUST never be included directly. Use linux/perfmon.h. -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#ifndef _ASM_MIPS64_PERFMON_H_ -+#define _ASM_MIPS64_PERFMON_H_ -+ -+/* -+ * arch-specific user visible interface definitions -+ */ -+ -+#define PFM_ARCH_MAX_PMCS (256+64) /* 256 HW 64 SW */ -+#define PFM_ARCH_MAX_PMDS (256+64) /* 256 HW 64 SW */ -+ -+#endif /* _ASM_MIPS64_PERFMON_H_ */ -diff --git a/include/asm-mips/perfmon_kern.h b/include/asm-mips/perfmon_kern.h -new file mode 100644 -index 0000000..7d213df ---- /dev/null -+++ b/include/asm-mips/perfmon_kern.h -@@ -0,0 +1,412 @@ -+/* -+ * Copyright (c) 2005 Philip Mucci. -+ * -+ * Based on other versions: -+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This file contains mips64 specific definitions for the perfmon -+ * interface. -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#ifndef _ASM_MIPS64_PERFMON_KERN_H_ -+#define _ASM_MIPS64_PERFMON_KERN_H_ -+ -+#ifdef __KERNEL__ -+ -+#ifdef CONFIG_PERFMON -+#include -+#include -+ -+#define PFM_ARCH_PMD_STK_ARG 2 -+#define PFM_ARCH_PMC_STK_ARG 2 -+ -+struct pfm_arch_pmu_info { -+ u32 pmu_style; -+}; -+ -+#define MIPS64_CONFIG_PMC_MASK (1 << 4) -+#define MIPS64_PMC_INT_ENABLE_MASK (1 << 4) -+#define MIPS64_PMC_CNT_ENABLE_MASK (0xf) -+#define MIPS64_PMC_EVT_MASK (0x7 << 6) -+#define MIPS64_PMC_CTR_MASK (1 << 31) -+#define MIPS64_PMD_INTERRUPT (1 << 31) -+ -+/* Coprocessor register 25 contains the PMU interface. */ -+/* Sel 0 is control for counter 0 */ -+/* Sel 1 is count for counter 0. */ -+/* Sel 2 is control for counter 1. */ -+/* Sel 3 is count for counter 1. */ -+ -+/* -+ -+31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 -+M 0--------------------------------------------------------------0 Event-- IE U S K EXL -+ -+M 31 If this bit is one, another pair of Performance Control -+and Counter registers is implemented at a MTC0 -+ -+Event 8:5 Counter event enabled for this counter. Possible events -+are listed in Table 6-30. R/W Undefined -+ -+IE 4 Counter Interrupt Enable. This bit masks bit 31 of the -+associated count register from the interrupt exception -+request output. R/W 0 -+ -+U 3 Count in User Mode. When this bit is set, the specified -+event is counted in User Mode. R/W Undefined -+ -+S 2 Count in Supervisor Mode. When this bit is set, the -+specified event is counted in Supervisor Mode. R/W Undefined -+ -+K 1 Count in Kernel Mode. When this bit is set, count the -+event in Kernel Mode when EXL and ERL both are 0. R/W Undefined -+ -+EXL 0 Count when EXL. When this bit is set, count the event -+when EXL = 1 and ERL = 0. 
R/W Undefined -+*/ -+ -+static inline void pfm_arch_resend_irq(struct pfm_context *ctx) -+{} -+ -+static inline void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{} -+ -+static inline void pfm_arch_serialize(void) -+{} -+ -+ -+/* -+ * MIPS does not save the PMDs during pfm_arch_intr_freeze_pmu(), thus -+ * this routine needs to do it when switching sets on overflow -+ */ -+static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ pfm_save_pmds(ctx, set); -+} -+ -+static inline void pfm_arch_write_pmc(struct pfm_context *ctx, -+ unsigned int cnum, u64 value) -+{ -+ /* -+ * we only write to the actual register when monitoring is -+ * active (pfm_start was issued) -+ */ -+ if (ctx && (ctx->flags.started == 0)) -+ return; -+ -+ switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) { -+ case 0: -+ write_c0_perfctrl0(value); -+ break; -+ case 1: -+ write_c0_perfctrl1(value); -+ break; -+ case 2: -+ write_c0_perfctrl2(value); -+ break; -+ case 3: -+ write_c0_perfctrl3(value); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static inline void pfm_arch_write_pmd(struct pfm_context *ctx, -+ unsigned int cnum, u64 value) -+{ -+ value &= pfm_pmu_conf->ovfl_mask; -+ -+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { -+ case 0: -+ write_c0_perfcntr0(value); -+ break; -+ case 1: -+ write_c0_perfcntr1(value); -+ break; -+ case 2: -+ write_c0_perfcntr2(value); -+ break; -+ case 3: -+ write_c0_perfcntr3(value); -+ break; -+ default: -+ BUG(); -+ } -+} -+ -+static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum) -+{ -+ switch (pfm_pmu_conf->pmd_desc[cnum].hw_addr) { -+ case 0: -+ return read_c0_perfcntr0(); -+ break; -+ case 1: -+ return read_c0_perfcntr1(); -+ break; -+ case 2: -+ return read_c0_perfcntr2(); -+ break; -+ case 3: -+ return read_c0_perfcntr3(); -+ break; -+ default: -+ BUG(); -+ return 0; -+ } -+} -+ -+static inline u64 pfm_arch_read_pmc(struct pfm_context *ctx, 
unsigned int cnum) -+{ -+ switch (pfm_pmu_conf->pmc_desc[cnum].hw_addr) { -+ case 0: -+ return read_c0_perfctrl0(); -+ break; -+ case 1: -+ return read_c0_perfctrl1(); -+ break; -+ case 2: -+ return read_c0_perfctrl2(); -+ break; -+ case 3: -+ return read_c0_perfctrl3(); -+ break; -+ default: -+ BUG(); -+ return 0; -+ } -+} -+ -+/* -+ * For some CPUs, the upper bits of a counter must be set in order for the -+ * overflow interrupt to happen. On overflow, the counter has wrapped around, -+ * and the upper bits are cleared. This function may be used to set them back. -+ */ -+static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx, -+ unsigned int cnum) -+{ -+ u64 val; -+ val = pfm_arch_read_pmd(ctx, cnum); -+ /* This masks out overflow bit 31 */ -+ pfm_arch_write_pmd(ctx, cnum, val); -+} -+ -+/* -+ * At certain points, perfmon needs to know if monitoring has been -+ * explicitely started/stopped by user via pfm_start/pfm_stop. The -+ * information is tracked in ctx.flags.started. However on certain -+ * architectures, it may be possible to start/stop directly from -+ * user level with a single assembly instruction bypassing -+ * the kernel. This function must be used to determine by -+ * an arch-specific mean if monitoring is actually started/stopped. 
-+ */ -+static inline int pfm_arch_is_active(struct pfm_context *ctx) -+{ -+ return ctx->flags.started; -+} -+ -+static inline void pfm_arch_ctxswout_sys(struct task_struct *task, -+ struct pfm_context *ctx) -+{} -+ -+static inline void pfm_arch_ctxswin_sys(struct task_struct *task, -+ struct pfm_context *ctx) -+{} -+ -+static inline void pfm_arch_ctxswin_thread(struct task_struct *task, -+ struct pfm_context *ctx) -+{} -+int pfm_arch_ctxswout_thread(struct task_struct *task, -+ struct pfm_context *ctx); -+ -+int pfm_arch_is_monitoring_active(struct pfm_context *ctx); -+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx); -+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx); -+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set); -+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set); -+char *pfm_arch_get_pmu_module_name(void); -+ -+static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ pfm_arch_stop(current, ctx); -+ /* -+ * we mark monitoring as stopped to avoid -+ * certain side effects especially in -+ * pfm_switch_sets_from_intr() on -+ * pfm_arch_restore_pmcs() -+ */ -+ ctx->flags.started = 0; -+} -+ -+/* -+ * unfreeze PMU from pfm_do_interrupt_handler() -+ * ctx may be NULL for spurious -+ */ -+static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx) -+{ -+ if (!ctx) -+ return; -+ -+ PFM_DBG_ovfl("state=%d", ctx->state); -+ -+ ctx->flags.started = 1; -+ -+ if (ctx->state == PFM_CTX_MASKED) -+ return; -+ -+ pfm_arch_restore_pmcs(ctx, ctx->active_set); -+} -+ -+/* -+ * this function is called from the PMU interrupt handler ONLY. -+ * On MIPS, the PMU is frozen via arch_stop, masking would be implemented -+ * via arch-stop as well. Given that the PMU is already stopped when -+ * entering the interrupt handler, we do not need to stop it again, so -+ * this function is a nop. 
-+ */ -+static inline void pfm_arch_mask_monitoring(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{} -+ -+/* -+ * on MIPS masking/unmasking uses the start/stop mechanism, so we simply -+ * need to start here. -+ */ -+static inline void pfm_arch_unmask_monitoring(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ pfm_arch_start(current, ctx); -+} -+ -+static inline int pfm_arch_context_create(struct pfm_context *ctx, -+ u32 ctx_flags) -+{ -+ return 0; -+} -+ -+static inline void pfm_arch_context_free(struct pfm_context *ctx) -+{} -+ -+ -+ -+ -+ -+/* -+ * function called from pfm_setfl_sane(). Context is locked -+ * and interrupts are masked. -+ * The value of flags is the value of ctx_flags as passed by -+ * user. -+ * -+ * function must check arch-specific set flags. -+ * Return: -+ * 1 when flags are valid -+ * 0 on error -+ */ -+static inline int -+pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags) -+{ -+ return 0; -+} -+ -+static inline int pfm_arch_init(void) -+{ -+ return 0; -+} -+ -+static inline void pfm_arch_init_percpu(void) -+{} -+ -+static inline int pfm_arch_load_context(struct pfm_context *ctx) -+{ -+ return 0; -+} -+ -+static inline void pfm_arch_unload_context(struct pfm_context *ctx) -+{} -+ -+static inline int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds) -+{ -+ return 0; -+} -+ -+static inline void pfm_arch_pmu_release(void) -+{} -+ -+#ifdef CONFIG_PERFMON_FLUSH -+/* -+ * due to cache aliasing problem on MIPS, it is necessary to flush -+ * pages out of the cache when they are modified. 
-+ */ -+static inline void pfm_cacheflush(void *addr, unsigned int len) -+{ -+ unsigned long start, end; -+ -+ start = (unsigned long)addr & PAGE_MASK; -+ end = ((unsigned long)addr + len + PAGE_SIZE - 1) & PAGE_MASK; -+ -+ while (start < end) { -+ flush_data_cache_page(start); -+ start += PAGE_SIZE; -+ } -+} -+#else -+static inline void pfm_cacheflush(void *addr, unsigned int len) -+{} -+#endif -+ -+static inline void pfm_arch_arm_handle_work(struct task_struct *task) -+{} -+ -+static inline void pfm_arch_disarm_handle_work(struct task_struct *task) -+{} -+ -+static inline int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg) -+{ -+ return 0; -+} -+ -+static inline int pfm_arch_get_base_syscall(void) -+{ -+ if (test_thread_flag(TIF_32BIT_ADDR)) { -+ if (test_thread_flag(TIF_32BIT_REGS)) -+ return __NR_O32_Linux+330; -+ return __NR_N32_Linux+293; -+ } -+ return __NR_64_Linux+289; -+} -+ -+struct pfm_arch_context { -+ /* empty */ -+}; -+ -+#define PFM_ARCH_CTX_SIZE sizeof(struct pfm_arch_context) -+/* -+ * MIPS may need extra alignment requirements for the sampling buffer -+ */ -+#ifdef CONFIG_PERFMON_SMPL_ALIGN -+#define PFM_ARCH_SMPL_ALIGN_SIZE 0x4000 -+#else -+#define PFM_ARCH_SMPL_ALIGN_SIZE 0 -+#endif -+ -+#endif /* CONFIG_PERFMON */ -+ -+#endif /* __KERNEL__ */ -+#endif /* _ASM_MIPS64_PERFMON_KERN_H_ */ -diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h -index a944eda..470cdfc 100644 ---- a/include/asm-mips/system.h -+++ b/include/asm-mips/system.h -@@ -67,6 +67,10 @@ do { \ - __mips_mt_fpaff_switch_to(prev); \ - if (cpu_has_dsp) \ - __save_dsp(prev); \ -+ if (test_tsk_thread_flag(prev, TIF_PERFMON_CTXSW)) \ -+ pfm_ctxsw_out(prev, next); \ -+ if (test_tsk_thread_flag(next, TIF_PERFMON_CTXSW)) \ -+ pfm_ctxsw_in(prev, next); \ - (last) = resume(prev, next, task_thread_info(next)); \ - } while (0) - -diff --git a/include/asm-mips/thread_info.h b/include/asm-mips/thread_info.h -index bb30606..34fd6aa 100644 ---- a/include/asm-mips/thread_info.h 
-+++ b/include/asm-mips/thread_info.h -@@ -114,6 +114,7 @@ register struct thread_info *__current_thread_info __asm__("$28"); - #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ - #define TIF_SYSCALL_AUDIT 3 /* syscall auditing active */ - #define TIF_SECCOMP 4 /* secure computing */ -+#define TIF_PERFMON_WORK 5 /* work for pfm_handle_work() */ - #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */ - #define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */ - #define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */ -@@ -124,6 +125,7 @@ register struct thread_info *__current_thread_info __asm__("$28"); - #define TIF_32BIT_REGS 22 /* also implies 16/32 fprs */ - #define TIF_32BIT_ADDR 23 /* 32-bit address space (o32/n32) */ - #define TIF_FPUBOUND 24 /* thread bound to FPU-full CPU set */ -+#define TIF_PERFMON_CTXSW 25 /* perfmon needs ctxsw calls */ - #define TIF_SYSCALL_TRACE 31 /* syscall trace active */ - - #define _TIF_SYSCALL_TRACE (1< -+ * -+ * This file contains i386/x86_64 specific definitions for the perfmon -+ * interface. -+ * -+ * This file MUST never be included directly. Use linux/perfmon.h. -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#ifndef _ASM_X86_PERFMON__H_ -+#define _ASM_X86_PERFMON__H_ -+ -+/* -+ * arch-specific user visible interface definitions -+ */ -+ -+#define PFM_ARCH_MAX_PMCS (256+64) /* 256 HW 64 SW */ -+#define PFM_ARCH_MAX_PMDS (256+64) /* 256 HW 64 SW */ -+ -+#endif /* _ASM_X86_PERFMON_H_ */ -diff --git a/include/asm-x86/perfmon_kern.h b/include/asm-x86/perfmon_kern.h -new file mode 100644 -index 0000000..0e5d3a5 ---- /dev/null -+++ b/include/asm-x86/perfmon_kern.h -@@ -0,0 +1,548 @@ -+/* -+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * Copyright (c) 2007 Advanced Micro Devices, Inc. -+ * Contributed by Robert Richter -+ * -+ * This file contains X86 Processor Family specific definitions -+ * for the perfmon interface. This covers P6, Pentium M, P4/Xeon -+ * (32-bit and 64-bit, i.e., EM64T) and AMD X86-64. -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#ifndef _ASM_X86_PERFMON_KERN_H_ -+#define _ASM_X86_PERFMON_KERN_H_ -+ -+#ifdef CONFIG_PERFMON -+#include -+#ifdef CONFIG_4KSTACKS -+#define PFM_ARCH_PMD_STK_ARG 2 -+#define PFM_ARCH_PMC_STK_ARG 2 -+#else -+#define PFM_ARCH_PMD_STK_ARG 4 /* about 700 bytes of stack space */ -+#define PFM_ARCH_PMC_STK_ARG 4 /* about 200 bytes of stack space */ -+#endif -+ -+struct pfm_arch_pmu_info { -+ u32 flags; /* PMU feature flags */ -+ /* -+ * mandatory model-specific callbacks -+ */ -+ int (*stop_save)(struct pfm_context *ctx, struct pfm_event_set *set); -+ int (*has_ovfls)(struct pfm_context *ctx); -+ void (*quiesce)(void); -+ -+ /* -+ * optional model-specific callbacks -+ */ -+ void (*acquire_pmu_percpu)(void); -+ void (*release_pmu_percpu)(void); -+ int (*create_context)(struct pfm_context *ctx, u32 ctx_flags); -+ void (*free_context)(struct pfm_context *ctx); -+ int (*load_context)(struct pfm_context *ctx); -+ void (*unload_context)(struct pfm_context *ctx); -+ void (*write_pmc)(struct pfm_context *ctx, unsigned int cnum, u64 value); -+ void (*write_pmd)(struct pfm_context *ctx, unsigned int cnum, u64 value); -+ u64 (*read_pmd)(struct pfm_context *ctx, unsigned int cnum); -+ u64 (*read_pmc)(struct pfm_context *ctx, unsigned int cnum); -+ void (*nmi_copy_state)(struct pfm_context *ctx); -+ void (*restore_pmcs)(struct pfm_context *ctx, -+ struct pfm_event_set *set); -+ void (*restore_pmds)(struct pfm_context *ctx, -+ struct pfm_event_set *set); -+}; -+ -+/* -+ * PMU feature flags -+ */ -+#define PFM_X86_FL_USE_NMI 0x01 /* user asking for NMI */ -+#define PFM_X86_FL_NO_SHARING 0x02 /* no sharing with other subsystems */ -+#define PFM_X86_FL_SHARING 0x04 /* PMU is being shared */ -+ -+struct pfm_x86_ctx_flags { -+ unsigned int insecure:1; /* rdpmc 
per-thread self-monitoring */ -+ unsigned int use_pebs:1; /* PEBS used */ -+ unsigned int use_ds:1; /* DS used */ -+ unsigned int reserved:29; /* for future use */ -+}; -+ -+struct pfm_arch_context { -+ u64 saved_real_iip; /* instr pointer of last NMI intr */ -+ struct pfm_x86_ctx_flags flags; /* flags */ -+ void *ds_area; /* address of DS area (to go away) */ -+ void *data; /* model-specific data */ -+}; -+ -+/* -+ * functions implemented as inline on x86 -+ */ -+ -+/** -+ * pfm_arch_write_pmc - write a single PMC register -+ * @ctx: context to work on -+ * @cnum: PMC index -+ * @value: PMC 64-bit value -+ * -+ * in certain situations, ctx may be NULL -+ */ -+static inline void pfm_arch_write_pmc(struct pfm_context *ctx, -+ unsigned int cnum, u64 value) -+{ -+ struct pfm_arch_pmu_info *pmu_info; -+ -+ pmu_info = pfm_pmu_info(); -+ -+ /* -+ * we only write to the actual register when monitoring is -+ * active (pfm_start was issued) -+ */ -+ if (ctx && ctx->flags.started == 0) -+ return; -+ -+ /* -+ * model-specific override, if any -+ */ -+ if (pmu_info->write_pmc) { -+ pmu_info->write_pmc(ctx, cnum, value); -+ return; -+ } -+ -+ PFM_DBG_ovfl("pfm_arch_write_pmc(0x%lx, 0x%Lx)", -+ pfm_pmu_conf->pmc_desc[cnum].hw_addr, -+ (unsigned long long) value); -+ -+ wrmsrl(pfm_pmu_conf->pmc_desc[cnum].hw_addr, value); -+} -+ -+/** -+ * pfm_arch_write_pmd - write a single PMD register -+ * @ctx: context to work on -+ * @cnum: PMD index -+ * @value: PMD 64-bit value -+ */ -+static inline void pfm_arch_write_pmd(struct pfm_context *ctx, -+ unsigned int cnum, u64 value) -+{ -+ struct pfm_arch_pmu_info *pmu_info; -+ -+ pmu_info = pfm_pmu_info(); -+ -+ /* -+ * to make sure the counter overflows, we set the -+ * upper bits. we also clear any other unimplemented -+ * bits as this may cause crash on some processors. 
-+ */ -+ if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_C64) -+ value = (value | ~pfm_pmu_conf->ovfl_mask) -+ & ~pfm_pmu_conf->pmd_desc[cnum].rsvd_msk; -+ -+ PFM_DBG_ovfl("pfm_arch_write_pmd(0x%lx, 0x%Lx)", -+ pfm_pmu_conf->pmd_desc[cnum].hw_addr, -+ (unsigned long long) value); -+ -+ /* -+ * model-specific override, if any -+ */ -+ if (pmu_info->write_pmd) { -+ pmu_info->write_pmd(ctx, cnum, value); -+ return; -+ } -+ -+ wrmsrl(pfm_pmu_conf->pmd_desc[cnum].hw_addr, value); -+} -+ -+/** -+ * pfm_arch_read_pmd - read a single PMD register -+ * @ctx: context to work on -+ * @cnum: PMD index -+ * -+ * return value is register 64-bit value -+ */ -+static inline u64 pfm_arch_read_pmd(struct pfm_context *ctx, unsigned int cnum) -+{ -+ struct pfm_arch_pmu_info *pmu_info; -+ u64 tmp; -+ -+ pmu_info = pfm_pmu_info(); -+ -+ /* -+ * model-specific override, if any -+ */ -+ if (pmu_info->read_pmd) -+ tmp = pmu_info->read_pmd(ctx, cnum); -+ else -+ rdmsrl(pfm_pmu_conf->pmd_desc[cnum].hw_addr, tmp); -+ -+ PFM_DBG_ovfl("pfm_arch_read_pmd(0x%lx) = 0x%Lx", -+ pfm_pmu_conf->pmd_desc[cnum].hw_addr, -+ (unsigned long long) tmp); -+ return tmp; -+} -+ -+/** -+ * pfm_arch_read_pmc - read a single PMC register -+ * @ctx: context to work on -+ * @cnum: PMC index -+ * -+ * return value is register 64-bit value -+ */ -+static inline u64 pfm_arch_read_pmc(struct pfm_context *ctx, unsigned int cnum) -+{ -+ struct pfm_arch_pmu_info *pmu_info; -+ u64 tmp; -+ -+ pmu_info = pfm_pmu_info(); -+ -+ /* -+ * model-specific override, if any -+ */ -+ if (pmu_info->read_pmc) -+ tmp = pmu_info->read_pmc(ctx, cnum); -+ else -+ rdmsrl(pfm_pmu_conf->pmc_desc[cnum].hw_addr, tmp); -+ -+ PFM_DBG_ovfl("pfm_arch_read_pmc(0x%lx) = 0x%016Lx", -+ pfm_pmu_conf->pmc_desc[cnum].hw_addr, -+ (unsigned long long) tmp); -+ return tmp; -+} -+ -+/** -+ * pfm_arch_is_active - return non-zero is monitoring has been started -+ * @ctx: context to check -+ * -+ * At certain points, perfmon needs to know if monitoring has been -+ 
* explicitly started. -+ * -+ * On x86, there is not other way but to use pfm_start/pfm_stop -+ * to activate monitoring, thus we can simply check flags.started -+ */ -+static inline int pfm_arch_is_active(struct pfm_context *ctx) -+{ -+ return ctx->flags.started; -+} -+ -+ -+/** -+ * pfm_arch_unload_context - detach context from thread or CPU -+ * @ctx: context to detach -+ * -+ * in system-wide ctx->task is NULL, otherwise it points to the -+ * attached thread -+ */ -+static inline void pfm_arch_unload_context(struct pfm_context *ctx) -+{ -+ struct pfm_arch_pmu_info *pmu_info; -+ struct pfm_arch_context *ctx_arch; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ pmu_info = pfm_pmu_info(); -+ -+ if (ctx_arch->flags.insecure) { -+ PFM_DBG("clear cr4.pce"); -+ clear_in_cr4(X86_CR4_PCE); -+ } -+ -+ if (pmu_info->unload_context) -+ pmu_info->unload_context(ctx); -+} -+ -+/** -+ * pfm_arch_load_context - attach context to thread or CPU -+ * @ctx: context to attach -+ */ -+static inline int pfm_arch_load_context(struct pfm_context *ctx) -+{ -+ struct pfm_arch_pmu_info *pmu_info; -+ struct pfm_arch_context *ctx_arch; -+ int ret = 0; -+ -+ ctx_arch = pfm_ctx_arch(ctx); -+ pmu_info = pfm_pmu_info(); -+ -+ /* -+ * RDPMC authorized in system-wide and -+ * per-thread self-monitoring. -+ * -+ * RDPMC only gives access to counts. -+ * -+ * The context-switch routine code does not restore -+ * all the PMD registers (optimization), thus there -+ * is a possible leak of counts there in per-thread -+ * mode. 
-+ */ -+ if (ctx->task == current || ctx->flags.system) { -+ PFM_DBG("set cr4.pce"); -+ set_in_cr4(X86_CR4_PCE); -+ ctx_arch->flags.insecure = 1; -+ } -+ -+ if (pmu_info->load_context) -+ ret = pmu_info->load_context(ctx); -+ -+ return ret; -+} -+ -+void pfm_arch_restore_pmcs(struct pfm_context *ctx, struct pfm_event_set *set); -+void pfm_arch_start(struct task_struct *task, struct pfm_context *ctx); -+void pfm_arch_stop(struct task_struct *task, struct pfm_context *ctx); -+ -+/** -+ * pfm_arch_unmask_monitoring - unmask monitoring -+ * @ctx: context to mask -+ * @set: current event set -+ * -+ * masking is slightly different from stopping in that, it does not undo -+ * the pfm_start() issued by user. This is used in conjunction with -+ * sampling. Masking means stop monitoring, but do not authorize user -+ * to issue pfm_start/stop during that time. Unmasking is achieved via -+ * pfm_restart() and also may also depend on the sampling format used. -+ * -+ * on x86 masking/unmasking use the start/stop mechanism, except -+ * that flags.started is not modified. -+ */ -+static inline void pfm_arch_unmask_monitoring(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ pfm_arch_start(current, ctx); -+} -+ -+/** -+ * pfm_arch_intr_freeze_pmu - stop monitoring when handling PMU interrupt -+ * @ctx: current context -+ * @set: current event set -+ * -+ * called from __pfm_interrupt_handler(). -+ * ctx is not NULL. ctx is locked. interrupts are masked -+ * -+ * The following actions must take place: -+ * - stop all monitoring to ensure handler has consistent view. -+ * - collect overflowed PMDs bitmask into povfls_pmds and -+ * npend_ovfls. If no interrupt detected then npend_ovfls -+ * must be set to zero. 
-+ */ -+static inline void pfm_arch_intr_freeze_pmu(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ /* -+ * on X86, freezing is equivalent to stopping -+ */ -+ pfm_arch_stop(current, ctx); -+ -+ /* -+ * we mark monitoring as stopped to avoid -+ * certain side effects especially in -+ * pfm_switch_sets_from_intr() and -+ * pfm_arch_restore_pmcs() -+ */ -+ ctx->flags.started = 0; -+} -+ -+/** -+ * pfm_arch_intr_unfreeze_pmu - conditionally reactive monitoring -+ * @ctx: current context -+ * -+ * current context may be not when dealing when spurious interrupts -+ * -+ * Must re-activate monitoring if context is not MASKED. -+ * interrupts are masked. -+ */ -+static inline void pfm_arch_intr_unfreeze_pmu(struct pfm_context *ctx) -+{ -+ if (ctx == NULL) -+ return; -+ -+ PFM_DBG_ovfl("state=%d", ctx->state); -+ -+ /* -+ * restore flags.started which is cleared in -+ * pfm_arch_intr_freeze_pmu() -+ */ -+ ctx->flags.started = 1; -+ -+ if (ctx->state == PFM_CTX_MASKED) -+ return; -+ -+ pfm_arch_restore_pmcs(ctx, ctx->active_set); -+} -+ -+/** -+ * pfm_arch_setfl_sane - check arch/model specific event set flags -+ * @ctx: context to work on -+ * @flags: event set flags as passed by user -+ * -+ * called from pfm_setfl_sane(). Context is locked. Interrupts are masked. -+ * -+ * Return: -+ * 0 when flags are valid -+ * 1 on error -+ */ -+static inline int pfm_arch_setfl_sane(struct pfm_context *ctx, u32 flags) -+{ -+ return 0; -+} -+ -+/** -+ * pfm_arch_ovfl_reset_pmd - reset pmd on overflow -+ * @ctx: current context -+ * @cnum: PMD index -+ * -+ * On some CPUs, the upper bits of a counter must be set in order for the -+ * overflow interrupt to happen. On overflow, the counter has wrapped around, -+ * and the upper bits are cleared. This function may be used to set them back. -+ * -+ * For x86, the current version loses whatever is remaining in the counter, -+ * which is usually has a small count. 
In order not to loose this count, -+ * we do a read-modify-write to set the upper bits while preserving the -+ * low-order bits. This is slow but works. -+ */ -+static inline void pfm_arch_ovfl_reset_pmd(struct pfm_context *ctx, unsigned int cnum) -+{ -+ u64 val; -+ val = pfm_arch_read_pmd(ctx, cnum); -+ pfm_arch_write_pmd(ctx, cnum, val); -+} -+ -+/** -+ * pfm_arch_context_create - create context -+ * @ctx: newly created context -+ * @flags: context flags as passed by user -+ * -+ * called from __pfm_create_context() -+ */ -+static inline int pfm_arch_context_create(struct pfm_context *ctx, u32 ctx_flags) -+{ -+ struct pfm_arch_pmu_info *pmu_info; -+ -+ pmu_info = pfm_pmu_info(); -+ -+ if (pmu_info->create_context) -+ return pmu_info->create_context(ctx, ctx_flags); -+ -+ return 0; -+} -+ -+/** -+ * pfm_arch_context_free - free context -+ * @ctx: context to free -+ */ -+static inline void pfm_arch_context_free(struct pfm_context *ctx) -+{ -+ struct pfm_arch_pmu_info *pmu_info; -+ -+ pmu_info = pfm_pmu_info(); -+ -+ if (pmu_info->free_context) -+ pmu_info->free_context(ctx); -+} -+ -+/* -+ * pfm_arch_clear_pmd_ovfl_cond - alter the pmds in such a way that they -+ * will not cause cause interrupts when unused. 
-+ * -+ * This is a nop on x86 -+ */ -+static inline void pfm_arch_clear_pmd_ovfl_cond(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{} -+ -+/* -+ * functions implemented in arch/x86/perfmon/perfmon.c -+ */ -+int pfm_arch_init(void); -+void pfm_arch_resend_irq(struct pfm_context *ctx); -+ -+int pfm_arch_ctxswout_thread(struct task_struct *task, struct pfm_context *ctx); -+void pfm_arch_ctxswin_thread(struct task_struct *task, struct pfm_context *ctx); -+ -+void pfm_arch_restore_pmds(struct pfm_context *ctx, struct pfm_event_set *set); -+int pfm_arch_pmu_config_init(struct pfm_pmu_config *cfg); -+void pfm_arch_pmu_config_remove(void); -+char *pfm_arch_get_pmu_module_name(void); -+int pfm_arch_pmu_acquire(u64 *unavail_pmcs, u64 *unavail_pmds); -+void pfm_arch_pmu_release(void); -+ -+/* -+ * pfm_arch_serialize - make PMU modifications visible to subsequent instructions -+ * -+ * This is a nop on x86 -+ */ -+static inline void pfm_arch_serialize(void) -+{} -+ -+/* -+ * on x86, the PMDs are already saved by pfm_arch_freeze_pmu() -+ * when entering the PMU interrupt handler, thus, we do not need -+ * to save them again in pfm_switch_sets_from_intr() -+ */ -+static inline void pfm_arch_save_pmds_from_intr(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{} -+ -+ -+static inline void pfm_arch_ctxswout_sys(struct task_struct *task, -+ struct pfm_context *ctx) -+{} -+ -+static inline void pfm_arch_ctxswin_sys(struct task_struct *task, -+ struct pfm_context *ctx) -+{} -+ -+static inline void pfm_arch_init_percpu(void) -+{} -+ -+static inline void pfm_cacheflush(void *addr, unsigned int len) -+{} -+ -+/* -+ * this function is called from the PMU interrupt handler ONLY. -+ * On x86, the PMU is frozen via arch_stop, masking would be implemented -+ * via arch-stop as well. Given that the PMU is already stopped when -+ * entering the interrupt handler, we do not need to stop it again, so -+ * this function is a nop. 
-+ */ -+static inline void pfm_arch_mask_monitoring(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{} -+ -+ -+static inline void pfm_arch_arm_handle_work(struct task_struct *task) -+{} -+ -+static inline void pfm_arch_disarm_handle_work(struct task_struct *task) -+{} -+ -+static inline int pfm_arch_get_base_syscall(void) -+{ -+#ifdef __x86_64__ -+ /* 32-bit syscall definition coming from ia32_unistd.h */ -+ if (test_thread_flag(TIF_IA32)) -+ return __NR_ia32_pfm_create_context; -+#endif -+ return __NR_pfm_create_context; -+} -+ -+#define PFM_ARCH_CTX_SIZE (sizeof(struct pfm_arch_context)) -+/* -+ * x86 does not need extra alignment requirements for the sampling buffer -+ */ -+#define PFM_ARCH_SMPL_ALIGN_SIZE 0 -+ -+asmlinkage void pmu_interrupt(void); -+ -+#endif /* CONFIG_PEFMON */ -+ -+#endif /* _ASM_X86_PERFMON_KERN_H_ */ -diff --git a/include/asm-x86/perfmon_pebs_core_smpl.h b/include/asm-x86/perfmon_pebs_core_smpl.h -new file mode 100644 -index 0000000..4a12e0d ---- /dev/null -+++ b/include/asm-x86/perfmon_pebs_core_smpl.h -@@ -0,0 +1,164 @@ -+/* -+ * Copyright (c) 2005-2007 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ * -+ * This file implements the sampling format to support Intel -+ * Precise Event Based Sampling (PEBS) feature of Intel Core -+ * processors, such as Intel Core 2. -+ * -+ * What is PEBS? -+ * ------------ -+ * This is a hardware feature to enhance sampling by providing -+ * better precision as to where a sample is taken. This avoids the -+ * typical skew in the instruction one can observe with any -+ * interrupt-based sampling technique. -+ * -+ * PEBS also lowers sampling overhead significantly by having the -+ * processor store samples instead of the OS. PMU interrupt are only -+ * generated after multiple samples are written. -+ * -+ * Another benefit of PEBS is that samples can be captured inside -+ * critical sections where interrupts are masked. -+ * -+ * How does it work? -+ * PEBS effectively implements a Hw buffer. The Os must pass a region -+ * of memory where samples are to be stored. The region can have any -+ * size. The OS must also specify the sampling period to reload. The PMU -+ * will interrupt when it reaches the end of the buffer or a specified -+ * threshold location inside the memory region. -+ * -+ * The description of the buffer is stored in the Data Save Area (DS). -+ * The samples are stored sequentially in the buffer. The format of the -+ * buffer is fixed and specified in the PEBS documentation. The sample -+ * format does not change between 32-bit and 64-bit modes unlike on the -+ * Pentium 4 version of PEBS. -+ * -+ * PEBS does not work when HyperThreading is enabled due to certain MSR -+ * being shared being to two threads. -+ * -+ * What does the format do? -+ * It provides access to the PEBS feature for both 32-bit and 64-bit -+ * processors that support it. 
-+ * -+ * The same code and data structures are used for both 32-bit and 64-bi -+ * modes. A single format name is used for both modes. In 32-bit mode, -+ * some of the extended registers are written to zero in each sample. -+ * -+ * It is important to realize that the format provides a zero-copy -+ * environment for the samples, i.e,, the OS never touches the -+ * samples. Whatever the processor write is directly accessible to -+ * the user. -+ * -+ * Parameters to the buffer can be passed via pfm_create_context() in -+ * the pfm_pebs_smpl_arg structure. -+ */ -+#ifndef __PERFMON_PEBS_CORE_SMPL_H__ -+#define __PERFMON_PEBS_CORE_SMPL_H__ 1 -+ -+/* -+ * The 32-bit and 64-bit formats are identical, thus we use only -+ * one name for the format. -+ */ -+#define PFM_PEBS_CORE_SMPL_NAME "pebs_core" -+ -+/* -+ * format specific parameters (passed at context creation) -+ * -+ * intr_thres: index from start of buffer of entry where the -+ * PMU interrupt must be triggered. It must be several samples -+ * short of the end of the buffer. -+ */ -+struct pfm_pebs_core_smpl_arg { -+ u64 cnt_reset; /* counter reset value */ -+ size_t buf_size; /* size of the PEBS buffer in bytes */ -+ size_t intr_thres;/* index of PEBS interrupt threshold entry */ -+ u64 reserved[6]; /* for future use */ -+}; -+ -+/* -+ * Data Save Area (32 and 64-bit mode) -+ * -+ * The DS area is exposed to the user. To determine the number -+ * of samples available in PEBS, it is necessary to substract -+ * pebs_index from pebs_base. -+ * -+ * Layout of the structure is mandated by hardware and specified -+ * in the Intel documentation. -+ */ -+struct pfm_ds_area_core { -+ u64 bts_buf_base; -+ u64 bts_index; -+ u64 bts_abs_max; -+ u64 bts_intr_thres; -+ u64 pebs_buf_base; -+ u64 pebs_index; -+ u64 pebs_abs_max; -+ u64 pebs_intr_thres; -+ u64 pebs_cnt_reset; -+}; -+ -+/* -+ * This header is at the beginning of the sampling buffer returned to the user. 
-+ * -+ * Because of PEBS alignement constraints, the actual PEBS buffer area does -+ * not necessarily begin right after the header. The hdr_start_offs must be -+ * used to compute the first byte of the buffer. The offset is defined as -+ * the number of bytes between the end of the header and the beginning of -+ * the buffer. As such the formula is: -+ * actual_buffer = (unsigned long)(hdr+1)+hdr->hdr_start_offs -+ */ -+struct pfm_pebs_core_smpl_hdr { -+ u64 overflows; /* #overflows for buffer */ -+ size_t buf_size; /* bytes in the buffer */ -+ size_t start_offs; /* actual buffer start offset */ -+ u32 version; /* smpl format version */ -+ u32 reserved1; /* for future use */ -+ u64 reserved2[5]; /* for future use */ -+ struct pfm_ds_area_core ds; /* data save area */ -+}; -+ -+/* -+ * Sample format as mandated by Intel documentation. -+ * The same format is used in both 32 and 64 bit modes. -+ */ -+struct pfm_pebs_core_smpl_entry { -+ u64 eflags; -+ u64 ip; -+ u64 eax; -+ u64 ebx; -+ u64 ecx; -+ u64 edx; -+ u64 esi; -+ u64 edi; -+ u64 ebp; -+ u64 esp; -+ u64 r8; /* 0 in 32-bit mode */ -+ u64 r9; /* 0 in 32-bit mode */ -+ u64 r10; /* 0 in 32-bit mode */ -+ u64 r11; /* 0 in 32-bit mode */ -+ u64 r12; /* 0 in 32-bit mode */ -+ u64 r13; /* 0 in 32-bit mode */ -+ u64 r14; /* 0 in 32-bit mode */ -+ u64 r15; /* 0 in 32-bit mode */ -+}; -+ -+#define PFM_PEBS_CORE_SMPL_VERSION_MAJ 1U -+#define PFM_PEBS_CORE_SMPL_VERSION_MIN 0U -+#define PFM_PEBS_CORE_SMPL_VERSION (((PFM_PEBS_CORE_SMPL_VERSION_MAJ&0xffff)<<16)|\ -+ (PFM_PEBS_CORE_SMPL_VERSION_MIN & 0xffff)) -+ -+#endif /* __PERFMON_PEBS_CORE_SMPL_H__ */ -diff --git a/include/asm-x86/perfmon_pebs_p4_smpl.h b/include/asm-x86/perfmon_pebs_p4_smpl.h -new file mode 100644 -index 0000000..26b51b4 ---- /dev/null -+++ b/include/asm-x86/perfmon_pebs_p4_smpl.h -@@ -0,0 +1,193 @@ -+/* -+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. 
-+ * Contributed by Stephane Eranian -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ * -+ * This file implements the sampling format to support Intel -+ * Precise Event Based Sampling (PEBS) feature of Pentium 4 -+ * and other Netburst-based processors. Not to be used for -+ * Intel Core-based processors. -+ * -+ * What is PEBS? -+ * ------------ -+ * This is a hardware feature to enhance sampling by providing -+ * better precision as to where a sample is taken. This avoids the -+ * typical skew in the instruction one can observe with any -+ * interrupt-based sampling technique. -+ * -+ * PEBS also lowers sampling overhead significantly by having the -+ * processor store samples instead of the OS. PMU interrupt are only -+ * generated after multiple samples are written. -+ * -+ * Another benefit of PEBS is that samples can be captured inside -+ * critical sections where interrupts are masked. -+ * -+ * How does it work? -+ * PEBS effectively implements a Hw buffer. The Os must pass a region -+ * of memory where samples are to be stored. The region can have any -+ * size. The OS must also specify the sampling period to reload. The PMU -+ * will interrupt when it reaches the end of the buffer or a specified -+ * threshold location inside the memory region. -+ * -+ * The description of the buffer is stored in the Data Save Area (DS). 
-+ * The samples are stored sequentially in the buffer. The format of the -+ * buffer is fixed and specified in the PEBS documentation. The sample -+ * format changes between 32-bit and 64-bit modes due to extended register -+ * file. -+ * -+ * PEBS does not work when HyperThreading is enabled due to certain MSRs -+ * being shared between the two threads. -+ * -+ * What does the format do? -+ * It provides access to the PEBS feature for both 32-bit and 64-bit -+ * processors that support it. -+ * -+ * The same code is used for both 32-bit and 64-bit modes, but different -+ * format names are used because the two modes are not compatible due to -+ * data model and register file differences. Similarly the public data -+ * structures describing the samples are different. -+ * -+ * It is important to realize that the format provides a zero-copy environment -+ * for the samples, i.e., the OS never touches the samples. Whatever the -+ * processor writes is directly accessible to the user. -+ * -+ * Parameters to the buffer can be passed via pfm_create_context() in -+ * the pfm_pebs_p4_smpl_arg structure. -+ * -+ * It is not possible to mix a 32-bit PEBS application on top of a 64-bit -+ * host kernel. -+ */ -+#ifndef __PERFMON_PEBS_P4_SMPL_H__ -+#define __PERFMON_PEBS_P4_SMPL_H__ 1 -+ -+#ifdef __i386__ -+/* -+ * The 32-bit and 64-bit formats are not compatible, thus we have -+ * two different identifications so that 32-bit programs running on -+ * 64-bit OS will fail to use the 64-bit PEBS support. -+ */ -+#define PFM_PEBS_P4_SMPL_NAME "pebs32_p4" -+#else -+#define PFM_PEBS_P4_SMPL_NAME "pebs64_p4" -+#endif -+ -+/* -+ * format specific parameters (passed at context creation) -+ * -+ * intr_thres: index from start of buffer of entry where the -+ * PMU interrupt must be triggered. It must be several samples -+ * short of the end of the buffer.
-+ */ -+struct pfm_pebs_p4_smpl_arg { -+ u64 cnt_reset; /* counter reset value */ -+ size_t buf_size; /* size of the PEBS buffer in bytes */ -+ size_t intr_thres;/* index of PEBS interrupt threshold entry */ -+ u64 reserved[6]; /* for future use */ -+}; -+ -+/* -+ * Data Save Area (32 and 64-bit mode) -+ * -+ * The DS area must be exposed to the user because this is the only -+ * way to report on the number of valid entries recorded by the CPU. -+ * This is required when the buffer is not full, i.e., there was no -+ * PMU interrupt. -+ * -+ * Layout of the structure is mandated by hardware and specified in -+ * the Intel documentation. -+ */ -+struct pfm_ds_area_p4 { -+ unsigned long bts_buf_base; -+ unsigned long bts_index; -+ unsigned long bts_abs_max; -+ unsigned long bts_intr_thres; -+ unsigned long pebs_buf_base; -+ unsigned long pebs_index; -+ unsigned long pebs_abs_max; -+ unsigned long pebs_intr_thres; -+ u64 pebs_cnt_reset; -+}; -+ -+/* -+ * This header is at the beginning of the sampling buffer returned to the user. -+ * -+ * Because of PEBS alignment constraints, the actual PEBS buffer area does -+ * not necessarily begin right after the header. The start_offs field must be -+ * used to compute the first byte of the buffer. The offset is defined as -+ * the number of bytes between the end of the header and the beginning of -+ * the buffer. As such the formula is: -+ * actual_buffer = (unsigned long)(hdr+1)+hdr->start_offs -+ */ -+struct pfm_pebs_p4_smpl_hdr { -+ u64 overflows; /* #overflows for buffer */ -+ size_t buf_size; /* bytes in the buffer */ -+ size_t start_offs; /* actual buffer start offset */ -+ u32 version; /* smpl format version */ -+ u32 reserved1; /* for future use */ -+ u64 reserved2[5]; /* for future use */ -+ struct pfm_ds_area_p4 ds; /* data save area */ -+}; -+ -+/* -+ * 64-bit PEBS record format is described in -+ * http://www.intel.com/technology/64bitextensions/30083502.pdf -+ * -+ * The format does not peek at samples.
The sample structure is only -+ * used to ensure that the buffer is large enough to accommodate one -+ * sample. -+ */ -+#ifdef __i386__ -+struct pfm_pebs_p4_smpl_entry { -+ u32 eflags; -+ u32 ip; -+ u32 eax; -+ u32 ebx; -+ u32 ecx; -+ u32 edx; -+ u32 esi; -+ u32 edi; -+ u32 ebp; -+ u32 esp; -+}; -+#else -+struct pfm_pebs_p4_smpl_entry { -+ u64 eflags; -+ u64 ip; -+ u64 eax; -+ u64 ebx; -+ u64 ecx; -+ u64 edx; -+ u64 esi; -+ u64 edi; -+ u64 ebp; -+ u64 esp; -+ u64 r8; -+ u64 r9; -+ u64 r10; -+ u64 r11; -+ u64 r12; -+ u64 r13; -+ u64 r14; -+ u64 r15; -+}; -+#endif -+ -+#define PFM_PEBS_P4_SMPL_VERSION_MAJ 1U -+#define PFM_PEBS_P4_SMPL_VERSION_MIN 0U -+#define PFM_PEBS_P4_SMPL_VERSION (((PFM_PEBS_P4_SMPL_VERSION_MAJ&0xffff)<<16)|\ -+ (PFM_PEBS_P4_SMPL_VERSION_MIN & 0xffff)) -+ -+#endif /* __PERFMON_PEBS_P4_SMPL_H__ */ -diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h -index da0a675..b3a6ae9 100644 ---- a/include/asm-x86/thread_info.h -+++ b/include/asm-x86/thread_info.h -@@ -71,6 +71,7 @@ struct thread_info { - * Warning: layout of LSW is hardcoded in entry.S - */ - #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ -+#define TIF_PERFMON_WORK 1 /* work for pfm_handle_work() */ - #define TIF_SIGPENDING 2 /* signal pending */ - #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ - #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ -@@ -91,6 +92,7 @@ struct thread_info { - #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ - #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ - #define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ -+#define TIF_PERFMON_CTXSW 28 /* perfmon needs ctxsw calls */ - - #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) - #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) -@@ -112,6 +114,8 @@ struct thread_info { - #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) - #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) - #define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS)
-+#define _TIF_PERFMON_WORK (1< -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+ -+#ifndef __LINUX_PERFMON_H__ -+#define __LINUX_PERFMON_H__ -+ -+/* -+ * This file contains all the user visible generic definitions for the -+ * interface. Model-specific user-visible definitions are located in -+ * the asm/perfmon.h file. -+ */ -+ -+/* -+ * include arch-specific user interface definitions -+ */ -+#include -+ -+/* -+ * defined by each arch -+ */ -+#define PFM_MAX_PMCS PFM_ARCH_MAX_PMCS -+#define PFM_MAX_PMDS PFM_ARCH_MAX_PMDS -+ -+/* -+ * number of elements for each type of bitvector -+ * all bitvectors use u64 fixed size type on all architectures. 
-+ */ -+#define PFM_BVSIZE(x) (((x)+(sizeof(__u64)<<3)-1) / (sizeof(__u64)<<3)) -+#define PFM_PMD_BV PFM_BVSIZE(PFM_MAX_PMDS) -+#define PFM_PMC_BV PFM_BVSIZE(PFM_MAX_PMCS) -+ -+/* -+ * register flags layout: -+ * bit[00-15] : generic flags -+ * bit[16-31] : arch-specific flags -+ * -+ * PFM_REGFL_NO_EMUL64: must be set on the PMC controlling the PMD -+ */ -+#define PFM_REGFL_OVFL_NOTIFY 0x1 /* PMD: send notification on event */ -+#define PFM_REGFL_RANDOM 0x2 /* PMD: randomize value after event */ -+#define PFM_REGFL_NO_EMUL64 0x4 /* PMC: no 64-bit emulation */ -+ -+/* -+ * event set flags layout: -+ * bits[00-15] : generic flags -+ * bits[16-31] : arch-specific flags (see asm/perfmon.h) -+ */ -+#define PFM_SETFL_OVFL_SWITCH 0x01 /* enable switch on overflow */ -+#define PFM_SETFL_TIME_SWITCH 0x02 /* enable switch on timeout */ -+ -+/* -+ * argument to pfm_create_context() system call -+ * structure shared with user level -+ */ -+struct pfarg_ctx { -+ __u32 ctx_flags; /* noblock/block/syswide */ -+ __u32 ctx_reserved1; /* for future use */ -+ __u64 ctx_reserved2[7]; /* for future use */ -+}; -+ -+/* -+ * context flags layout: -+ * bits[00-15]: generic flags -+ * bits[16-31]: arch-specific flags (see perfmon_const.h) -+ */ -+#define PFM_FL_NOTIFY_BLOCK 0x01 /* block task on user notifications */ -+#define PFM_FL_SYSTEM_WIDE 0x02 /* create a system wide context */ -+#define PFM_FL_OVFL_NO_MSG 0x80 /* no overflow msgs */ -+ -+/* -+ * argument to pfm_write_pmcs() system call. -+ * structure shared with user level -+ */ -+struct pfarg_pmc { -+ __u16 reg_num; /* which register */ -+ __u16 reg_set; /* event set for this register */ -+ __u32 reg_flags; /* REGFL flags */ -+ __u64 reg_value; /* pmc value */ -+ __u64 reg_reserved2[4]; /* for future use */ -+}; -+ -+/* -+ * argument to pfm_write_pmds() and pfm_read_pmds() system calls. 
-+ * structure shared with user level -+ */ -+struct pfarg_pmd { -+ __u16 reg_num; /* which register */ -+ __u16 reg_set; /* event set for this register */ -+ __u32 reg_flags; /* REGFL flags */ -+ __u64 reg_value; /* initial pmc/pmd value */ -+ __u64 reg_long_reset; /* value to reload after notification */ -+ __u64 reg_short_reset; /* reset after counter overflow */ -+ __u64 reg_last_reset_val; /* return: PMD last reset value */ -+ __u64 reg_ovfl_switch_cnt; /* #overflows before switch */ -+ __u64 reg_reset_pmds[PFM_PMD_BV]; /* reset on overflow */ -+ __u64 reg_smpl_pmds[PFM_PMD_BV]; /* record in sample */ -+ __u64 reg_smpl_eventid; /* opaque event identifier */ -+ __u64 reg_random_mask; /* bitmask used to limit random value */ -+ __u32 reg_random_seed; /* seed for randomization (OBSOLETE) */ -+ __u32 reg_reserved2[7]; /* for future use */ -+}; -+ -+/* -+ * optional argument to pfm_start() system call. Pass NULL if not needed. -+ * structure shared with user level -+ */ -+struct pfarg_start { -+ __u16 start_set; /* event set to start with */ -+ __u16 start_reserved1; /* for future use */ -+ __u32 start_reserved2; /* for future use */ -+ __u64 reserved3[3]; /* for future use */ -+}; -+ -+/* -+ * argument to pfm_load_context() system call. -+ * structure shared with user level -+ */ -+struct pfarg_load { -+ __u32 load_pid; /* thread or CPU to attach to */ -+ __u16 load_set; /* set to load first */ -+ __u16 load_reserved1; /* for future use */ -+ __u64 load_reserved2[3]; /* for future use */ -+}; -+ -+/* -+ * argument to pfm_create_evtsets() and pfm_delete_evtsets() system calls. -+ * structure shared with user level. -+ */ -+struct pfarg_setdesc { -+ __u16 set_id; /* which set */ -+ __u16 set_reserved1; /* for future use */ -+ __u32 set_flags; /* SETFL flags */ -+ __u64 set_timeout; /* switch timeout in nsecs */ -+ __u64 reserved[6]; /* for future use */ -+}; -+ -+/* -+ * argument to pfm_getinfo_evtsets() system call. 
-+ * structure shared with user level -+ */ -+struct pfarg_setinfo { -+ __u16 set_id; /* which set */ -+ __u16 set_reserved1; /* for future use */ -+ __u32 set_flags; /* out: SETFL flags */ -+ __u64 set_ovfl_pmds[PFM_PMD_BV]; /* out: last ovfl PMDs */ -+ __u64 set_runs; /* out: #times the set was active */ -+ __u64 set_timeout; /* out: eff/leftover timeout (nsecs) */ -+ __u64 set_act_duration; /* out: time set was active in nsecs */ -+ __u64 set_avail_pmcs[PFM_PMC_BV];/* out: available PMCs */ -+ __u64 set_avail_pmds[PFM_PMD_BV];/* out: available PMDs */ -+ __u64 set_reserved3[6]; /* for future use */ -+}; -+ -+/* -+ * default value for the user and group security parameters in -+ * /proc/sys/kernel/perfmon/sys_group -+ * /proc/sys/kernel/perfmon/task_group -+ */ -+#define PFM_GROUP_PERM_ANY -1 /* any user/group */ -+ -+/* -+ * overflow notification message. -+ * structure shared with user level -+ */ -+struct pfarg_ovfl_msg { -+ __u32 msg_type; /* message type: PFM_MSG_OVFL */ -+ __u32 msg_ovfl_pid; /* process id */ -+ __u16 msg_active_set; /* active set at overflow */ -+ __u16 msg_ovfl_cpu; /* cpu of PMU interrupt */ -+ __u32 msg_ovfl_tid; /* thread id */ -+ __u64 msg_ovfl_ip; /* IP on PMU intr */ -+ __u64 msg_ovfl_pmds[PFM_PMD_BV];/* overflowed PMDs */ -+}; -+ -+#define PFM_MSG_OVFL 1 /* an overflow happened */ -+#define PFM_MSG_END 2 /* task to which context was attached ended */ -+ -+/* -+ * generic notification message (union). 
-+ * union shared with user level -+ */ -+union pfarg_msg { -+ __u32 type; -+ struct pfarg_ovfl_msg pfm_ovfl_msg; -+}; -+ -+/* -+ * perfmon version number -+ */ -+#define PFM_VERSION_MAJ 2U -+#define PFM_VERSION_MIN 82U -+#define PFM_VERSION (((PFM_VERSION_MAJ&0xffff)<<16)|\ -+ (PFM_VERSION_MIN & 0xffff)) -+#define PFM_VERSION_MAJOR(x) (((x)>>16) & 0xffff) -+#define PFM_VERSION_MINOR(x) ((x) & 0xffff) -+ -+#endif /* __LINUX_PERFMON_H__ */ -diff --git a/include/linux/perfmon_dfl_smpl.h b/include/linux/perfmon_dfl_smpl.h -new file mode 100644 -index 0000000..e0817a8 ---- /dev/null -+++ b/include/linux/perfmon_dfl_smpl.h -@@ -0,0 +1,78 @@ -+/* -+ * Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This file implements the new dfl sampling buffer format -+ * for perfmon2 subsystem. -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#ifndef __PERFMON_DFL_SMPL_H__ -+#define __PERFMON_DFL_SMPL_H__ 1 -+ -+/* -+ * format specific parameters (passed at context creation) -+ */ -+struct pfm_dfl_smpl_arg { -+ __u64 buf_size; /* size of the buffer in bytes */ -+ __u32 buf_flags; /* buffer specific flags */ -+ __u32 reserved1; /* for future use */ -+ __u64 reserved[6]; /* for future use */ -+}; -+ -+/* -+ * This header is at the beginning of the sampling buffer returned to the user. 
-+ * It is directly followed by the first record. -+ */ -+struct pfm_dfl_smpl_hdr { -+ __u64 hdr_count; /* how many valid entries */ -+ __u64 hdr_cur_offs; /* current offset from top of buffer */ -+ __u64 hdr_overflows; /* #overflows for buffer */ -+ __u64 hdr_buf_size; /* bytes in the buffer */ -+ __u64 hdr_min_buf_space;/* minimal buffer size (internal use) */ -+ __u32 hdr_version; /* smpl format version */ -+ __u32 hdr_buf_flags; /* copy of buf_flags */ -+ __u64 hdr_reserved[10]; /* for future use */ -+}; -+ -+/* -+ * Entry header in the sampling buffer. The header is directly followed -+ * by the values of the PMD registers of interest saved in increasing -+ * index order: PMD4, PMD5, and so on. How many PMDs are present depends -+ * on how the session was programmed. -+ * -+ * In the case where multiple counters overflow at the same time, multiple -+ * entries are written consecutively. -+ * -+ * last_reset_val member indicates the initial value of the overflowed PMD. -+ */ -+struct pfm_dfl_smpl_entry { -+ __u32 pid; /* thread id (for NPTL, this is gettid()) */ -+ __u16 ovfl_pmd; /* index of overflowed PMD for this sample */ -+ __u16 reserved; /* for future use */ -+ __u64 last_reset_val; /* initial value of overflowed PMD */ -+ __u64 ip; /* where the overflow interrupt happened */ -+ __u64 tstamp; /* overflow timestamp */ -+ __u16 cpu; /* cpu on which the overflow occurred */ -+ __u16 set; /* event set active when overflow occurred */ -+ __u32 tgid; /* thread group id (getpid() for NPTL) */ -+}; -+ -+#define PFM_DFL_SMPL_VERSION_MAJ 1U -+#define PFM_DFL_SMPL_VERSION_MIN 0U -+#define PFM_DFL_SMPL_VERSION (((PFM_DFL_SMPL_VERSION_MAJ&0xffff)<<16)|\ -+ (PFM_DFL_SMPL_VERSION_MIN & 0xffff)) -+ -+#endif /* __PERFMON_DFL_SMPL_H__ */ -diff --git a/include/linux/perfmon_fmt.h b/include/linux/perfmon_fmt.h -new file mode 100644 -index 0000000..82a6a90 ---- /dev/null -+++ b/include/linux/perfmon_fmt.h -@@ -0,0 +1,74 @@ -+/* -+ * Copyright (c) 2001-2006 Hewlett-Packard
Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * Interface for custom sampling buffer format modules -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#ifndef __PERFMON_FMT_H__ -+#define __PERFMON_FMT_H__ 1 -+ -+#include -+ -+typedef int (*fmt_validate_t)(u32 flags, u16 npmds, void *arg); -+typedef int (*fmt_getsize_t)(u32 flags, void *arg, size_t *size); -+typedef int (*fmt_init_t)(struct pfm_context *ctx, void *buf, u32 flags, -+ u16 nmpds, void *arg); -+typedef int (*fmt_restart_t)(int is_active, u32 *ovfl_ctrl, void *buf); -+typedef int (*fmt_exit_t)(void *buf); -+typedef int (*fmt_handler_t)(struct pfm_context *ctx, -+ unsigned long ip, u64 stamp, void *data); -+ -+struct pfm_smpl_fmt { -+ char *fmt_name; /* name of the format (required) */ -+ size_t fmt_arg_size; /* size of fmt args for ctx create */ -+ u32 fmt_flags; /* format specific flags */ -+ u32 fmt_version; /* format version number */ -+ -+ fmt_validate_t fmt_validate; /* validate context flags */ -+ fmt_getsize_t fmt_getsize; /* get size for sampling buffer */ -+ fmt_init_t fmt_init; /* initialize buffer area */ -+ fmt_handler_t fmt_handler; /* overflow handler (required) */ -+ fmt_restart_t fmt_restart; /* restart after notification */ -+ fmt_exit_t fmt_exit; /* context termination */ -+ -+ struct list_head fmt_list; /* internal use only */ -+ -+ 
struct kobject kobj; /* sysfs internal use only */ -+ struct module *owner; /* pointer to module owner */ -+ u32 fmt_qdepth; /* Max notify queue depth (required) */ -+}; -+#define to_smpl_fmt(n) container_of(n, struct pfm_smpl_fmt, kobj) -+ -+#define PFM_FMTFL_IS_BUILTIN 0x1 /* fmt is compiled in */ -+/* -+ * we need to know whether the format is builtin or compiled -+ * as a module -+ */ -+#ifdef MODULE -+#define PFM_FMT_BUILTIN_FLAG 0 /* built as a module */ -+#else -+#define PFM_FMT_BUILTIN_FLAG PFM_PMUFL_IS_BUILTIN /* compiled into the kernel */ -+#endif -+ -+int pfm_fmt_register(struct pfm_smpl_fmt *fmt); -+int pfm_fmt_unregister(struct pfm_smpl_fmt *fmt); -+void pfm_sysfs_builtin_fmt_add(void); -+ -+int pfm_sysfs_add_fmt(struct pfm_smpl_fmt *fmt); -+void pfm_sysfs_remove_fmt(struct pfm_smpl_fmt *fmt); -+ -+#endif /* __PERFMON_FMT_H__ */ -diff --git a/include/linux/perfmon_kern.h b/include/linux/perfmon_kern.h -new file mode 100644 -index 0000000..6c3b527 ---- /dev/null -+++ b/include/linux/perfmon_kern.h -@@ -0,0 +1,551 @@ -+/* -+ * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details.
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+ -+#ifndef __LINUX_PERFMON_KERN_H__ -+#define __LINUX_PERFMON_KERN_H__ -+/* -+ * This file contains all the definitions of data structures, variables, macros -+ * that are to be shared between generic code and arch-specific code -+ * -+ * For generic only definitions, use perfmon/perfmon_priv.h -+ */ -+#ifdef CONFIG_PERFMON -+ -+#include -+#include -+#include -+ -+/* -+ * system administrator configuration controls available via -+ * the /sys/kernel/perfmon interface -+ */ -+struct pfm_controls { -+ u32 debug; /* debugging control bitmask */ -+ gid_t sys_group; /* gid to create a syswide context */ -+ gid_t task_group; /* gid to create a per-task context */ -+ u32 flags; /* control flags (see below) */ -+ size_t arg_mem_max; /* maximum vector argument size */ -+ size_t smpl_buffer_mem_max; /* max buf mem, -1 for infinity */ -+}; -+extern struct pfm_controls pfm_controls; -+ -+/* -+ * control flags -+ */ -+#define PFM_CTRL_FL_RW_EXPERT 0x1 /* bypass reserved fields on read/write */ -+ -+/* -+ * software PMD -+ */ -+struct pfm_pmd { -+ u64 value; /* 64-bit value */ -+ u64 lval; /* last reset value */ -+ u64 ovflsw_thres; /* #ovfls left before switch */ -+ u64 long_reset; /* long reset value on overflow */ -+ u64 short_reset; /* short reset value on overflow */ -+ u64 reset_pmds[PFM_PMD_BV]; /* pmds to reset on overflow */ -+ u64 smpl_pmds[PFM_PMD_BV]; /* pmds to record on overflow */ -+ u64 mask; /* range mask for random value */ -+ u64 ovflsw_ref_thres; /* #ovfls before next set */ -+ u64 eventid; /* opaque event identifier */ -+ u32 flags; /* notify/do not notify */ -+}; -+ -+/* -+ * event_set: encapsulates the full PMU state -+ */ -+struct pfm_event_set { -+ struct list_head list; /* ordered chain of sets */ -+ u16 id; /* set
identification */ -+ u16 nused_pmds; /* max number of used PMDs */ -+ u16 nused_pmcs; /* max number of used PMCs */ -+ u16 pad1; /* padding */ -+ u32 flags; /* public flags */ -+ u32 priv_flags; /* private flags (see below) */ -+ u64 runs; /* # of activations */ -+ u32 npend_ovfls; /* number of pending PMD overflows */ -+ u32 pad2; /* padding */ -+ u64 used_pmds[PFM_PMD_BV]; /* used PMDs */ -+ u64 povfl_pmds[PFM_PMD_BV]; /* pending overflowed PMDs */ -+ u64 ovfl_pmds[PFM_PMD_BV]; /* last overflowed PMDs */ -+ u64 reset_pmds[PFM_PMD_BV]; /* PMDs to reset after overflow */ -+ u64 ovfl_notify[PFM_PMD_BV]; /* notify on overflow */ -+ u64 used_pmcs[PFM_PMC_BV]; /* used PMCs */ -+ u64 pmcs[PFM_MAX_PMCS]; /* PMC values */ -+ -+ struct pfm_pmd pmds[PFM_MAX_PMDS]; -+ -+ ktime_t hrtimer_exp; /* switch timeout reference */ -+ ktime_t hrtimer_rem; /* per-thread remainder timeout */ -+ -+ u64 duration_start; /* start time in ns */ -+ u64 duration; /* total active ns */ -+}; -+ -+/* -+ * common private event set flags (priv_flags) -+ * -+ * upper 16 bits: for arch-specific use -+ * lower 16 bits: for common use -+ */ -+#define PFM_SETFL_PRIV_MOD_PMDS 0x1 /* PMD register(s) modified */ -+#define PFM_SETFL_PRIV_MOD_PMCS 0x2 /* PMC register(s) modified */ -+#define PFM_SETFL_PRIV_SWITCH 0x4 /* must switch set on restart */ -+#define PFM_SETFL_PRIV_MOD_BOTH (PFM_SETFL_PRIV_MOD_PMDS \ -+ | PFM_SETFL_PRIV_MOD_PMCS) -+ -+/* -+ * context flags -+ */ -+struct pfm_context_flags { -+ unsigned int block:1; /* task blocks on user notifications */ -+ unsigned int system:1; /* do system wide monitoring */ -+ unsigned int no_msg:1; /* no message sent on overflow */ -+ unsigned int switch_ovfl:1; /* switch set on counter ovfl */ -+ unsigned int switch_time:1; /* switch set on timeout */ -+ unsigned int started:1; /* pfm_start() issued */ -+ unsigned int work_type:2; /* type of work for pfm_handle_work */ -+ unsigned int mmap_nlock:1; /* no lock in pfm_release_buf_space */ -+ unsigned int
ia64_v20_compat:1; /* context is IA-64 v2.0 mode */ -+ unsigned int can_restart:8; /* allowed to issue a PFM_RESTART */ -+ unsigned int reset_count:8; /* number of pending resets */ -+ unsigned int is_self:1; /* per-thread and self-monitoring */ -+ unsigned int reserved:5; /* for future use */ -+}; -+ -+/* -+ * values for work_type (TIF_PERFMON_WORK must be set) -+ */ -+#define PFM_WORK_NONE 0 /* nothing to do */ -+#define PFM_WORK_RESET 1 /* reset overflowed counters */ -+#define PFM_WORK_BLOCK 2 /* block current thread */ -+#define PFM_WORK_ZOMBIE 3 /* cleanup zombie context */ -+ -+/* -+ * overflow description argument passed to sampling format -+ */ -+struct pfm_ovfl_arg { -+ u16 ovfl_pmd; /* index of overflowed PMD */ -+ u16 active_set; /* set active at the time of the overflow */ -+ u32 ovfl_ctrl; /* control flags */ -+ u64 pmd_last_reset; /* last reset value of overflowed PMD */ -+ u64 smpl_pmds_values[PFM_MAX_PMDS]; /* values of other PMDs */ -+ u64 pmd_eventid; /* eventid associated with PMD */ -+ u16 num_smpl_pmds; /* number of PMDs in smpl_pmds_values */ -+}; -+/* -+ * depth of message queue -+ * -+ * Depth cannot be bigger than 255 (see reset_count) -+ */ -+#define PFM_MSGS_ORDER 3 /* log2(number of messages) */ -+#define PFM_MSGS_COUNT (1</proc/sys/kernel/printk_ratelimit -+ * -+ * debug is a bitmask where bits are defined as follows: -+ * bit 0: enable non-interrupt code debug messages -+ * bit 1: enable interrupt code debug messages -+ */ -+#ifdef CONFIG_PERFMON_DEBUG -+#define _PFM_DBG(lm, f, x...) \ -+ do { \ -+ if (unlikely((pfm_controls.debug & lm) && printk_ratelimit())) { \ -+ preempt_disable(); \ -+ printk("perfmon: %s.%d: CPU%d [%d]: " f "\n", \ -+ __func__, __LINE__, \ -+ smp_processor_id(), current->pid , ## x); \ -+ preempt_enable(); \ -+ } \ -+ } while (0) -+ -+#define PFM_DBG(f, x...) _PFM_DBG(0x1, f, ##x) -+#define PFM_DBG_ovfl(f, x...) _PFM_DBG(0x2, f, ## x) -+#else -+#define PFM_DBG(f, x...)
do {} while (0) -+#define PFM_DBG_ovfl(f, x...) do {} while (0) -+#endif -+ -+extern struct pfm_pmu_config *pfm_pmu_conf; -+extern int perfmon_disabled; -+ -+static inline struct pfm_arch_context *pfm_ctx_arch(struct pfm_context *c) -+{ -+ return (struct pfm_arch_context *)(c+1); -+} -+ -+int pfm_get_args(void __user *ureq, size_t sz, size_t lsz, void *laddr, -+ void **req, void **to_free); -+ -+int pfm_get_smpl_arg(char __user *fmt_uname, void __user *uaddr, size_t usize, -+ void **arg, struct pfm_smpl_fmt **fmt); -+ -+int __pfm_write_pmcs(struct pfm_context *ctx, struct pfarg_pmc *req, -+ int count); -+int __pfm_write_pmds(struct pfm_context *ctx, struct pfarg_pmd *req, int count, -+ int compat); -+int __pfm_read_pmds(struct pfm_context *ctx, struct pfarg_pmd *req, int count); -+ -+int __pfm_load_context(struct pfm_context *ctx, struct pfarg_load *req, -+ struct task_struct *task); -+int __pfm_unload_context(struct pfm_context *ctx, int *can_release); -+ -+int __pfm_stop(struct pfm_context *ctx, int *release_info); -+int __pfm_restart(struct pfm_context *ctx, int *unblock); -+int __pfm_start(struct pfm_context *ctx, struct pfarg_start *start); -+ -+void pfm_free_context(struct pfm_context *ctx); -+ -+void pfm_smpl_buf_space_release(struct pfm_context *ctx, size_t size); -+ -+int pfm_check_task_state(struct pfm_context *ctx, int check_mask, -+ unsigned long *flags, void **resume); -+/* -+ * check_mask bitmask values for pfm_check_task_state() -+ */ -+#define PFM_CMD_STOPPED 0x01 /* command needs thread stopped */ -+#define PFM_CMD_UNLOADED 0x02 /* command needs ctx unloaded */ -+#define PFM_CMD_UNLOAD 0x04 /* command is unload */ -+ -+int __pfm_create_context(struct pfarg_ctx *req, -+ struct pfm_smpl_fmt *fmt, -+ void *fmt_arg, -+ int mode, -+ struct pfm_context **new_ctx); -+ -+struct pfm_event_set *pfm_find_set(struct pfm_context *ctx, u16 set_id, -+ int alloc); -+ -+int pfm_pmu_conf_get(int autoload); -+void pfm_pmu_conf_put(void); -+ -+int 
pfm_session_allcpus_acquire(void); -+void pfm_session_allcpus_release(void); -+ -+int pfm_smpl_buf_alloc(struct pfm_context *ctx, size_t rsize); -+void pfm_smpl_buf_free(struct pfm_context *ctx); -+ -+struct pfm_smpl_fmt *pfm_smpl_fmt_get(char *name); -+void pfm_smpl_fmt_put(struct pfm_smpl_fmt *fmt); -+ -+void pfm_interrupt_handler(unsigned long iip, struct pt_regs *regs); -+ -+void pfm_resume_task(struct task_struct *t, void *data); -+ -+#include -+#include -+ -+extern const struct file_operations pfm_file_ops; -+/* -+ * upper limit for count in calls that take vector arguments. This is used -+ * to prevent for multiplication overflow when we compute actual storage size -+ */ -+#define PFM_MAX_ARG_COUNT(m) (INT_MAX/sizeof(*(m))) -+ -+#define cast_ulp(_x) ((unsigned long *)_x) -+ -+#define PFM_NORMAL 0 -+#define PFM_COMPAT 1 -+ -+void __pfm_exit_thread(void); -+void pfm_ctxsw_in(struct task_struct *prev, struct task_struct *next); -+void pfm_ctxsw_out(struct task_struct *prev, struct task_struct *next); -+void pfm_handle_work(struct pt_regs *regs); -+void __pfm_init_percpu(void *dummy); -+void pfm_save_pmds(struct pfm_context *ctx, struct pfm_event_set *set); -+ -+static inline void pfm_exit_thread(void) -+{ -+ if (current->pfm_context) -+ __pfm_exit_thread(); -+} -+ -+/* -+ * include arch-specific kernel level definitions -+ */ -+#include -+ -+static inline void pfm_copy_thread(struct task_struct *task) -+{ -+ /* -+ * context or perfmon TIF state is NEVER inherited -+ * in child task. Holds for per-thread and system-wide -+ */ -+ task->pfm_context = NULL; -+ clear_tsk_thread_flag(task, TIF_PERFMON_CTXSW); -+ clear_tsk_thread_flag(task, TIF_PERFMON_WORK); -+ pfm_arch_disarm_handle_work(task); -+} -+ -+ -+/* -+ * read a single PMD register. -+ * -+ * virtual PMD registers have special handler. 
-+ * Depends on definitions in asm/perfmon_kern.h -+ */ -+static inline u64 pfm_read_pmd(struct pfm_context *ctx, unsigned int cnum) -+{ -+ if (unlikely(pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_V)) -+ return pfm_pmu_conf->pmd_sread(ctx, cnum); -+ -+ return pfm_arch_read_pmd(ctx, cnum); -+} -+/* -+ * write a single PMD register. -+ * -+ * virtual PMD registers have special handler. -+ * Depends on definitions in asm/perfmon_kern.h -+ */ -+static inline void pfm_write_pmd(struct pfm_context *ctx, unsigned int cnum, -+ u64 value) -+{ -+ /* -+ * PMD writes are ignored for read-only registers -+ */ -+ if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_RO) -+ return; -+ -+ if (pfm_pmu_conf->pmd_desc[cnum].type & PFM_REG_V) { -+ pfm_pmu_conf->pmd_swrite(ctx, cnum, value); -+ return; -+ } -+ /* -+ * clear unimplemented bits -+ */ -+ value &= ~pfm_pmu_conf->pmd_desc[cnum].rsvd_msk; -+ -+ pfm_arch_write_pmd(ctx, cnum, value); -+} -+ -+void __pfm_init_percpu(void *dummy); -+ -+static inline void pfm_init_percpu(void) -+{ -+ __pfm_init_percpu(NULL); -+} -+ -+/* -+ * pfm statistics are available via debugfs -+ * and perfmon subdir. 
-+ * -+ * When adding/removing new stats, make sure you also -+ * update the name table in perfmon_debugfs.c -+ */ -+enum pfm_stats_names { -+ PFM_ST_ovfl_intr_all_count = 0, -+ PFM_ST_ovfl_intr_ns, -+ PFM_ST_ovfl_intr_spurious_count, -+ PFM_ST_ovfl_intr_replay_count, -+ PFM_ST_ovfl_intr_regular_count, -+ PFM_ST_handle_work_count, -+ PFM_ST_ovfl_notify_count, -+ PFM_ST_reset_pmds_count, -+ PFM_ST_pfm_restart_count, -+ PFM_ST_fmt_handler_calls, -+ PFM_ST_fmt_handler_ns, -+ PFM_ST_set_switch_count, -+ PFM_ST_set_switch_ns, -+ PFM_ST_set_switch_exp, -+ PFM_ST_ctxswin_count, -+ PFM_ST_ctxswin_ns, -+ PFM_ST_handle_timeout_count, -+ PFM_ST_ovfl_intr_nmi_count, -+ PFM_ST_ctxswout_count, -+ PFM_ST_ctxswout_ns, -+ PFM_ST_LAST /* last entry marked */ -+}; -+#define PFM_NUM_STATS PFM_ST_LAST -+ -+struct pfm_stats { -+ u64 v[PFM_NUM_STATS]; -+ struct dentry *dirs[PFM_NUM_STATS]; -+ struct dentry *cpu_dir; -+ char cpu_name[8]; -+}; -+ -+#ifdef CONFIG_PERFMON_DEBUG_FS -+#define pfm_stats_get(x) __get_cpu_var(pfm_stats).v[PFM_ST_##x] -+#define pfm_stats_inc(x) __get_cpu_var(pfm_stats).v[PFM_ST_##x]++ -+#define pfm_stats_add(x, y) __get_cpu_var(pfm_stats).v[PFM_ST_##x] += (y) -+void pfm_reset_stats(int cpu); -+#else -+#define pfm_stats_get(x) -+#define pfm_stats_inc(x) -+#define pfm_stats_add(x, y) -+static inline void pfm_reset_stats(int cpu) -+{} -+#endif -+ -+ -+ -+DECLARE_PER_CPU(struct pfm_context *, pmu_ctx); -+DECLARE_PER_CPU(struct pfm_stats, pfm_stats); -+DECLARE_PER_CPU(struct task_struct *, pmu_owner); -+ -+void pfm_cpu_disable(void); -+ -+ -+/* -+ * max vector argument elements for local storage (no kmalloc/kfree) -+ * The PFM_ARCH_PM*_ARG should be defined in perfmon_kern.h. 
-+ * If not, default (conservative) values are used -+ */ -+#ifndef PFM_ARCH_PMC_STK_ARG -+#define PFM_ARCH_PMC_STK_ARG 1 -+#endif -+ -+#ifndef PFM_ARCH_PMD_STK_ARG -+#define PFM_ARCH_PMD_STK_ARG 1 -+#endif -+ -+#define PFM_PMC_STK_ARG PFM_ARCH_PMC_STK_ARG -+#define PFM_PMD_STK_ARG PFM_ARCH_PMD_STK_ARG -+ -+#else /* !CONFIG_PERFMON */ -+ -+ -+/* -+ * perfmon hooks are nops when CONFIG_PERFMON is undefined -+ */ -+static inline void pfm_cpu_disable(void) -+{} -+ -+static inline void pfm_exit_thread(void) -+{} -+ -+static inline void pfm_handle_work(struct pt_regs *regs) -+{} -+ -+static inline void pfm_copy_thread(struct task_struct *t) -+{} -+ -+static inline void pfm_ctxsw_in(struct task_struct *p, struct task_struct *n) -+{} -+ -+static inline void pfm_ctxsw_out(struct task_struct *p, struct task_struct *n) -+{} -+ -+static inline void pfm_session_allcpus_release(void) -+{} -+ -+static inline int pfm_session_allcpus_acquire(void) -+{ -+ return 0; -+} -+ -+static inline void pfm_init_percpu(void) -+{} -+ -+#endif /* CONFIG_PERFMON */ -+ -+#endif /* __LINUX_PERFMON_KERN_H__ */ -diff --git a/include/linux/perfmon_pmu.h b/include/linux/perfmon_pmu.h -new file mode 100644 -index 0000000..3f5f9e8 ---- /dev/null -+++ b/include/linux/perfmon_pmu.h -@@ -0,0 +1,192 @@ -+/* -+ * Copyright (c) 2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * Interface for PMU description modules -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#ifndef __PERFMON_PMU_H__ -+#define __PERFMON_PMU_H__ 1 -+ -+/* -+ * generic information about a PMC or PMD register -+ * -+ * Dependency bitmasks: -+ * They are used to allow lazy save/restore in the context switch -+ * code. To avoid picking up stale configuration from a previous -+ * thread. Usng the bitmask, the generic read/write routines can -+ * ensure that all registers needed to support the measurement are -+ * restored properly on context switch in. -+ */ -+struct pfm_regmap_desc { -+ u16 type; /* role of the register */ -+ u16 reserved1; /* for future use */ -+ u32 reserved2; /* for future use */ -+ u64 dfl_val; /* power-on default value (quiescent) */ -+ u64 rsvd_msk; /* reserved bits: 1 means reserved */ -+ u64 no_emul64_msk; /* bits to clear for PFM_REGFL_NO_EMUL64 */ -+ unsigned long hw_addr; /* HW register address or index */ -+ struct kobject kobj; /* for internal use only */ -+ char *desc; /* HW register description string */ -+ u64 dep_pmcs[PFM_PMC_BV];/* depending PMC registers */ -+}; -+#define to_reg(n) container_of(n, struct pfm_regmap_desc, kobj) -+ -+/* -+ * pfm_reg_desc helper macros -+ */ -+#define PMC_D(t, d, v, r, n, h) \ -+ { .type = t, \ -+ .desc = d, \ -+ .dfl_val = v, \ -+ .rsvd_msk = r, \ -+ .no_emul64_msk = n, \ -+ .hw_addr = h \ -+ } -+ -+#define PMD_D(t, d, h) \ -+ { .type = t, \ -+ .desc = d, \ -+ .rsvd_msk = 0, \ -+ .no_emul64_msk = 0, \ -+ .hw_addr = h \ -+ } -+ -+#define PMD_DR(t, d, h, r) \ -+ { .type = t, \ -+ .desc = d, \ -+ .rsvd_msk = r, \ -+ .no_emul64_msk = 0, \ -+ .hw_addr = h \ -+ } -+ -+#define PMX_NA \ -+ { .type = PFM_REG_NA } -+ -+#define PMD_DP(t, d, h, p) \ -+ { .type = t, \ -+ .desc = d, \ -+ .rsvd_msk = 0, \ -+ .no_emul64_msk = 0, \ -+ .dep_pmcs[0] = p, \ -+ .hw_addr = h \ -+ } -+ 
-+/* -+ * type of a PMU register (16-bit bitmask) for use with pfm_reg_desc.type -+ */ -+#define PFM_REG_NA 0x00 /* not avail. (not impl.,no access) must be 0 */ -+#define PFM_REG_I 0x01 /* PMC/PMD: implemented */ -+#define PFM_REG_WC 0x02 /* PMC: has write_checker */ -+#define PFM_REG_C64 0x04 /* PMD: 64-bit virtualization */ -+#define PFM_REG_RO 0x08 /* PMD: read-only (writes ignored) */ -+#define PFM_REG_V 0x10 /* PMD: virtual reg */ -+#define PFM_REG_INTR 0x20 /* PMD: register can generate interrupt */ -+#define PFM_REG_SYS 0x40 /* PMC/PMD: register is for system-wide only */ -+#define PFM_REG_THR 0x80 /* PMC/PMD: register is for per-thread only */ -+#define PFM_REG_NO64 0x100 /* PMC: supports PFM_REGFL_NO_EMUL64 */ -+ -+/* -+ * define some shortcuts for common types -+ */ -+#define PFM_REG_W (PFM_REG_WC|PFM_REG_I) -+#define PFM_REG_W64 (PFM_REG_WC|PFM_REG_NO64|PFM_REG_I) -+#define PFM_REG_C (PFM_REG_C64|PFM_REG_INTR|PFM_REG_I) -+#define PFM_REG_I64 (PFM_REG_NO64|PFM_REG_I) -+#define PFM_REG_IRO (PFM_REG_I|PFM_REG_RO) -+ -+typedef int (*pfm_pmc_check_t)(struct pfm_context *ctx, -+ struct pfm_event_set *set, -+ struct pfarg_pmc *req); -+ -+typedef int (*pfm_pmd_check_t)(struct pfm_context *ctx, -+ struct pfm_event_set *set, -+ struct pfarg_pmd *req); -+ -+ -+typedef u64 (*pfm_sread_t)(struct pfm_context *ctx, unsigned int cnum); -+typedef void (*pfm_swrite_t)(struct pfm_context *ctx, unsigned int cnum, u64 val); -+ -+/* -+ * structure used by pmu description modules -+ * -+ * probe_pmu() routine return value: -+ * - 1 means recognized PMU -+ * - 0 means not recognized PMU -+ */ -+struct pfm_pmu_config { -+ char *pmu_name; /* PMU family name */ -+ char *version; /* config module version */ -+ -+ int counter_width; /* width of hardware counter */ -+ -+ struct pfm_regmap_desc *pmc_desc; /* PMC register descriptions */ -+ struct pfm_regmap_desc *pmd_desc; /* PMD register descriptions */ -+ -+ pfm_pmc_check_t pmc_write_check;/* write checker (optional) */ -+ 
pfm_pmd_check_t pmd_write_check;/* write checker (optional) */ -+ pfm_pmd_check_t pmd_read_check; /* read checker (optional) */ -+ -+ pfm_sread_t pmd_sread; /* virtual pmd read */ -+ pfm_swrite_t pmd_swrite; /* virtual pmd write */ -+ -+ int (*probe_pmu)(void);/* probe PMU routine */ -+ -+ u16 num_pmc_entries;/* #entries in pmc_desc */ -+ u16 num_pmd_entries;/* #entries in pmd_desc */ -+ -+ void *pmu_info; /* model-specific infos */ -+ u32 flags; /* set of flags */ -+ -+ struct module *owner; /* pointer to module struct */ -+ -+ /* -+ * fields computed internally, do not set in module -+ */ -+ struct pfm_regdesc regs_all; /* regs available to all */ -+ struct pfm_regdesc regs_thr; /* regs avail per-thread */ -+ struct pfm_regdesc regs_sys; /* regs avail system-wide */ -+ -+ u64 ovfl_mask; /* overflow mask */ -+}; -+ -+static inline void *pfm_pmu_info(void) -+{ -+ return pfm_pmu_conf->pmu_info; -+} -+ -+/* -+ * pfm_pmu_config flags -+ */ -+#define PFM_PMUFL_IS_BUILTIN 0x1 /* pmu config is compiled in */ -+ -+/* -+ * we need to know whether the PMU description is builtin or compiled -+ * as a module -+ */ -+#ifdef MODULE -+#define PFM_PMU_BUILTIN_FLAG 0 /* not built as a module */ -+#else -+#define PFM_PMU_BUILTIN_FLAG PFM_PMUFL_IS_BUILTIN /* built as a module */ -+#endif -+ -+int pfm_pmu_register(struct pfm_pmu_config *cfg); -+void pfm_pmu_unregister(struct pfm_pmu_config *cfg); -+ -+int pfm_sysfs_remove_pmu(struct pfm_pmu_config *pmu); -+int pfm_sysfs_add_pmu(struct pfm_pmu_config *pmu); -+ -+#endif /* __PERFMON_PMU_H__ */ -diff --git a/include/linux/sched.h b/include/linux/sched.h -index 3d9120c..8fb3b55 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -96,6 +96,7 @@ struct exec_domain; - struct futex_pi_state; - struct robust_list_head; - struct bio; -+struct pfm_context; - - /* - * List of flags we want to share for kernel threads, -@@ -1301,6 +1302,9 @@ struct task_struct { - int latency_record_count; - struct latency_record 
latency_record[LT_SAVECOUNT]; - #endif -+#ifdef CONFIG_PERFMON -+ struct pfm_context *pfm_context; -+#endif - }; - - /* -diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h -index d6ff145..e308523 100644 ---- a/include/linux/syscalls.h -+++ b/include/linux/syscalls.h -@@ -29,6 +29,13 @@ struct msqid_ds; - struct new_utsname; - struct nfsctl_arg; - struct __old_kernel_stat; -+struct pfarg_ctx; -+struct pfarg_pmc; -+struct pfarg_pmd; -+struct pfarg_start; -+struct pfarg_load; -+struct pfarg_setinfo; -+struct pfarg_setdesc; - struct pollfd; - struct rlimit; - struct rusage; -@@ -625,4 +632,27 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); - - int kernel_execve(const char *filename, char *const argv[], char *const envp[]); - -+asmlinkage long sys_pfm_create_context(struct pfarg_ctx __user *ureq, -+ void __user *uarg, size_t smpl_size); -+asmlinkage long sys_pfm_write_pmcs(int fd, struct pfarg_pmc __user *ureq, -+ int count); -+asmlinkage long sys_pfm_write_pmds(int fd, struct pfarg_pmd __user *ureq, -+ int count); -+asmlinkage long sys_pfm_read_pmds(int fd, struct pfarg_pmd __user *ureq, -+ int count); -+asmlinkage long sys_pfm_restart(int fd); -+asmlinkage long sys_pfm_stop(int fd); -+asmlinkage long sys_pfm_start(int fd, struct pfarg_start __user *ureq); -+asmlinkage long sys_pfm_load_context(int fd, struct pfarg_load __user *ureq); -+asmlinkage long sys_pfm_unload_context(int fd); -+asmlinkage long sys_pfm_delete_evtsets(int fd, -+ struct pfarg_setinfo __user *ureq, -+ int count); -+asmlinkage long sys_pfm_create_evtsets(int fd, -+ struct pfarg_setdesc __user *ureq, -+ int count); -+asmlinkage long sys_pfm_getinfo_evtsets(int fd, -+ struct pfarg_setinfo __user *ureq, -+ int count); -+ - #endif -diff --git a/kernel/sched.c b/kernel/sched.c -index ad1962d..1bc8fcf 100644 ---- a/kernel/sched.c -+++ b/kernel/sched.c -@@ -71,6 +71,7 @@ - #include - #include - #include -+#include - - #include - #include -diff --git 
a/kernel/sys_ni.c b/kernel/sys_ni.c -index 08d6e1b..61f4155 100644 ---- a/kernel/sys_ni.c -+++ b/kernel/sys_ni.c -@@ -126,6 +126,19 @@ cond_syscall(sys_vm86); - cond_syscall(compat_sys_ipc); - cond_syscall(compat_sys_sysctl); - -+cond_syscall(sys_pfm_create_context); -+cond_syscall(sys_pfm_write_pmcs); -+cond_syscall(sys_pfm_write_pmds); -+cond_syscall(sys_pfm_read_pmds); -+cond_syscall(sys_pfm_restart); -+cond_syscall(sys_pfm_start); -+cond_syscall(sys_pfm_stop); -+cond_syscall(sys_pfm_load_context); -+cond_syscall(sys_pfm_unload_context); -+cond_syscall(sys_pfm_create_evtsets); -+cond_syscall(sys_pfm_delete_evtsets); -+cond_syscall(sys_pfm_getinfo_evtsets); -+ - /* arch-specific weak syscall entries */ - cond_syscall(sys_pciconfig_read); - cond_syscall(sys_pciconfig_write); -diff --git a/perfmon/Makefile b/perfmon/Makefile -new file mode 100644 -index 0000000..32ff037 ---- /dev/null -+++ b/perfmon/Makefile -@@ -0,0 +1,12 @@ -+# -+# Copyright (c) 2005-2006 Hewlett-Packard Development Company, L.P. -+# Contributed by Stephane Eranian -+# -+obj-y = perfmon_init.o perfmon_rw.o perfmon_res.o \ -+ perfmon_pmu.o perfmon_sysfs.o perfmon_syscalls.o \ -+ perfmon_file.o perfmon_ctxsw.o perfmon_intr.o \ -+ perfmon_dfl_smpl.o perfmon_sets.o perfmon_hotplug.o \ -+ perfmon_msg.o perfmon_smpl.o perfmon_attach.o \ -+ perfmon_activate.o perfmon_ctx.o perfmon_fmt.o -+ -+obj-$(CONFIG_PERFMON_DEBUG_FS) += perfmon_debugfs.o -diff --git a/perfmon/perfmon_activate.c b/perfmon/perfmon_activate.c -new file mode 100644 -index 0000000..d9f501d ---- /dev/null -+++ b/perfmon/perfmon_activate.c -@@ -0,0 +1,265 @@ -+/* -+ * perfmon_activate.c: perfmon2 start/stop functions -+ * -+ * This file implements the perfmon2 interface which -+ * provides access to the hardware performance counters -+ * of the host processor. -+ * -+ * -+ * The initial version of perfmon.c was written by -+ * Ganesh Venkitachalam, IBM Corp. 
-+ * -+ * Then it was modified for perfmon-1.x by Stephane Eranian and -+ * David Mosberger, Hewlett Packard Co. -+ * -+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x -+ * by Stephane Eranian, Hewlett Packard Co. -+ * -+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * David Mosberger-Tang -+ * -+ * More information about perfmon available at: -+ * http://perfmon2.sf.net -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include "perfmon_priv.h" -+ -+/** -+ * __pfm_start - activate monitoring -+ * @ctx: context to operate on -+ * @start: pfarg_start as passed by user -+ * -+ * When operating in per-thread mode and not self-monitoring, the monitored -+ * thread must be stopped. Activation will be effective next time the thread -+ * is context switched in. -+ * -+ * The pfarg_start argument is optional and may be used to designate -+ * the initial event set to activate. When not provided, the last active -+ * set is used. For the first activation, set0 is used when start is NULL. -+ * -+ * On some architectures, e.g., IA-64, it may be possible to start monitoring -+ * without calling this function under certain conditions (per-thread and self -+ * monitoring). In this case, either set0 or the last active set is used. 
-+ * -+ * the context is locked and interrupts are disabled. -+ */ -+int __pfm_start(struct pfm_context *ctx, struct pfarg_start *start) -+{ -+ struct task_struct *task, *owner_task; -+ struct pfm_event_set *new_set, *old_set; -+ int is_self; -+ -+ task = ctx->task; -+ -+ /* -+ * UNLOADED: error -+ * LOADED : normal start, nop if started unless set is different -+ * MASKED : nop or change set when unmasking -+ * ZOMBIE : cannot happen -+ */ -+ if (ctx->state == PFM_CTX_UNLOADED) -+ return -EINVAL; -+ -+ old_set = new_set = ctx->active_set; -+ -+ /* -+ * always the case for system-wide -+ */ -+ if (task == NULL) -+ task = current; -+ -+ is_self = task == current; -+ -+ /* -+ * argument is provided? -+ */ -+ if (start) { -+ /* -+ * find the set to load first -+ */ -+ new_set = pfm_find_set(ctx, start->start_set, 0); -+ if (new_set == NULL) { -+ PFM_DBG("event set%u does not exist", -+ start->start_set); -+ return -EINVAL; -+ } -+ } -+ -+ PFM_DBG("cur_set=%u req_set=%u", old_set->id, new_set->id); -+ -+ /* -+ * if we need to change the active set we need -+ * to check if we can access the PMU -+ */ -+ if (new_set != old_set) { -+ -+ owner_task = __get_cpu_var(pmu_owner); -+ /* -+ * system-wide: must run on the right CPU -+ * per-thread : must be the owner of the PMU context -+ * -+ * pfm_switch_sets() returns with monitoring stopped -+ */ -+ if (is_self) { -+ pfm_switch_sets(ctx, new_set, PFM_PMD_RESET_LONG, 1); -+ } else { -+ /* -+ * In a UP kernel, the PMU may contain the state -+ * of the task we want to operate on, yet the task -+ * may be switched out (lazy save). We need to save -+ * current state (old_set), switch active_set and -+ * mark it for reload. 
-+ */ -+ if (owner_task == task) -+ pfm_save_pmds(ctx, old_set); -+ ctx->active_set = new_set; -+ new_set->priv_flags |= PFM_SETFL_PRIV_MOD_BOTH; -+ } -+ } -+ -+ /* -+ * mark as started -+ * must be done before calling pfm_arch_start() -+ */ -+ ctx->flags.started = 1; -+ -+ pfm_arch_start(task, ctx); -+ -+ /* -+ * we check whether we had a pending ovfl before restarting. -+ * If so we need to regenerate the interrupt to make sure we -+ * keep recorded samples. For non-self monitoring this check -+ * is done in the pfm_ctxswin_thread() routine. -+ * -+ * we check new_set/old_set because pfm_switch_sets() already -+ * takes care of replaying the pending interrupts -+ */ -+ if (is_self && new_set != old_set && new_set->npend_ovfls) { -+ pfm_arch_resend_irq(ctx); -+ pfm_stats_inc(ovfl_intr_replay_count); -+ } -+ -+ /* -+ * always start with full timeout -+ */ -+ new_set->hrtimer_rem = new_set->hrtimer_exp; -+ -+ /* -+ * activate timeout for system-wide, self-montoring -+ * Always start with full timeout -+ * Timeout is at least one tick away, so no risk of -+ * having hrtimer_start() trying to wakeup softirqd -+ * and thus causing troubles. This cannot happen anmyway -+ * because cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ -+ */ -+ if (is_self && new_set->flags & PFM_SETFL_TIME_SWITCH) { -+ hrtimer_start(&__get_cpu_var(pfm_hrtimer), -+ new_set->hrtimer_rem, -+ HRTIMER_MODE_REL); -+ -+ PFM_DBG("set%u started timeout=%lld", -+ new_set->id, -+ (unsigned long long)new_set->hrtimer_rem.tv64); -+ } -+ -+ /* -+ * we restart total duration even if context was -+ * already started. In that case, counts are simply -+ * reset. -+ * -+ * For per-thread, if not self-monitoring, the statement -+ * below will have no effect because thread is stopped. -+ * The field is reset of ctxsw in. 
-+ */ -+ new_set->duration_start = sched_clock(); -+ -+ return 0; -+} -+ -+/** -+ * __pfm_stop - stop monitoring -+ * @ctx: context to operate on -+ * @release_info: infos for caller (see below) -+ * -+ * When operating in per-thread* mode and when not self-monitoring, -+ * the monitored thread must be stopped. -+ * -+ * the context is locked and interrupts are disabled. -+ * -+ * release_info value upon return: -+ * - bit 0 : unused -+ * - bit 1 : when set, must cancel hrtimer -+ */ -+int __pfm_stop(struct pfm_context *ctx, int *release_info) -+{ -+ struct pfm_event_set *set; -+ struct task_struct *task; -+ u64 now; -+ int state; -+ -+ *release_info = 0; -+ -+ now = sched_clock(); -+ state = ctx->state; -+ set = ctx->active_set; -+ -+ /* -+ * context must be attached (zombie cannot happen) -+ */ -+ if (state == PFM_CTX_UNLOADED) -+ return -EINVAL; -+ -+ task = ctx->task; -+ -+ PFM_DBG("ctx_task=[%d] ctx_state=%d is_system=%d", -+ task ? task->pid : -1, -+ state, -+ !task); -+ -+ /* -+ * this happens for system-wide context -+ */ -+ if (task == NULL) -+ task = current; -+ -+ /* -+ * compute elapsed time -+ * -+ * unless masked, compute elapsed duration, stop timeout -+ */ -+ if (task == current && state == PFM_CTX_LOADED) { -+ /* -+ * timeout cancel must be deferred until context is -+ * unlocked to avoid race with pfm_handle_switch_timeout() -+ */ -+ if (set->flags & PFM_SETFL_TIME_SWITCH) -+ *release_info |= 0x2; -+ -+ set->duration += now - set->duration_start; -+ } -+ -+ pfm_arch_stop(task, ctx); -+ -+ ctx->flags.started = 0; -+ /* -+ * starting now, in-flight PMU interrupt for this context -+ * are treated as spurious -+ */ -+ return 0; -+} -diff --git a/perfmon/perfmon_attach.c b/perfmon/perfmon_attach.c -new file mode 100644 -index 0000000..bbd1d1e ---- /dev/null -+++ b/perfmon/perfmon_attach.c -@@ -0,0 +1,474 @@ -+/* -+ * perfmon_attach.c: perfmon2 load/unload functions -+ * -+ * This file implements the perfmon2 interface which -+ * provides access to the 
hardware performance counters -+ * of the host processor. -+ * -+ * -+ * The initial version of perfmon.c was written by -+ * Ganesh Venkitachalam, IBM Corp. -+ * -+ * Then it was modified for perfmon-1.x by Stephane Eranian and -+ * David Mosberger, Hewlett Packard Co. -+ * -+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x -+ * by Stephane Eranian, Hewlett Packard Co. -+ * -+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * David Mosberger-Tang -+ * -+ * More information about perfmon available at: -+ * http://perfmon2.sf.net -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+#include "perfmon_priv.h" -+ -+/** -+ * __pfm_load_context_sys - attach context to a CPU in system-wide mode -+ * @ctx: context to operate on -+ * @set_id: set to activate first -+ * @cpu: CPU to monitor -+ * -+ * The cpu specified in the pfarg_load.load_pid argument must be the current -+ * CPU. -+ * -+ * The function must be called with the context locked and interrupts disabled. 
-+ */ -+static int pfm_load_ctx_sys(struct pfm_context *ctx, u16 set_id, u32 cpu) -+{ -+ struct pfm_event_set *set; -+ int mycpu; -+ int ret; -+ -+ mycpu = smp_processor_id(); -+ -+ /* -+ * system-wide: check we are running on the desired CPU -+ */ -+ if (cpu != mycpu) { -+ PFM_DBG("wrong CPU: asking %u but on %u", cpu, mycpu); -+ return -EINVAL; -+ } -+ -+ /* -+ * initialize sets -+ */ -+ set = pfm_prepare_sets(ctx, set_id); -+ if (!set) { -+ PFM_DBG("event set%u does not exist", set_id); -+ return -EINVAL; -+ } -+ -+ PFM_DBG("set=%u set_flags=0x%x", set->id, set->flags); -+ -+ ctx->cpu = mycpu; -+ ctx->task = NULL; -+ ctx->active_set = set; -+ -+ /* -+ * perform any architecture specific actions -+ */ -+ ret = pfm_arch_load_context(ctx); -+ if (ret) -+ goto error_noload; -+ -+ /* -+ * now reserve the session, before we can proceed with -+ * actually accessing the PMU hardware -+ */ -+ ret = pfm_session_acquire(1, mycpu); -+ if (ret) -+ goto error; -+ -+ -+ /* -+ * caller must be on monitored CPU to access PMU, thus this is -+ * a form of self-monitoring -+ */ -+ ctx->flags.is_self = 1; -+ -+ set->runs++; -+ -+ /* -+ * load PMD from set -+ * load PMC from set -+ */ -+ pfm_arch_restore_pmds(ctx, set); -+ pfm_arch_restore_pmcs(ctx, set); -+ -+ /* -+ * set new ownership -+ */ -+ pfm_set_pmu_owner(NULL, ctx); -+ -+ /* -+ * reset pending work -+ */ -+ ctx->flags.work_type = PFM_WORK_NONE; -+ ctx->flags.reset_count = 0; -+ -+ /* -+ * reset message queue -+ */ -+ ctx->msgq_head = ctx->msgq_tail = 0; -+ -+ ctx->state = PFM_CTX_LOADED; -+ -+ return 0; -+error: -+ pfm_arch_unload_context(ctx); -+error_noload: -+ return ret; -+} -+ -+/** -+ * __pfm_load_context_thread - attach context to a thread -+ * @ctx: context to operate on -+ * @set_id: first set -+ * @task: threadf to attach to -+ * -+ * The function must be called with the context locked and interrupts disabled. 
-+ */ -+static int pfm_load_ctx_thread(struct pfm_context *ctx, u16 set_id, -+ struct task_struct *task) -+{ -+ struct pfm_event_set *set; -+ struct pfm_context *old; -+ int ret; -+ -+ PFM_DBG("load_pid=%d set=%u", task->pid, set_id); -+ /* -+ * per-thread: -+ * - task to attach to is checked in sys_pfm_load_context() to avoid -+ * locking issues. if found, and not self, task refcount was -+ * incremented. -+ */ -+ old = cmpxchg(&task->pfm_context, NULL, ctx); -+ if (old) { -+ PFM_DBG("load_pid=%d has a context " -+ "old=%p new=%p cur=%p", -+ task->pid, -+ old, -+ ctx, -+ task->pfm_context); -+ return -EEXIST; -+ } -+ -+ /* -+ * initialize sets -+ */ -+ set = pfm_prepare_sets(ctx, set_id); -+ if (!set) { -+ PFM_DBG("event set%u does not exist", set_id); -+ return -EINVAL; -+ } -+ -+ -+ ctx->task = task; -+ ctx->cpu = -1; -+ ctx->active_set = set; -+ -+ /* -+ * perform any architecture specific actions -+ */ -+ ret = pfm_arch_load_context(ctx); -+ if (ret) -+ goto error_noload; -+ -+ /* -+ * now reserve the session, before we can proceed with -+ * actually accessing the PMU hardware -+ */ -+ ret = pfm_session_acquire(0, -1); -+ if (ret) -+ goto error; -+ -+ -+ set->runs++; -+ if (ctx->task != current) { -+ -+ ctx->flags.is_self = 0; -+ -+ /* force a full reload */ -+ ctx->last_act = PFM_INVALID_ACTIVATION; -+ ctx->last_cpu = -1; -+ set->priv_flags |= PFM_SETFL_PRIV_MOD_BOTH; -+ -+ } else { -+ pfm_check_save_prev_ctx(); -+ -+ ctx->last_cpu = smp_processor_id(); -+ __get_cpu_var(pmu_activation_number)++; -+ ctx->last_act = __get_cpu_var(pmu_activation_number); -+ -+ ctx->flags.is_self = 1; -+ -+ /* -+ * load PMD from set -+ * load PMC from set -+ */ -+ pfm_arch_restore_pmds(ctx, set); -+ pfm_arch_restore_pmcs(ctx, set); -+ -+ /* -+ * set new ownership -+ */ -+ pfm_set_pmu_owner(ctx->task, ctx); -+ } -+ set_tsk_thread_flag(task, TIF_PERFMON_CTXSW); -+ -+ /* -+ * reset pending work -+ */ -+ ctx->flags.work_type = PFM_WORK_NONE; -+ ctx->flags.reset_count = 0; -+ -+ /* -+ 
* reset message queue -+ */ -+ ctx->msgq_head = ctx->msgq_tail = 0; -+ -+ ctx->state = PFM_CTX_LOADED; -+ -+ return 0; -+ -+error: -+ pfm_arch_unload_context(ctx); -+ ctx->task = NULL; -+error_noload: -+ /* -+ * detach context -+ */ -+ task->pfm_context = NULL; -+ return ret; -+} -+ -+/** -+ * __pfm_load_context - attach context to a CPU or thread -+ * @ctx: context to operate on -+ * @load: pfarg_load as passed by user -+ * @task: thread to attach to, NULL for system-wide -+ */ -+int __pfm_load_context(struct pfm_context *ctx, struct pfarg_load *load, -+ struct task_struct *task) -+{ -+ if (ctx->flags.system) -+ return pfm_load_ctx_sys(ctx, load->load_set, load->load_pid); -+ return pfm_load_ctx_thread(ctx, load->load_set, task); -+} -+ -+/** -+ * pfm_update_ovfl_pmds - account for pending ovfls on PMDs -+ * @ctx: context to operate on -+ * -+ * This function is always called after pfm_stop has been issued -+ */ -+static void pfm_update_ovfl_pmds(struct pfm_context *ctx) -+{ -+ struct pfm_event_set *set; -+ u64 *cnt_pmds; -+ u64 ovfl_mask; -+ u16 num_ovfls, i, first; -+ -+ ovfl_mask = pfm_pmu_conf->ovfl_mask; -+ first = ctx->regs.first_intr_pmd; -+ cnt_pmds = ctx->regs.cnt_pmds; -+ -+ /* -+ * look for pending interrupts and adjust PMD values accordingly -+ */ -+ list_for_each_entry(set, &ctx->set_list, list) { -+ -+ if (!set->npend_ovfls) -+ continue; -+ -+ num_ovfls = set->npend_ovfls; -+ PFM_DBG("set%u nintrs=%u", set->id, num_ovfls); -+ -+ for (i = first; num_ovfls; i++) { -+ if (test_bit(i, cast_ulp(set->povfl_pmds))) { -+ /* only correct value for counters */ -+ if (test_bit(i, cast_ulp(cnt_pmds))) -+ set->pmds[i].value += 1 + ovfl_mask; -+ num_ovfls--; -+ } -+ PFM_DBG("pmd%u set=%u val=0x%llx", -+ i, -+ set->id, -+ (unsigned long long)set->pmds[i].value); -+ } -+ /* -+ * we need to clear to prevent a pfm_getinfo_evtsets() from -+ * returning stale data even after the context is unloaded -+ */ -+ set->npend_ovfls = 0; -+ bitmap_zero(cast_ulp(set->povfl_pmds), 
ctx->regs.max_intr_pmd); -+ } -+} -+ -+ -+/** -+ * __pfm_unload_context - detach context from CPU or thread -+ * @ctx: context to operate on -+ * @release_info: pointer to return info (see below) -+ * -+ * The function must be called with the context locked and interrupts disabled. -+ * -+ * release_info value upon return: -+ * - bit 0: when set, must free context -+ * - bit 1: when set, must cancel hrtimer -+ */ -+int __pfm_unload_context(struct pfm_context *ctx, int *release_info) -+{ -+ struct task_struct *task; -+ int ret; -+ -+ PFM_DBG("ctx_state=%d task [%d]", -+ ctx->state, -+ ctx->task ? ctx->task->pid : -1); -+ -+ *release_info = 0; -+ -+ /* -+ * unload only when necessary -+ */ -+ if (ctx->state == PFM_CTX_UNLOADED) -+ return 0; -+ -+ task = ctx->task; -+ -+ /* -+ * stop monitoring -+ */ -+ ret = __pfm_stop(ctx, release_info); -+ if (ret) -+ return ret; -+ -+ ctx->state = PFM_CTX_UNLOADED; -+ ctx->flags.can_restart = 0; -+ -+ /* -+ * save active set -+ * UP: -+ * if not current task and due to lazy, state may -+ * still be live -+ * for system-wide, guaranteed to run on correct CPU -+ */ -+ if (__get_cpu_var(pmu_ctx) == ctx) { -+ /* -+ * pending overflows have been saved by pfm_stop() -+ */ -+ pfm_save_pmds(ctx, ctx->active_set); -+ pfm_set_pmu_owner(NULL, NULL); -+ PFM_DBG("released ownership"); -+ } -+ -+ /* -+ * account for pending overflows -+ */ -+ pfm_update_ovfl_pmds(ctx); -+ -+ /* -+ * arch-specific unload operations -+ */ -+ pfm_arch_unload_context(ctx); -+ -+ /* -+ * per-thread: disconnect from monitored task -+ */ -+ if (task) { -+ task->pfm_context = NULL; -+ ctx->task = NULL; -+ clear_tsk_thread_flag(task, TIF_PERFMON_CTXSW); -+ clear_tsk_thread_flag(task, TIF_PERFMON_WORK); -+ pfm_arch_disarm_handle_work(task); -+ } -+ /* -+ * session can be freed, must have interrupts enabled -+ * thus we release in the caller. Bit 0 signals to the -+ * caller that the session can be released. 
-+ */ -+ *release_info |= 0x1; -+ -+ return 0; -+} -+ -+/** -+ * __pfm_exit_thread - detach and free context on thread exit -+ */ -+void __pfm_exit_thread(void) -+{ -+ struct pfm_context *ctx; -+ unsigned long flags; -+ int free_ok = 0, release_info = 0; -+ int ret; -+ -+ ctx = current->pfm_context; -+ -+ BUG_ON(ctx->flags.system); -+ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ PFM_DBG("state=%d is_self=%d", ctx->state, ctx->flags.is_self); -+ -+ /* -+ * __pfm_unload_context() cannot fail -+ * in the context states we are interested in -+ */ -+ switch (ctx->state) { -+ case PFM_CTX_LOADED: -+ case PFM_CTX_MASKED: -+ __pfm_unload_context(ctx, &release_info); -+ /* -+ * end notification only sent for non -+ * self-monitoring context -+ */ -+ if (!ctx->flags.is_self) -+ pfm_end_notify(ctx); -+ break; -+ case PFM_CTX_ZOMBIE: -+ __pfm_unload_context(ctx, &release_info); -+ free_ok = 1; -+ break; -+ default: -+ BUG_ON(ctx->state != PFM_CTX_LOADED); -+ break; -+ } -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ -+ /* -+ * cancel timer now that context is unlocked -+ */ -+ if (release_info & 0x2) { -+ ret = hrtimer_cancel(&__get_cpu_var(pfm_hrtimer)); -+ PFM_DBG("timeout cancel=%d", ret); -+ } -+ -+ if (release_info & 0x1) -+ pfm_session_release(0, 0); -+ -+ /* -+ * All memory free operations (especially for vmalloc'ed memory) -+ * MUST be done with interrupts ENABLED. -+ */ -+ if (free_ok) -+ pfm_free_context(ctx); -+} -diff --git a/perfmon/perfmon_ctx.c b/perfmon/perfmon_ctx.c -new file mode 100644 -index 0000000..afe6078 ---- /dev/null -+++ b/perfmon/perfmon_ctx.c -@@ -0,0 +1,314 @@ -+/* -+ * perfmon_ctx.c: perfmon2 context functions -+ * -+ * This file implements the perfmon2 interface which -+ * provides access to the hardware performance counters -+ * of the host processor. -+ * -+ * -+ * The initial version of perfmon.c was written by -+ * Ganesh Venkitachalam, IBM Corp. 
-+ * -+ * Then it was modified for perfmon-1.x by Stephane Eranian and -+ * David Mosberger, Hewlett Packard Co. -+ * -+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x -+ * by Stephane Eranian, Hewlett Packard Co. -+ * -+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * David Mosberger-Tang -+ * -+ * More information about perfmon available at: -+ * http://perfmon2.sf.net -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+#include "perfmon_priv.h" -+ -+/* -+ * context memory pool pointer -+ */ -+static struct kmem_cache *pfm_ctx_cachep; -+ -+/** -+ * pfm_free_context - de-allocate context and associated resources -+ * @ctx: context to free -+ */ -+void pfm_free_context(struct pfm_context *ctx) -+{ -+ pfm_arch_context_free(ctx); -+ -+ pfm_free_sets(ctx); -+ -+ pfm_smpl_buf_free(ctx); -+ -+ PFM_DBG("free ctx @0x%p", ctx); -+ kmem_cache_free(pfm_ctx_cachep, ctx); -+ /* -+ * decrease refcount on: -+ * - PMU description table -+ * - sampling format -+ */ -+ pfm_pmu_conf_put(); -+ pfm_pmu_release(); -+} -+ -+/** -+ * pfm_ctx_flags_sane - check if context flags passed by user are okay -+ * @ctx_flags: flags passed user on pfm_create_context -+ * -+ * return: -+ * 0 if successful -+ * <0 and error code otherwise -+ */ -+static inline int 
pfm_ctx_flags_sane(u32 ctx_flags) -+{ -+ if (ctx_flags & PFM_FL_SYSTEM_WIDE) { -+ if (ctx_flags & PFM_FL_NOTIFY_BLOCK) { -+ PFM_DBG("cannot use blocking mode in syswide mode"); -+ return -EINVAL; -+ } -+ } -+ return 0; -+} -+ -+/** -+ * pfm_ctx_permissions - check authorization to create new context -+ * @ctx_flags: context flags passed by user -+ * -+ * check for permissions to create a context. -+ * -+ * A sysadmin may decide to restrict creation of per-thread -+ * and/or system-wide context to a group of users using the -+ * group id via /sys/kernel/perfmon/task_group and -+ * /sys/kernel/perfmon/sys_group. -+ * -+ * Once we identify a user level package which can be used -+ * to grant/revoke Linux capabilites at login via PAM, we will -+ * be able to use capabilities. We would also need to increase -+ * the size of cap_t to support more than 32 capabilities (it -+ * is currently defined as u32 and 32 capabilities are alrady -+ * defined). -+ */ -+static inline int pfm_ctx_permissions(u32 ctx_flags) -+{ -+ if ((ctx_flags & PFM_FL_SYSTEM_WIDE) -+ && pfm_controls.sys_group != PFM_GROUP_PERM_ANY -+ && !in_group_p(pfm_controls.sys_group)) { -+ PFM_DBG("user group not allowed to create a syswide ctx"); -+ return -EPERM; -+ } else if (pfm_controls.task_group != PFM_GROUP_PERM_ANY -+ && !in_group_p(pfm_controls.task_group)) { -+ PFM_DBG("user group not allowed to create a task context"); -+ return -EPERM; -+ } -+ return 0; -+} -+ -+/** -+ * __pfm_create_context - allocate and initialize a perfmon context -+ * @req : pfarg_ctx from user -+ * @fmt : pointer sampling format, NULL if not used -+ * @fmt_arg: pointer to argument to sampling format, NULL if not used -+ * @mode: PFM_NORMAL or PFM_COMPAT(IA-64 v2.0 compatibility) -+ * @ctx : address of new context upon succesful return, undefined otherwise -+ * -+ * function used to allocate a new context. A context is allocated along -+ * with the default event set. 
If a sampling format is used, the buffer -+ * may be allocated and initialized. -+ * -+ * The file descriptor identifying the context is allocated and returned -+ * to caller. -+ * -+ * This function operates with no locks and interrupts are enabled. -+ * return: -+ * >=0: the file descriptor to identify the context -+ * <0 : the error code -+ */ -+int __pfm_create_context(struct pfarg_ctx *req, -+ struct pfm_smpl_fmt *fmt, -+ void *fmt_arg, -+ int mode, -+ struct pfm_context **new_ctx) -+{ -+ struct pfm_context *ctx; -+ struct file *filp = NULL; -+ u32 ctx_flags; -+ int fd = 0, ret; -+ -+ ctx_flags = req->ctx_flags; -+ -+ /* Increase refcount on PMU description */ -+ ret = pfm_pmu_conf_get(1); -+ if (ret < 0) -+ goto error_conf; -+ -+ ret = pfm_ctx_flags_sane(ctx_flags); -+ if (ret < 0) -+ goto error_alloc; -+ -+ ret = pfm_ctx_permissions(ctx_flags); -+ if (ret < 0) -+ goto error_alloc; -+ -+ /* -+ * we can use GFP_KERNEL and potentially sleep because we do -+ * not hold any lock at this point. -+ */ -+ might_sleep(); -+ ret = -ENOMEM; -+ ctx = kmem_cache_zalloc(pfm_ctx_cachep, GFP_KERNEL); -+ if (!ctx) -+ goto error_alloc; -+ -+ PFM_DBG("alloc ctx @0x%p", ctx); -+ -+ INIT_LIST_HEAD(&ctx->set_list); -+ spin_lock_init(&ctx->lock); -+ init_completion(&ctx->restart_complete); -+ init_waitqueue_head(&ctx->msgq_wait); -+ -+ /* -+ * context is unloaded -+ */ -+ ctx->state = PFM_CTX_UNLOADED; -+ -+ /* -+ * initialization of context's flags -+ * must be done before pfm_find_set() -+ */ -+ ctx->flags.block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0; -+ ctx->flags.system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0; -+ ctx->flags.no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0; -+ ctx->flags.ia64_v20_compat = mode == PFM_COMPAT ? 
1 : 0; -+ -+ ret = pfm_pmu_acquire(ctx); -+ if (ret) -+ goto error_file; -+ /* -+ * check if PMU is usable -+ */ -+ if (!(ctx->regs.num_pmcs && ctx->regs.num_pmcs)) { -+ PFM_DBG("no usable PMU registers"); -+ ret = -EBUSY; -+ goto error_file; -+ } -+ -+ /* -+ * link to format, must be done first for correct -+ * error handling in pfm_context_free() -+ */ -+ ctx->smpl_fmt = fmt; -+ -+ ret = -ENFILE; -+ fd = pfm_alloc_fd(&filp); -+ if (fd < 0) -+ goto error_file; -+ -+ /* -+ * initialize arch-specific section -+ * must be done before fmt_init() -+ */ -+ ret = pfm_arch_context_create(ctx, ctx_flags); -+ if (ret) -+ goto error_set; -+ -+ ret = -ENOMEM; -+ -+ /* -+ * add initial set -+ */ -+ if (pfm_create_initial_set(ctx)) -+ goto error_set; -+ -+ /* -+ * does the user want to sample? -+ * must be done after pfm_pmu_acquire() because -+ * needs ctx->regs -+ */ -+ if (fmt) { -+ ret = pfm_setup_smpl_fmt(ctx, ctx_flags, fmt_arg, filp); -+ if (ret) -+ goto error_set; -+ } -+ -+ filp->private_data = ctx; -+ -+ ctx->last_act = PFM_INVALID_ACTIVATION; -+ ctx->last_cpu = -1; -+ -+ /* -+ * initialize notification message queue -+ */ -+ ctx->msgq_head = ctx->msgq_tail = 0; -+ -+ PFM_DBG("flags=0x%x system=%d notify_block=%d no_msg=%d" -+ " use_fmt=%d ctx_fd=%d mode=%d", -+ ctx_flags, -+ ctx->flags.system, -+ ctx->flags.block, -+ ctx->flags.no_msg, -+ !!fmt, -+ fd, mode); -+ -+ if (new_ctx) -+ *new_ctx = ctx; -+ -+ /* -+ * we defer the fd_install until we are certain the call succeeded -+ * to ensure we do not have to undo its effect. Neither put_filp() -+ * nor put_unused_fd() undoes the effect of fd_install(). 
-+ */ -+ fd_install(fd, filp); -+ -+ return fd; -+ -+error_set: -+ put_filp(filp); -+ put_unused_fd(fd); -+error_file: -+ /* -+ * calls the right *_put() functions -+ * calls pfm_release_pmu() -+ */ -+ pfm_free_context(ctx); -+ return ret; -+error_alloc: -+ pfm_pmu_conf_put(); -+error_conf: -+ pfm_smpl_fmt_put(fmt); -+ return ret; -+} -+ -+/** -+ * pfm_init_ctx -- initialize context SLAB -+ * -+ * called from pfm_init -+ */ -+int __init pfm_init_ctx(void) -+{ -+ pfm_ctx_cachep = kmem_cache_create("pfm_context", -+ sizeof(struct pfm_context)+PFM_ARCH_CTX_SIZE, -+ SLAB_HWCACHE_ALIGN, 0, NULL); -+ if (!pfm_ctx_cachep) { -+ PFM_ERR("cannot initialize context slab"); -+ return -ENOMEM; -+ } -+ return 0; -+} -diff --git a/perfmon/perfmon_ctxsw.c b/perfmon/perfmon_ctxsw.c -new file mode 100644 -index 0000000..9a28d13 ---- /dev/null -+++ b/perfmon/perfmon_ctxsw.c -@@ -0,0 +1,342 @@ -+/* -+ * perfmon_cxtsw.c: perfmon2 context switch code -+ * -+ * This file implements the perfmon2 interface which -+ * provides access to the hardware performance counters -+ * of the host processor. -+ * -+ * The initial version of perfmon.c was written by -+ * Ganesh Venkitachalam, IBM Corp. -+ * -+ * Then it was modified for perfmon-1.x by Stephane Eranian and -+ * David Mosberger, Hewlett Packard Co. -+ * -+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x -+ * by Stephane Eranian, Hewlett Packard Co. -+ * -+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * David Mosberger-Tang -+ * -+ * More information about perfmon available at: -+ * http://perfmon2.sf.net -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. 
-+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include "perfmon_priv.h" -+ -+void pfm_save_pmds(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ u64 val, ovfl_mask; -+ u64 *used_pmds, *cnt_pmds; -+ u16 i, num; -+ -+ ovfl_mask = pfm_pmu_conf->ovfl_mask; -+ num = set->nused_pmds; -+ cnt_pmds = ctx->regs.cnt_pmds; -+ used_pmds = set->used_pmds; -+ -+ /* -+ * save HW PMD, for counters, reconstruct 64-bit value -+ */ -+ for (i = 0; num; i++) { -+ if (test_bit(i, cast_ulp(used_pmds))) { -+ val = pfm_read_pmd(ctx, i); -+ if (likely(test_bit(i, cast_ulp(cnt_pmds)))) -+ val = (set->pmds[i].value & ~ovfl_mask) | -+ (val & ovfl_mask); -+ set->pmds[i].value = val; -+ num--; -+ } -+ } -+ pfm_arch_clear_pmd_ovfl_cond(ctx, set); -+} -+ -+/* -+ * interrupts are disabled (no preemption) -+ */ -+void __pfm_ctxswin_thread(struct task_struct *task, -+ struct pfm_context *ctx, u64 now) -+{ -+ u64 cur_act; -+ struct pfm_event_set *set; -+ int reload_pmcs, reload_pmds; -+ int mycpu, is_active; -+ -+ mycpu = smp_processor_id(); -+ -+ cur_act = __get_cpu_var(pmu_activation_number); -+ /* -+ * we need to lock context because it could be accessed -+ * from another CPU. Normally the schedule() functions -+ * has masked interrupts which should be enough to -+ * protect against PMU interrupts. 
-+ */ -+ spin_lock(&ctx->lock); -+ -+ is_active = pfm_arch_is_active(ctx); -+ -+ set = ctx->active_set; -+ -+ /* -+ * in case fo zombie, we do not complete ctswin of the -+ * PMU, and we force a call to pfm_handle_work() to finish -+ * cleanup, i.e., free context + smpl_buff. The reason for -+ * deferring to pfm_handle_work() is that it is not possible -+ * to vfree() with interrupts disabled. -+ */ -+ if (unlikely(ctx->state == PFM_CTX_ZOMBIE)) { -+ pfm_post_work(task, ctx, PFM_WORK_ZOMBIE); -+ goto done; -+ } -+ -+ /* -+ * if we were the last user of the PMU on that CPU, -+ * then nothing to do except restore psr -+ */ -+ if (ctx->last_cpu == mycpu && ctx->last_act == cur_act) { -+ /* -+ * check for forced reload conditions -+ */ -+ reload_pmcs = set->priv_flags & PFM_SETFL_PRIV_MOD_PMCS; -+ reload_pmds = set->priv_flags & PFM_SETFL_PRIV_MOD_PMDS; -+ } else { -+#ifndef CONFIG_SMP -+ pfm_check_save_prev_ctx(); -+#endif -+ reload_pmcs = 1; -+ reload_pmds = 1; -+ } -+ /* consumed */ -+ set->priv_flags &= ~PFM_SETFL_PRIV_MOD_BOTH; -+ -+ if (reload_pmds) -+ pfm_arch_restore_pmds(ctx, set); -+ -+ /* -+ * need to check if had in-flight interrupt in -+ * pfm_ctxswout_thread(). If at least one bit set, then we must replay -+ * the interrupt to avoid losing some important performance data. -+ * -+ * npend_ovfls is cleared in interrupt handler -+ */ -+ if (set->npend_ovfls) { -+ pfm_arch_resend_irq(ctx); -+ pfm_stats_inc(ovfl_intr_replay_count); -+ } -+ -+ if (reload_pmcs) -+ pfm_arch_restore_pmcs(ctx, set); -+ -+ /* -+ * record current activation for this context -+ */ -+ __get_cpu_var(pmu_activation_number)++; -+ ctx->last_cpu = mycpu; -+ ctx->last_act = __get_cpu_var(pmu_activation_number); -+ -+ /* -+ * establish new ownership. -+ */ -+ pfm_set_pmu_owner(task, ctx); -+ -+ pfm_arch_ctxswin_thread(task, ctx); -+ /* -+ * set->duration does not count when context in MASKED state. 
-+ * set->duration_start is reset in unmask_monitoring() -+ */ -+ set->duration_start = now; -+ -+ /* -+ * re-arm switch timeout, if necessary -+ * Timeout is active only if monitoring is active, -+ * i.e., LOADED + started -+ * -+ * We reload the remainder timeout or the full timeout. -+ * Remainder is recorded on context switch out or in -+ * pfm_load_context() -+ */ -+ if (ctx->state == PFM_CTX_LOADED -+ && (set->flags & PFM_SETFL_TIME_SWITCH) && is_active) { -+ pfm_restart_timer(ctx, set); -+ /* careful here as pfm_restart_timer may switch sets */ -+ } -+done: -+ spin_unlock(&ctx->lock); -+} -+ -+/* -+ * interrupts are masked, runqueue lock is held. -+ * -+ * In UP. we simply stop monitoring and leave the state -+ * in place, i.e., lazy save -+ */ -+void __pfm_ctxswout_thread(struct task_struct *task, -+ struct pfm_context *ctx, u64 now) -+{ -+ struct pfm_event_set *set; -+ int need_save_pmds, is_active; -+ -+ /* -+ * we need to lock context because it could be accessed -+ * from another CPU. Normally the schedule() functions -+ * has masked interrupts which should be enough to -+ * protect against PMU interrupts. -+ */ -+ -+ spin_lock(&ctx->lock); -+ -+ is_active = pfm_arch_is_active(ctx); -+ set = ctx->active_set; -+ -+ /* -+ * stop monitoring and -+ * collect pending overflow information -+ * needed on ctxswin. We cannot afford to lose -+ * a PMU interrupt. 
-+ */ -+ need_save_pmds = pfm_arch_ctxswout_thread(task, ctx); -+ -+ if (ctx->state == PFM_CTX_LOADED) { -+ /* -+ * accumulate only when set is actively monitoring, -+ */ -+ set->duration += now - set->duration_start; -+ -+ /* -+ * record remaining timeout -+ * reload in pfm_ctxsw_in() -+ */ -+ if (is_active && (set->flags & PFM_SETFL_TIME_SWITCH)) { -+ struct hrtimer *h = NULL; -+ h = &__get_cpu_var(pfm_hrtimer); -+ hrtimer_cancel(h); -+ set->hrtimer_rem = hrtimer_get_remaining(h); -+ PFM_DBG_ovfl("hrtimer=%lld", -+ (long long)set->hrtimer_rem.tv64); -+ } -+ } -+ -+#ifdef CONFIG_SMP -+ /* -+ * in SMP, release ownership of this PMU. -+ * PMU interrupts are masked, so nothing -+ * can happen. -+ */ -+ pfm_set_pmu_owner(NULL, NULL); -+ -+ /* -+ * On some architectures, it is necessary to read the -+ * PMD registers to check for pending overflow in -+ * pfm_arch_ctxswout_thread(). In that case, saving of -+ * the PMDs may be done there and not here. -+ */ -+ if (need_save_pmds) -+ pfm_save_pmds(ctx, set); -+#endif -+ spin_unlock(&ctx->lock); -+} -+ -+/* -+ * -+ */ -+static void __pfm_ctxswout_sys(struct task_struct *prev, -+ struct task_struct *next) -+{ -+ struct pfm_context *ctx; -+ -+ ctx = __get_cpu_var(pmu_ctx); -+ BUG_ON(!ctx); -+ -+ /* -+ * propagate TIF_PERFMON_CTXSW to ensure that: -+ * - previous task has TIF_PERFMON_CTXSW cleared, in case it is -+ * scheduled onto another CPU where there is syswide monitoring -+ * - next task has TIF_PERFMON_CTXSW set to ensure it will come back -+ * here when context switched out -+ */ -+ clear_tsk_thread_flag(prev, TIF_PERFMON_CTXSW); -+ set_tsk_thread_flag(next, TIF_PERFMON_CTXSW); -+ -+ /* -+ * nothing to do until actually started -+ * XXX: assumes no mean to start from user level -+ */ -+ if (!ctx->flags.started) -+ return; -+ -+ pfm_arch_ctxswout_sys(prev, ctx); -+} -+ -+/* -+ * -+ */ -+static void __pfm_ctxswin_sys(struct task_struct *prev, -+ struct task_struct *next) -+{ -+ struct pfm_context *ctx; -+ -+ ctx = 
__get_cpu_var(pmu_ctx); -+ BUG_ON(!ctx); -+ -+ /* -+ * nothing to do until actually started -+ * XXX: assumes no mean to start from user level -+ */ -+ if (!ctx->flags.started) -+ return; -+ -+ pfm_arch_ctxswin_sys(next, ctx); -+} -+ -+void pfm_ctxsw_out(struct task_struct *prev, -+ struct task_struct *next) -+{ -+ struct pfm_context *ctxp; -+ u64 now; -+ -+ now = sched_clock(); -+ -+ ctxp = prev->pfm_context; -+ -+ if (ctxp) -+ __pfm_ctxswout_thread(prev, ctxp, now); -+ else -+ __pfm_ctxswout_sys(prev, next); -+ -+ pfm_stats_inc(ctxswout_count); -+ pfm_stats_add(ctxswout_ns, sched_clock() - now); -+} -+ -+void pfm_ctxsw_in(struct task_struct *prev, -+ struct task_struct *next) -+{ -+ struct pfm_context *ctxn; -+ u64 now; -+ -+ now = sched_clock(); -+ -+ ctxn = next->pfm_context; -+ -+ if (ctxn) -+ __pfm_ctxswin_thread(next, ctxn, now); -+ else -+ __pfm_ctxswin_sys(prev, next); -+ -+ pfm_stats_inc(ctxswin_count); -+ pfm_stats_add(ctxswin_ns, sched_clock() - now); -+} -diff --git a/perfmon/perfmon_debugfs.c b/perfmon/perfmon_debugfs.c -new file mode 100644 -index 0000000..e4d2fad ---- /dev/null -+++ b/perfmon/perfmon_debugfs.c -@@ -0,0 +1,168 @@ -+/* -+ * perfmon_debugfs.c: perfmon2 statistics interface to debugfs -+ * -+ * This file implements the perfmon2 interface which -+ * provides access to the hardware performance counters -+ * of the host processor. -+ * -+ * The initial version of perfmon.c was written by -+ * Ganesh Venkitachalam, IBM Corp. -+ * -+ * Then it was modified for perfmon-1.x by Stephane Eranian and -+ * David Mosberger, Hewlett Packard Co. -+ * -+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x -+ * by Stephane Eranian, Hewlett Packard Co. -+ * -+ * Copyright (c) 2007 Hewlett-Packard Development Company, L.P. 
-+ * Contributed by Stephane Eranian -+ * -+ * More information about perfmon available at: -+ * http://perfmon2.sf.net -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+ -+/* -+ * to make the statistics visible to user space: -+ * $ mount -t debugfs none /mnt -+ * $ cd /mnt/perfmon -+ * then choose a CPU subdir -+ */ -+DECLARE_PER_CPU(struct pfm_stats, pfm_stats); -+ -+static struct dentry *pfm_debugfs_dir; -+ -+void pfm_reset_stats(int cpu) -+{ -+ struct pfm_stats *st; -+ unsigned long flags; -+ -+ st = &per_cpu(pfm_stats, cpu); -+ -+ local_irq_save(flags); -+ memset(st->v, 0, sizeof(st->v)); -+ local_irq_restore(flags); -+} -+ -+static const char *pfm_stats_strs[] = { -+ "ovfl_intr_all_count", -+ "ovfl_intr_ns", -+ "ovfl_intr_spurious_count", -+ "ovfl_intr_replay_count", -+ "ovfl_intr_regular_count", -+ "handle_work_count", -+ "ovfl_notify_count", -+ "reset_pmds_count", -+ "pfm_restart_count", -+ "fmt_handler_calls", -+ "fmt_handler_ns", -+ "set_switch_count", -+ "set_switch_ns", -+ "set_switch_exp", -+ "ctxswin_count", -+ "ctxswin_ns", -+ "handle_timeout_count", -+ "ovfl_intr_nmi_count", -+ "ctxswout_count", -+ "ctxswout_ns", -+}; -+#define PFM_NUM_STRS ARRAY_SIZE(pfm_stats_strs) -+ -+void pfm_debugfs_del_cpu(int cpu) -+{ -+ struct pfm_stats *st; -+ int i; -+ -+ st = &per_cpu(pfm_stats, cpu); -+ -+ 
for (i = 0; i < PFM_NUM_STATS; i++) { -+ if (st->dirs[i]) -+ debugfs_remove(st->dirs[i]); -+ st->dirs[i] = NULL; -+ } -+ if (st->cpu_dir) -+ debugfs_remove(st->cpu_dir); -+ st->cpu_dir = NULL; -+} -+ -+int pfm_debugfs_add_cpu(int cpu) -+{ -+ struct pfm_stats *st; -+ int i; -+ -+ /* -+ * sanity check between stats names and the number -+ * of entries in the pfm_stats value array. -+ */ -+ if (PFM_NUM_STRS != PFM_NUM_STATS) { -+ PFM_ERR("PFM_NUM_STRS != PFM_NUM_STATS error"); -+ return -1; -+ } -+ -+ st = &per_cpu(pfm_stats, cpu); -+ sprintf(st->cpu_name, "cpu%d", cpu); -+ -+ st->cpu_dir = debugfs_create_dir(st->cpu_name, pfm_debugfs_dir); -+ if (!st->cpu_dir) -+ return -1; -+ -+ for (i = 0; i < PFM_NUM_STATS; i++) { -+ st->dirs[i] = debugfs_create_u64(pfm_stats_strs[i], -+ S_IRUGO, -+ st->cpu_dir, -+ &st->v[i]); -+ if (!st->dirs[i]) -+ goto error; -+ } -+ pfm_reset_stats(cpu); -+ return 0; -+error: -+ while (i >= 0) { -+ debugfs_remove(st->dirs[i]); -+ i--; -+ } -+ debugfs_remove(st->cpu_dir); -+ return -1; -+} -+ -+/* -+ * called once from pfm_init() -+ */ -+int __init pfm_init_debugfs(void) -+{ -+ int cpu1, cpu2, ret; -+ -+ pfm_debugfs_dir = debugfs_create_dir("perfmon", NULL); -+ if (!pfm_debugfs_dir) -+ return -1; -+ -+ for_each_online_cpu(cpu1) { -+ ret = pfm_debugfs_add_cpu(cpu1); -+ if (ret) -+ goto error; -+ } -+ return 0; -+error: -+ for_each_online_cpu(cpu2) { -+ if (cpu2 == cpu1) -+ break; -+ pfm_debugfs_del_cpu(cpu2); -+ } -+ return -1; -+} -diff --git a/perfmon/perfmon_dfl_smpl.c b/perfmon/perfmon_dfl_smpl.c -new file mode 100644 -index 0000000..8c83489 ---- /dev/null -+++ b/perfmon/perfmon_dfl_smpl.c -@@ -0,0 +1,298 @@ -+/* -+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This file implements the new default sampling buffer format -+ * for the perfmon2 subsystem. 
-+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+MODULE_AUTHOR("Stephane Eranian "); -+MODULE_DESCRIPTION("new perfmon default sampling format"); -+MODULE_LICENSE("GPL"); -+ -+static int pfm_dfl_fmt_validate(u32 ctx_flags, u16 npmds, void *data) -+{ -+ struct pfm_dfl_smpl_arg *arg = data; -+ u64 min_buf_size; -+ -+ if (data == NULL) { -+ PFM_DBG("no argument passed"); -+ return -EINVAL; -+ } -+ -+ /* -+ * sanity check in case size_t is smaller then u64 -+ */ -+#if BITS_PER_LONG == 4 -+#define MAX_SIZE_T (1ULL<<(sizeof(size_t)<<3)) -+ if (sizeof(size_t) < sizeof(arg->buf_size)) { -+ if (arg->buf_size >= MAX_SIZE_T) -+ return -ETOOBIG; -+ } -+#endif -+ -+ /* -+ * compute min buf size. npmds is the maximum number -+ * of implemented PMD registers. 
-+ */ -+ min_buf_size = sizeof(struct pfm_dfl_smpl_hdr) -+ + (sizeof(struct pfm_dfl_smpl_entry) + (npmds*sizeof(u64))); -+ -+ PFM_DBG("validate ctx_flags=0x%x flags=0x%x npmds=%u " -+ "min_buf_size=%llu buf_size=%llu\n", -+ ctx_flags, -+ arg->buf_flags, -+ npmds, -+ (unsigned long long)min_buf_size, -+ (unsigned long long)arg->buf_size); -+ -+ /* -+ * must hold at least the buffer header + one minimally sized entry -+ */ -+ if (arg->buf_size < min_buf_size) -+ return -EINVAL; -+ -+ return 0; -+} -+ -+static int pfm_dfl_fmt_get_size(u32 flags, void *data, size_t *size) -+{ -+ struct pfm_dfl_smpl_arg *arg = data; -+ -+ /* -+ * size has been validated in default_validate -+ * we can never loose bits from buf_size. -+ */ -+ *size = (size_t)arg->buf_size; -+ -+ return 0; -+} -+ -+static int pfm_dfl_fmt_init(struct pfm_context *ctx, void *buf, u32 ctx_flags, -+ u16 npmds, void *data) -+{ -+ struct pfm_dfl_smpl_hdr *hdr; -+ struct pfm_dfl_smpl_arg *arg = data; -+ -+ hdr = buf; -+ -+ hdr->hdr_version = PFM_DFL_SMPL_VERSION; -+ hdr->hdr_buf_size = arg->buf_size; -+ hdr->hdr_buf_flags = arg->buf_flags; -+ hdr->hdr_cur_offs = sizeof(*hdr); -+ hdr->hdr_overflows = 0; -+ hdr->hdr_count = 0; -+ hdr->hdr_min_buf_space = sizeof(struct pfm_dfl_smpl_entry) + (npmds*sizeof(u64)); -+ /* -+ * due to cache aliasing, it may be necessary to flush the cache -+ * on certain architectures (e.g., MIPS) -+ */ -+ pfm_cacheflush(hdr, sizeof(*hdr)); -+ -+ PFM_DBG("buffer=%p buf_size=%llu hdr_size=%zu hdr_version=%u.%u " -+ "min_space=%llu npmds=%u", -+ buf, -+ (unsigned long long)hdr->hdr_buf_size, -+ sizeof(*hdr), -+ PFM_VERSION_MAJOR(hdr->hdr_version), -+ PFM_VERSION_MINOR(hdr->hdr_version), -+ (unsigned long long)hdr->hdr_min_buf_space, -+ npmds); -+ -+ return 0; -+} -+ -+/* -+ * called from pfm_overflow_handler() to record a new sample -+ * -+ * context is locked, interrupts are disabled (no preemption) -+ */ -+static int pfm_dfl_fmt_handler(struct pfm_context *ctx, -+ unsigned long ip, u64 
tstamp, void *data) -+{ -+ struct pfm_dfl_smpl_hdr *hdr; -+ struct pfm_dfl_smpl_entry *ent; -+ struct pfm_ovfl_arg *arg; -+ void *cur, *last; -+ u64 *e; -+ size_t entry_size, min_size; -+ u16 npmds, i; -+ u16 ovfl_pmd; -+ void *buf; -+ -+ hdr = ctx->smpl_addr; -+ arg = &ctx->ovfl_arg; -+ -+ buf = hdr; -+ cur = buf+hdr->hdr_cur_offs; -+ last = buf+hdr->hdr_buf_size; -+ ovfl_pmd = arg->ovfl_pmd; -+ min_size = hdr->hdr_min_buf_space; -+ -+ /* -+ * precheck for sanity -+ */ -+ if ((last - cur) < min_size) -+ goto full; -+ -+ npmds = arg->num_smpl_pmds; -+ -+ ent = (struct pfm_dfl_smpl_entry *)cur; -+ -+ entry_size = sizeof(*ent) + (npmds << 3); -+ -+ /* position for first pmd */ -+ e = (u64 *)(ent+1); -+ -+ hdr->hdr_count++; -+ -+ PFM_DBG_ovfl("count=%llu cur=%p last=%p free_bytes=%zu ovfl_pmd=%d " -+ "npmds=%u", -+ (unsigned long long)hdr->hdr_count, -+ cur, last, -+ (last-cur), -+ ovfl_pmd, -+ npmds); -+ -+ /* -+ * current = task running at the time of the overflow. -+ * -+ * per-task mode: -+ * - this is usually the task being monitored. 
-+ * Under certain conditions, it might be a different task -+ * -+ * system-wide: -+ * - this is not necessarily the task controlling the session -+ */ -+ ent->pid = current->pid; -+ ent->ovfl_pmd = ovfl_pmd; -+ ent->last_reset_val = arg->pmd_last_reset; -+ -+ /* -+ * where did the fault happen (includes slot number) -+ */ -+ ent->ip = ip; -+ -+ ent->tstamp = tstamp; -+ ent->cpu = smp_processor_id(); -+ ent->set = arg->active_set; -+ ent->tgid = current->tgid; -+ -+ /* -+ * selectively store PMDs in increasing index number -+ */ -+ if (npmds) { -+ u64 *val = arg->smpl_pmds_values; -+ for (i = 0; i < npmds; i++) -+ *e++ = *val++; -+ } -+ -+ /* -+ * update position for next entry -+ */ -+ hdr->hdr_cur_offs += entry_size; -+ cur += entry_size; -+ -+ pfm_cacheflush(hdr, sizeof(*hdr)); -+ pfm_cacheflush(ent, entry_size); -+ -+ /* -+ * post check to avoid losing the last sample -+ */ -+ if ((last - cur) < min_size) -+ goto full; -+ -+ /* reset before returning from interrupt handler */ -+ arg->ovfl_ctrl = PFM_OVFL_CTRL_RESET; -+ -+ return 0; -+full: -+ PFM_DBG_ovfl("sampling buffer full free=%zu, count=%llu", -+ last-cur, -+ (unsigned long long)hdr->hdr_count); -+ -+ /* -+ * increment number of buffer overflows. -+ * important to detect duplicate set of samples. -+ */ -+ hdr->hdr_overflows++; -+ -+ /* -+ * request notification and masking of monitoring. -+ * Notification is still subject to the overflowed -+ * register having the FL_NOTIFY flag set. 
-+ */ -+ arg->ovfl_ctrl = PFM_OVFL_CTRL_NOTIFY | PFM_OVFL_CTRL_MASK; -+ -+ return -ENOBUFS; /* we are full, sorry */ -+} -+ -+static int pfm_dfl_fmt_restart(int is_active, u32 *ovfl_ctrl, void *buf) -+{ -+ struct pfm_dfl_smpl_hdr *hdr; -+ -+ hdr = buf; -+ -+ hdr->hdr_count = 0; -+ hdr->hdr_cur_offs = sizeof(*hdr); -+ -+ pfm_cacheflush(hdr, sizeof(*hdr)); -+ -+ *ovfl_ctrl = PFM_OVFL_CTRL_RESET; -+ -+ return 0; -+} -+ -+static int pfm_dfl_fmt_exit(void *buf) -+{ -+ return 0; -+} -+ -+static struct pfm_smpl_fmt dfl_fmt = { -+ .fmt_name = "default", -+ .fmt_version = 0x10000, -+ .fmt_arg_size = sizeof(struct pfm_dfl_smpl_arg), -+ .fmt_validate = pfm_dfl_fmt_validate, -+ .fmt_getsize = pfm_dfl_fmt_get_size, -+ .fmt_init = pfm_dfl_fmt_init, -+ .fmt_handler = pfm_dfl_fmt_handler, -+ .fmt_restart = pfm_dfl_fmt_restart, -+ .fmt_exit = pfm_dfl_fmt_exit, -+ .fmt_flags = PFM_FMT_BUILTIN_FLAG, -+ .owner = THIS_MODULE -+}; -+ -+static int pfm_dfl_fmt_init_module(void) -+{ -+ return pfm_fmt_register(&dfl_fmt); -+} -+ -+static void pfm_dfl_fmt_cleanup_module(void) -+{ -+ pfm_fmt_unregister(&dfl_fmt); -+} -+ -+module_init(pfm_dfl_fmt_init_module); -+module_exit(pfm_dfl_fmt_cleanup_module); -diff --git a/perfmon/perfmon_file.c b/perfmon/perfmon_file.c -new file mode 100644 -index 0000000..1cde81b ---- /dev/null -+++ b/perfmon/perfmon_file.c -@@ -0,0 +1,751 @@ -+/* -+ * perfmon_file.c: perfmon2 file input/output functions -+ * -+ * This file implements the perfmon2 interface which -+ * provides access to the hardware performance counters -+ * of the host processor. -+ * -+ * The initial version of perfmon.c was written by -+ * Ganesh Venkitachalam, IBM Corp. -+ * -+ * Then it was modified for perfmon-1.x by Stephane Eranian and -+ * David Mosberger, Hewlett Packard Co. -+ * -+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x -+ * by Stephane Eranian, Hewlett Packard Co. -+ * -+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. 
-+ * Contributed by Stephane Eranian -+ * David Mosberger-Tang -+ * -+ * More information about perfmon available at: -+ * http://perfmon2.sf.net -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "perfmon_priv.h" -+ -+#define PFMFS_MAGIC 0xa0b4d889 /* perfmon filesystem magic number */ -+ -+struct pfm_controls pfm_controls = { -+ .sys_group = PFM_GROUP_PERM_ANY, -+ .task_group = PFM_GROUP_PERM_ANY, -+ .arg_mem_max = PAGE_SIZE, -+ .smpl_buffer_mem_max = ~0, -+}; -+EXPORT_SYMBOL(pfm_controls); -+ -+static int __init enable_debug(char *str) -+{ -+ pfm_controls.debug = 1; -+ PFM_INFO("debug output enabled\n"); -+ return 1; -+} -+__setup("perfmon_debug", enable_debug); -+ -+static int pfmfs_delete_dentry(struct dentry *dentry) -+{ -+ return 1; -+} -+ -+static struct dentry_operations pfmfs_dentry_operations = { -+ .d_delete = pfmfs_delete_dentry, -+}; -+ -+int pfm_buf_map_pagefault(struct vm_area_struct *vma, struct vm_fault *vmf) -+{ -+ void *kaddr; -+ unsigned long address; -+ struct pfm_context *ctx; -+ size_t size; -+ -+ address = (unsigned long)vmf->virtual_address; -+ -+ ctx = vma->vm_private_data; -+ if (ctx == NULL) { -+ PFM_DBG("no ctx"); -+ return VM_FAULT_SIGBUS; -+ } -+ /* -+ * size available to user (maybe different from 
real_smpl_size -+ */ -+ size = ctx->smpl_size; -+ -+ if ((address < vma->vm_start) || -+ (address >= (vma->vm_start + size))) -+ return VM_FAULT_SIGBUS; -+ -+ kaddr = ctx->smpl_addr + (address - vma->vm_start); -+ -+ vmf->page = vmalloc_to_page(kaddr); -+ get_page(vmf->page); -+ -+ PFM_DBG("[%d] start=%p ref_count=%d", -+ current->pid, -+ kaddr, page_count(vmf->page)); -+ -+ return 0; -+} -+ -+/* -+ * we need to determine whther or not we are closing the last reference -+ * to the file and thus are going to end up in pfm_close() which eventually -+ * calls pfm_release_buf_space(). In that function, we update the accouting -+ * for locked_vm given that we are actually freeing the sampling buffer. The -+ * issue is that there are multiple paths leading to pfm_release_buf_space(), -+ * from exit(), munmap(), close(). The path coming from munmap() is problematic -+ * becuse do_munmap() grabs mmap_sem in write-mode which is also what -+ * pfm_release_buf_space does. To avoid deadlock, we need to determine where -+ * we are calling from and skip the locking. The vm_ops->close() callback -+ * is invoked for each remove_vma() independently of the number of references -+ * left on the file descriptor, therefore simple reference counter does not -+ * work. We need to determine if this is the last call, and then set a flag -+ * to skip the locking. -+ */ -+static void pfm_buf_map_close(struct vm_area_struct *vma) -+{ -+ struct file *file; -+ struct pfm_context *ctx; -+ -+ file = vma->vm_file; -+ ctx = vma->vm_private_data; -+ -+ /* -+ * if file is going to close, then pfm_close() will -+ * be called, do not lock in pfm_release_buf -+ */ -+ if (atomic_read(&file->f_count) == 1) -+ ctx->flags.mmap_nlock = 1; -+} -+ -+/* -+ * we do not have a close callback because, the locked -+ * memory accounting must be done when the actual buffer -+ * is freed. Munmap does not free the page backing the vma -+ * because they may still be in use by the PMU interrupt handler. 
-+ */ -+struct vm_operations_struct pfm_buf_map_vm_ops = { -+ .fault = pfm_buf_map_pagefault, -+ .close = pfm_buf_map_close -+}; -+ -+static int pfm_mmap_buffer(struct pfm_context *ctx, struct vm_area_struct *vma, -+ size_t size) -+{ -+ if (ctx->smpl_addr == NULL) { -+ PFM_DBG("no sampling buffer to map"); -+ return -EINVAL; -+ } -+ -+ if (size > ctx->smpl_size) { -+ PFM_DBG("mmap size=%zu >= actual buf size=%zu", -+ size, -+ ctx->smpl_size); -+ return -EINVAL; -+ } -+ -+ vma->vm_ops = &pfm_buf_map_vm_ops; -+ vma->vm_private_data = ctx; -+ -+ return 0; -+} -+ -+static int pfm_mmap(struct file *file, struct vm_area_struct *vma) -+{ -+ size_t size; -+ struct pfm_context *ctx; -+ unsigned long flags; -+ int ret; -+ -+ PFM_DBG("pfm_file_ops"); -+ -+ ctx = file->private_data; -+ size = (vma->vm_end - vma->vm_start); -+ -+ if (ctx == NULL) -+ return -EINVAL; -+ -+ ret = -EINVAL; -+ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ if (vma->vm_flags & VM_WRITE) { -+ PFM_DBG("cannot map buffer for writing"); -+ goto done; -+ } -+ -+ PFM_DBG("vm_pgoff=%lu size=%zu vm_start=0x%lx", -+ vma->vm_pgoff, -+ size, -+ vma->vm_start); -+ -+ ret = pfm_mmap_buffer(ctx, vma, size); -+ if (ret == 0) -+ vma->vm_flags |= VM_RESERVED; -+ -+ PFM_DBG("ret=%d vma_flags=0x%lx vma_start=0x%lx vma_size=%lu", -+ ret, -+ vma->vm_flags, -+ vma->vm_start, -+ vma->vm_end-vma->vm_start); -+done: -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ -+ return ret; -+} -+ -+/* -+ * Extract one message from queue. -+ * -+ * return: -+ * -EAGAIN: when non-blocking and nothing is* in the queue. -+ * -ERESTARTSYS: when blocking and signal is pending -+ * Otherwise returns size of message (sizeof(pfarg_msg)) -+ */ -+ssize_t __pfm_read(struct pfm_context *ctx, union pfarg_msg *msg_buf, int non_block) -+{ -+ ssize_t ret = 0; -+ unsigned long flags; -+ DECLARE_WAITQUEUE(wait, current); -+ -+ /* -+ * we must masks interrupts to avoid a race condition -+ * with the PMU interrupt handler. 
-+ */ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ while (pfm_msgq_is_empty(ctx)) { -+ -+ /* -+ * handle non-blocking reads -+ * return -EAGAIN -+ */ -+ ret = -EAGAIN; -+ if (non_block) -+ break; -+ -+ add_wait_queue(&ctx->msgq_wait, &wait); -+ set_current_state(TASK_INTERRUPTIBLE); -+ -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ -+ schedule(); -+ -+ /* -+ * during this window, another thread may call -+ * pfm_read() and steal our message -+ */ -+ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ remove_wait_queue(&ctx->msgq_wait, &wait); -+ set_current_state(TASK_RUNNING); -+ -+ /* -+ * check for pending signals -+ * return -ERESTARTSYS -+ */ -+ ret = -ERESTARTSYS; -+ if (signal_pending(current)) -+ break; -+ -+ /* -+ * we may have a message -+ */ -+ ret = 0; -+ } -+ -+ /* -+ * extract message -+ */ -+ if (ret == 0) { -+ /* -+ * copy the oldest message into msg_buf. -+ * We cannot directly call copy_to_user() -+ * because interrupts masked. This is done -+ * in the caller -+ */ -+ pfm_get_next_msg(ctx, msg_buf); -+ -+ ret = sizeof(*msg_buf); -+ -+ PFM_DBG("extracted type=%d", msg_buf->type); -+ } -+ -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ -+ PFM_DBG("blocking=%d ret=%zd", non_block, ret); -+ -+ return ret; -+} -+ -+static ssize_t pfm_read(struct file *filp, char __user *buf, size_t size, -+ loff_t *ppos) -+{ -+ struct pfm_context *ctx; -+ union pfarg_msg msg_buf; -+ int non_block, ret; -+ -+ PFM_DBG_ovfl("buf=%p size=%zu", buf, size); -+ -+ ctx = filp->private_data; -+ if (ctx == NULL) { -+ PFM_ERR("no ctx for pfm_read"); -+ return -EINVAL; -+ } -+ -+ non_block = filp->f_flags & O_NONBLOCK; -+ -+#ifdef CONFIG_IA64_PERFMON_COMPAT -+ /* -+ * detect IA-64 v2.0 context read (message size is different) -+ * nops on all other architectures -+ */ -+ if (unlikely(ctx->flags.ia64_v20_compat)) -+ return pfm_arch_compat_read(ctx, buf, non_block, size); -+#endif -+ /* -+ * cannot extract partial messages. 
-+ * check even when there is no message -+ * -+ * cannot extract more than one message per call. Bytes -+ * above sizeof(msg) are ignored. -+ */ -+ if (size < sizeof(msg_buf)) { -+ PFM_DBG("message is too small size=%zu must be >=%zu)", -+ size, -+ sizeof(msg_buf)); -+ return -EINVAL; -+ } -+ -+ ret = __pfm_read(ctx, &msg_buf, non_block); -+ if (ret > 0) { -+ if (copy_to_user(buf, &msg_buf, sizeof(msg_buf))) -+ ret = -EFAULT; -+ } -+ PFM_DBG_ovfl("ret=%d", ret); -+ return ret; -+} -+ -+static ssize_t pfm_write(struct file *file, const char __user *ubuf, -+ size_t size, loff_t *ppos) -+{ -+ PFM_DBG("pfm_write called"); -+ return -EINVAL; -+} -+ -+static unsigned int pfm_poll(struct file *filp, poll_table *wait) -+{ -+ struct pfm_context *ctx; -+ unsigned long flags; -+ unsigned int mask = 0; -+ -+ PFM_DBG("pfm_file_ops"); -+ -+ if (filp->f_op != &pfm_file_ops) { -+ PFM_ERR("pfm_poll bad magic"); -+ return 0; -+ } -+ -+ ctx = filp->private_data; -+ if (ctx == NULL) { -+ PFM_ERR("pfm_poll no ctx"); -+ return 0; -+ } -+ -+ PFM_DBG("before poll_wait"); -+ -+ poll_wait(filp, &ctx->msgq_wait, wait); -+ -+ /* -+ * pfm_msgq_is_empty() is non-atomic -+ * -+ * filp is protected by fget() at upper level -+ * context cannot be closed by another thread. -+ * -+ * There may be a race with a PMU interrupt adding -+ * messages to the queue. But we are interested in -+ * queue not empty, so adding more messages should -+ * not really be a problem. -+ * -+ * There may be a race with another thread issuing -+ * a read() and stealing messages from the queue thus -+ * may return the wrong answer. 
This could potentially -+ * lead to a blocking read, because nothing is -+ * available in the queue -+ */ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ if (!pfm_msgq_is_empty(ctx)) -+ mask = POLLIN | POLLRDNORM; -+ -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ -+ PFM_DBG("after poll_wait mask=0x%x", mask); -+ -+ return mask; -+} -+ -+static int pfm_ioctl(struct inode *inode, struct file *file, unsigned int cmd, -+ unsigned long arg) -+{ -+ PFM_DBG("pfm_ioctl called"); -+ return -EINVAL; -+} -+ -+/* -+ * interrupt cannot be masked when entering this function -+ */ -+static inline int __pfm_fasync(int fd, struct file *filp, -+ struct pfm_context *ctx, int on) -+{ -+ int ret; -+ -+ PFM_DBG("in fd=%d on=%d async_q=%p", -+ fd, -+ on, -+ ctx->async_queue); -+ -+ ret = fasync_helper(fd, filp, on, &ctx->async_queue); -+ -+ PFM_DBG("out fd=%d on=%d async_q=%p ret=%d", -+ fd, -+ on, -+ ctx->async_queue, ret); -+ -+ return ret; -+} -+ -+static int pfm_fasync(int fd, struct file *filp, int on) -+{ -+ struct pfm_context *ctx; -+ int ret; -+ -+ PFM_DBG("pfm_file_ops"); -+ -+ ctx = filp->private_data; -+ if (ctx == NULL) { -+ PFM_ERR("pfm_fasync no ctx"); -+ return -EBADF; -+ } -+ -+ /* -+ * we cannot mask interrupts during this call because this may -+ * may go to sleep if memory is not readily avalaible. -+ * -+ * We are protected from the context disappearing by the -+ * get_fd()/put_fd() done in caller. Serialization of this function -+ * is ensured by caller. -+ */ -+ ret = __pfm_fasync(fd, filp, ctx, on); -+ -+ PFM_DBG("pfm_fasync called on fd=%d on=%d async_queue=%p ret=%d", -+ fd, -+ on, -+ ctx->async_queue, ret); -+ -+ return ret; -+} -+ -+#ifdef CONFIG_SMP -+static void __pfm_close_remote_cpu(void *info) -+{ -+ struct pfm_context *ctx = info; -+ int can_release; -+ -+ BUG_ON(ctx != __get_cpu_var(pmu_ctx)); -+ -+ /* -+ * we are in IPI interrupt handler which has always higher -+ * priority than PMU interrupt, therefore we do not need to -+ * mask interrupts. 
context locking is not needed because we -+ * are in close(), no more user references. -+ * -+ * can_release is ignored, release done on calling CPU -+ */ -+ __pfm_unload_context(ctx, &can_release); -+ -+ /* -+ * we cannot free context here because we are in_interrupt(). -+ * we free on the calling CPU -+ */ -+} -+ -+static int pfm_close_remote_cpu(u32 cpu, struct pfm_context *ctx) -+{ -+ BUG_ON(irqs_disabled()); -+ return smp_call_function_single(cpu, __pfm_close_remote_cpu, ctx, 1); -+} -+#endif /* CONFIG_SMP */ -+ -+/* -+ * called either on explicit close() or from exit_files(). -+ * Only the LAST user of the file gets to this point, i.e., it is -+ * called only ONCE. -+ * -+ * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero -+ * (fput()),i.e, last task to access the file. Nobody else can access the -+ * file at this point. -+ * -+ * When called from exit_files(), the VMA has been freed because exit_mm() -+ * is executed before exit_files(). -+ * -+ * When called from exit_files(), the current task is not yet ZOMBIE but we -+ * flush the PMU state to the context. -+ */ -+int __pfm_close(struct pfm_context *ctx, struct file *filp) -+{ -+ unsigned long flags; -+ int state; -+ int can_free = 1, can_unload = 1; -+ int is_system, can_release = 0; -+ u32 cpu; -+ -+ /* -+ * no risk of ctx of filp disappearing so we can operate outside -+ * of spin_lock(). 
fasync_helper() runs with interrupts masked, -+ * thus there is no risk with the PMU interrupt handler -+ * -+ * In case of zombie, we will not have the async struct anymore -+ * thus kill_fasync() will not do anything -+ * -+ * fd is not used when removing the entry so we pass -1 -+ */ -+ if (filp->f_flags & FASYNC) -+ __pfm_fasync (-1, filp, ctx, 0); -+ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ state = ctx->state; -+ is_system = ctx->flags.system; -+ cpu = ctx->cpu; -+ -+ PFM_DBG("state=%d", state); -+ -+ /* -+ * check if unload is needed -+ */ -+ if (state == PFM_CTX_UNLOADED) -+ goto doit; -+ -+#ifdef CONFIG_SMP -+ /* -+ * we need to release the resource on the ORIGINAL cpu. -+ * we need to release the context lock to avoid deadlocks -+ * on the original CPU, especially in the context switch -+ * routines. It is safe to unlock because we are in close(), -+ * in other words, there is no more access from user level. -+ * we can also unmask interrupts on this CPU because the -+ * context is running on the original CPU. Context will be -+ * unloaded and the session will be released on the original -+ * CPU. Upon return, the caller is guaranteed that the context -+ * is gone from original CPU. -+ */ -+ if (is_system && cpu != smp_processor_id()) { -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ pfm_close_remote_cpu(cpu, ctx); -+ can_release = 1; -+ goto free_it; -+ } -+ -+ if (!is_system && ctx->task != current) { -+ /* -+ * switch context to zombie state -+ */ -+ ctx->state = PFM_CTX_ZOMBIE; -+ -+ PFM_DBG("zombie ctx for [%d]", ctx->task->pid); -+ /* -+ * must check if other thread is using block overflow -+ * notification mode. If so make sure it will not block -+ * because there will not be any pfm_restart() issued. 
-+ * When the thread notices the ZOMBIE state, it will clean -+ * up what is left of the context -+ */ -+ if (state == PFM_CTX_MASKED && ctx->flags.block) { -+ /* -+ * force task to wake up from MASKED state -+ */ -+ PFM_DBG("waking up [%d]", ctx->task->pid); -+ -+ complete(&ctx->restart_complete); -+ } -+ /* -+ * PMU session will be release by monitored task when it notices -+ * ZOMBIE state as part of pfm_unload_context() -+ */ -+ can_unload = can_free = 0; -+ } -+#endif -+ if (can_unload) -+ __pfm_unload_context(ctx, &can_release); -+doit: -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ -+#ifdef CONFIG_SMP -+free_it: -+#endif -+ if (can_release) -+ pfm_session_release(is_system, cpu); -+ -+ if (can_free) -+ pfm_free_context(ctx); -+ -+ return 0; -+} -+ -+static int pfm_close(struct inode *inode, struct file *filp) -+{ -+ struct pfm_context *ctx; -+ -+ PFM_DBG("called filp=%p", filp); -+ -+ ctx = filp->private_data; -+ if (ctx == NULL) { -+ PFM_ERR("no ctx"); -+ return -EBADF; -+ } -+ return __pfm_close(ctx, filp); -+} -+ -+static int pfm_no_open(struct inode *irrelevant, struct file *dontcare) -+{ -+ PFM_DBG("pfm_file_ops"); -+ -+ return -ENXIO; -+} -+ -+ -+const struct file_operations pfm_file_ops = { -+ .llseek = no_llseek, -+ .read = pfm_read, -+ .write = pfm_write, -+ .poll = pfm_poll, -+ .ioctl = pfm_ioctl, -+ .open = pfm_no_open, /* special open to disallow open via /proc */ -+ .fasync = pfm_fasync, -+ .release = pfm_close, -+ .mmap = pfm_mmap -+}; -+ -+static int pfmfs_get_sb(struct file_system_type *fs_type, -+ int flags, const char *dev_name, -+ void *data, struct vfsmount *mnt) -+{ -+ return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC, mnt); -+} -+ -+static struct file_system_type pfm_fs_type = { -+ .name = "pfmfs", -+ .get_sb = pfmfs_get_sb, -+ .kill_sb = kill_anon_super, -+}; -+ -+/* -+ * pfmfs should _never_ be mounted by userland - too much of security hassle, -+ * no real gain from having the whole whorehouse mounted. 
So we don't need -+ * any operations on the root directory. However, we need a non-trivial -+ * d_name - pfm: will go nicely and kill the special-casing in procfs. -+ */ -+static struct vfsmount *pfmfs_mnt; -+ -+int __init pfm_init_fs(void) -+{ -+ int err = register_filesystem(&pfm_fs_type); -+ if (!err) { -+ pfmfs_mnt = kern_mount(&pfm_fs_type); -+ err = PTR_ERR(pfmfs_mnt); -+ if (IS_ERR(pfmfs_mnt)) -+ unregister_filesystem(&pfm_fs_type); -+ else -+ err = 0; -+ } -+ return err; -+} -+ -+int pfm_alloc_fd(struct file **cfile) -+{ -+ int fd, ret = 0; -+ struct file *file = NULL; -+ struct inode * inode; -+ char name[32]; -+ struct qstr this; -+ -+ fd = get_unused_fd(); -+ if (fd < 0) -+ return -ENFILE; -+ -+ ret = -ENFILE; -+ -+ file = get_empty_filp(); -+ if (!file) -+ goto out; -+ -+ /* -+ * allocate a new inode -+ */ -+ inode = new_inode(pfmfs_mnt->mnt_sb); -+ if (!inode) -+ goto out; -+ -+ PFM_DBG("new inode ino=%ld @%p", inode->i_ino, inode); -+ -+ inode->i_sb = pfmfs_mnt->mnt_sb; -+ inode->i_mode = S_IFCHR|S_IRUGO; -+ inode->i_uid = current->fsuid; -+ inode->i_gid = current->fsgid; -+ -+ sprintf(name, "[%lu]", inode->i_ino); -+ this.name = name; -+ this.hash = inode->i_ino; -+ this.len = strlen(name); -+ -+ ret = -ENOMEM; -+ -+ /* -+ * allocate a new dcache entry -+ */ -+ file->f_dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this); -+ if (!file->f_dentry) -+ goto out; -+ -+ file->f_dentry->d_op = &pfmfs_dentry_operations; -+ -+ d_add(file->f_dentry, inode); -+ file->f_vfsmnt = mntget(pfmfs_mnt); -+ file->f_mapping = inode->i_mapping; -+ -+ file->f_op = &pfm_file_ops; -+ file->f_mode = FMODE_READ; -+ file->f_flags = O_RDONLY; -+ file->f_pos = 0; -+ -+ *cfile = file; -+ -+ return fd; -+out: -+ if (file) -+ put_filp(file); -+ put_unused_fd(fd); -+ return ret; -+} -diff --git a/perfmon/perfmon_fmt.c b/perfmon/perfmon_fmt.c -new file mode 100644 -index 0000000..27c4340 ---- /dev/null -+++ b/perfmon/perfmon_fmt.c -@@ -0,0 +1,219 @@ -+/* -+ * perfmon_fmt.c: perfmon2 
sampling buffer format management -+ * -+ * This file implements the perfmon2 interface which -+ * provides access to the hardware performance counters -+ * of the host processor. -+ * -+ * The initial version of perfmon.c was written by -+ * Ganesh Venkitachalam, IBM Corp. -+ * -+ * Then it was modified for perfmon-1.x by Stephane Eranian and -+ * David Mosberger, Hewlett Packard Co. -+ * -+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x -+ * by Stephane Eranian, Hewlett Packard Co. -+ * -+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * David Mosberger-Tang -+ * -+ * More information about perfmon available at: -+ * http://perfmon2.sf.net -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include "perfmon_priv.h" -+ -+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_smpl_fmt_lock); -+static LIST_HEAD(pfm_smpl_fmt_list); -+ -+static inline int fmt_is_mod(struct pfm_smpl_fmt *f) -+{ -+ return !(f->fmt_flags & PFM_FMTFL_IS_BUILTIN); -+} -+ -+static struct pfm_smpl_fmt *pfm_find_fmt(char *name) -+{ -+ struct pfm_smpl_fmt *entry; -+ -+ list_for_each_entry(entry, &pfm_smpl_fmt_list, fmt_list) { -+ if (!strcmp(entry->fmt_name, name)) -+ return entry; -+ } -+ return NULL; -+} -+/* -+ * find a buffer format based on its name -+ */ -+struct pfm_smpl_fmt *pfm_smpl_fmt_get(char *name) -+{ -+ struct pfm_smpl_fmt *fmt; -+ -+ spin_lock(&pfm_smpl_fmt_lock); -+ -+ fmt = pfm_find_fmt(name); -+ -+ /* -+ * increase module refcount -+ */ -+ if (fmt && fmt_is_mod(fmt) && !try_module_get(fmt->owner)) -+ fmt = NULL; -+ -+ spin_unlock(&pfm_smpl_fmt_lock); -+ -+ return fmt; -+} -+ -+void pfm_smpl_fmt_put(struct pfm_smpl_fmt *fmt) -+{ -+ if (fmt == NULL || !fmt_is_mod(fmt)) -+ return; -+ BUG_ON(fmt->owner == NULL); -+ -+ spin_lock(&pfm_smpl_fmt_lock); -+ module_put(fmt->owner); -+ spin_unlock(&pfm_smpl_fmt_lock); -+} -+ -+int pfm_fmt_register(struct pfm_smpl_fmt *fmt) -+{ -+ int ret = 0; -+ -+ if (perfmon_disabled) { -+ PFM_INFO("perfmon disabled, cannot add sampling format"); -+ return -ENOSYS; -+ } -+ -+ /* some sanity checks */ -+ if (fmt == NULL) { -+ PFM_INFO("perfmon: NULL format for register"); -+ return -EINVAL; -+ } -+ -+ if (fmt->fmt_name == NULL) { -+ PFM_INFO("perfmon: format has no name"); -+ return -EINVAL; -+ } -+ -+ if (fmt->fmt_qdepth > PFM_MSGS_COUNT) { -+ PFM_INFO("perfmon: format %s requires %u msg queue depth (max %d)", -+ fmt->fmt_name, -+ fmt->fmt_qdepth, -+ PFM_MSGS_COUNT); -+ return -EINVAL; -+ } 
-+ -+ /* -+ * fmt is missing the initialization of .owner = THIS_MODULE -+ * this is only valid when format is compiled as a module -+ */ -+ if (fmt->owner == NULL && fmt_is_mod(fmt)) { -+ PFM_INFO("format %s has no module owner", fmt->fmt_name); -+ return -EINVAL; -+ } -+ /* -+ * we need at least a handler -+ */ -+ if (fmt->fmt_handler == NULL) { -+ PFM_INFO("format %s has no handler", fmt->fmt_name); -+ return -EINVAL; -+ } -+ -+ /* -+ * format argument size cannot be bigger than PAGE_SIZE -+ */ -+ if (fmt->fmt_arg_size > PAGE_SIZE) { -+ PFM_INFO("format %s arguments too big", fmt->fmt_name); -+ return -EINVAL; -+ } -+ -+ spin_lock(&pfm_smpl_fmt_lock); -+ -+ /* -+ * because of sysfs, we cannot have two formats with the same name -+ */ -+ if (pfm_find_fmt(fmt->fmt_name)) { -+ PFM_INFO("format %s already registered", fmt->fmt_name); -+ ret = -EBUSY; -+ goto out; -+ } -+ -+ ret = pfm_sysfs_add_fmt(fmt); -+ if (ret) { -+ PFM_INFO("sysfs cannot add format entry for %s", fmt->fmt_name); -+ goto out; -+ } -+ -+ list_add(&fmt->fmt_list, &pfm_smpl_fmt_list); -+ -+ PFM_INFO("added sampling format %s", fmt->fmt_name); -+out: -+ spin_unlock(&pfm_smpl_fmt_lock); -+ -+ return ret; -+} -+EXPORT_SYMBOL(pfm_fmt_register); -+ -+int pfm_fmt_unregister(struct pfm_smpl_fmt *fmt) -+{ -+ struct pfm_smpl_fmt *fmt2; -+ int ret = 0; -+ -+ if (!fmt || !fmt->fmt_name) { -+ PFM_DBG("invalid fmt"); -+ return -EINVAL; -+ } -+ -+ spin_lock(&pfm_smpl_fmt_lock); -+ -+ fmt2 = pfm_find_fmt(fmt->fmt_name); -+ if (!fmt) { -+ PFM_INFO("unregister failed, format not registered"); -+ ret = -EINVAL; -+ goto out; -+ } -+ list_del_init(&fmt->fmt_list); -+ -+ pfm_sysfs_remove_fmt(fmt); -+ -+ PFM_INFO("removed sampling format: %s", fmt->fmt_name); -+ -+out: -+ spin_unlock(&pfm_smpl_fmt_lock); -+ return ret; -+ -+} -+EXPORT_SYMBOL(pfm_fmt_unregister); -+ -+/* -+ * we defer adding the builtin formats to /sys/kernel/perfmon/formats -+ * until after the pfm sysfs subsystem is initialized. 
This function -+ * is called from pfm_init_sysfs() -+ */ -+void __init pfm_sysfs_builtin_fmt_add(void) -+{ -+ struct pfm_smpl_fmt *entry; -+ -+ /* -+ * locking not needed, kernel not fully booted -+ * when called -+ */ -+ list_for_each_entry(entry, &pfm_smpl_fmt_list, fmt_list) { -+ pfm_sysfs_add_fmt(entry); -+ } -+} -diff --git a/perfmon/perfmon_hotplug.c b/perfmon/perfmon_hotplug.c -new file mode 100644 -index 0000000..eaaba81 ---- /dev/null -+++ b/perfmon/perfmon_hotplug.c -@@ -0,0 +1,151 @@ -+/* -+ * perfmon_hotplug.c: handling of CPU hotplug -+ * -+ * The initial version of perfmon.c was written by -+ * Ganesh Venkitachalam, IBM Corp. -+ * -+ * Then it was modified for perfmon-1.x by Stephane Eranian and -+ * David Mosberger, Hewlett Packard Co. -+ * -+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x -+ * by Stephane Eranian, Hewlett Packard Co. -+ * -+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * David Mosberger-Tang -+ * -+ * More information about perfmon available at: -+ * http://perfmon2.sf.net -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+#include "perfmon_priv.h" -+ -+#ifndef CONFIG_HOTPLUG_CPU -+void pfm_cpu_disable(void) -+{} -+ -+int __init pfm_init_hotplug(void) -+{ -+ return 0; -+} -+#else /* CONFIG_HOTPLUG_CPU */ -+/* -+ * CPU hotplug event nofication callback -+ * -+ * We use the callback to do manage the sysfs interface. -+ * Note that the actual shutdown of monitoring on the CPU -+ * is done in pfm_cpu_disable(), see comments there for more -+ * information. -+ */ -+static int pfm_cpu_notify(struct notifier_block *nfb, -+ unsigned long action, void *hcpu) -+{ -+ unsigned int cpu = (unsigned long)hcpu; -+ int ret = NOTIFY_OK; -+ -+ pfm_pmu_conf_get(0); -+ -+ switch (action) { -+ case CPU_ONLINE: -+ pfm_debugfs_add_cpu(cpu); -+ PFM_INFO("CPU%d is online", cpu); -+ break; -+ case CPU_UP_PREPARE: -+ PFM_INFO("CPU%d prepare online", cpu); -+ break; -+ case CPU_UP_CANCELED: -+ pfm_debugfs_del_cpu(cpu); -+ PFM_INFO("CPU%d is up canceled", cpu); -+ break; -+ case CPU_DOWN_PREPARE: -+ PFM_INFO("CPU%d prepare offline", cpu); -+ break; -+ case CPU_DOWN_FAILED: -+ PFM_INFO("CPU%d is down failed", cpu); -+ break; -+ case CPU_DEAD: -+ pfm_debugfs_del_cpu(cpu); -+ PFM_INFO("CPU%d is offline", cpu); -+ break; -+ } -+ pfm_pmu_conf_put(); -+ return ret; -+} -+ -+/* -+ * called from cpu_disable() to detach the perfmon context -+ * from the CPU going down. 
-+ * -+ * We cannot use the cpu hotplug notifier because we MUST run -+ * on the CPU that is going down to save the PMU state -+ */ -+void pfm_cpu_disable(void) -+{ -+ struct pfm_context *ctx; -+ unsigned long flags; -+ int is_system, release_info = 0; -+ u32 cpu; -+ int r; -+ -+ ctx = __get_cpu_var(pmu_ctx); -+ if (ctx == NULL) -+ return; -+ -+ is_system = ctx->flags.system; -+ cpu = ctx->cpu; -+ -+ /* -+ * context is LOADED or MASKED -+ * -+ * we unload from CPU. That stops monitoring and does -+ * all the bookeeping of saving values and updating duration -+ */ -+ spin_lock_irqsave(&ctx->lock, flags); -+ if (is_system) -+ __pfm_unload_context(ctx, &release_info); -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ -+ /* -+ * cancel timer -+ */ -+ if (release_info & 0x2) { -+ r = hrtimer_cancel(&__get_cpu_var(pfm_hrtimer)); -+ PFM_DBG("timeout cancel=%d", r); -+ } -+ -+ if (release_info & 0x1) -+ pfm_session_release(is_system, cpu); -+} -+ -+static struct notifier_block pfm_cpu_notifier = { -+ .notifier_call = pfm_cpu_notify -+}; -+ -+int __init pfm_init_hotplug(void) -+{ -+ int ret = 0; -+ /* -+ * register CPU hotplug event notifier -+ */ -+ ret = register_cpu_notifier(&pfm_cpu_notifier); -+ if (!ret) -+ PFM_LOG("CPU hotplug support enabled"); -+ return ret; -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -diff --git a/perfmon/perfmon_init.c b/perfmon/perfmon_init.c -new file mode 100644 -index 0000000..bbb6e4d ---- /dev/null -+++ b/perfmon/perfmon_init.c -@@ -0,0 +1,131 @@ -+/* -+ * perfmon.c: perfmon2 global initialization functions -+ * -+ * This file implements the perfmon2 interface which -+ * provides access to the hardware performance counters -+ * of the host processor. -+ * -+ * -+ * The initial version of perfmon.c was written by -+ * Ganesh Venkitachalam, IBM Corp. -+ * -+ * Then it was modified for perfmon-1.x by Stephane Eranian and -+ * David Mosberger, Hewlett Packard Co. 
-+ * -+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x -+ * by Stephane Eranian, Hewlett Packard Co. -+ * -+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * David Mosberger-Tang -+ * -+ * More information about perfmon available at: -+ * http://perfmon2.sf.net -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include "perfmon_priv.h" -+ -+/* -+ * external variables -+ */ -+DEFINE_PER_CPU(struct task_struct *, pmu_owner); -+DEFINE_PER_CPU(struct pfm_context *, pmu_ctx); -+DEFINE_PER_CPU(u64, pmu_activation_number); -+DEFINE_PER_CPU(struct pfm_stats, pfm_stats); -+DEFINE_PER_CPU(struct hrtimer, pfm_hrtimer); -+ -+ -+int perfmon_disabled; /* >0 if perfmon is disabled */ -+ -+/* -+ * called from cpu_init() and pfm_pmu_register() -+ */ -+void __pfm_init_percpu(void *dummy) -+{ -+ struct hrtimer *h; -+ -+ h = &__get_cpu_var(pfm_hrtimer); -+ -+ pfm_arch_init_percpu(); -+ -+ /* -+ * initialize per-cpu high res timer -+ */ -+ hrtimer_init(h, CLOCK_MONOTONIC, HRTIMER_MODE_REL); -+#ifdef CONFIG_HIGH_RES_TIMERS -+ /* -+ * avoid potential deadlock on the runqueue lock -+ * during context switch when multiplexing. Situation -+ * arises on architectures which run switch_to() with -+ * the runqueue lock held, e.g., x86. 
On others, e.g., -+ * IA-64, the problem does not exist. -+ * Setting the callback mode to HRTIMER_CB_IRQSAFE_UNOCKED -+ * such that the callback routine is only called on hardirq -+ * context not on softirq, thus the context switch will not -+ * end up trying to wakeup the softirqd -+ */ -+ h->cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED; -+#endif -+ h->function = pfm_handle_switch_timeout; -+} -+ -+/* -+ * global initialization routine, executed only once -+ */ -+int __init pfm_init(void) -+{ -+ PFM_LOG("version %u.%u", PFM_VERSION_MAJ, PFM_VERSION_MIN); -+ -+ if (pfm_init_ctx()) -+ goto error_disable; -+ -+ -+ if (pfm_init_sets()) -+ goto error_disable; -+ -+ if (pfm_init_fs()) -+ goto error_disable; -+ -+ if (pfm_init_sysfs()) -+ goto error_disable; -+ -+ /* not critical, so no error checking */ -+ pfm_init_debugfs(); -+ -+ /* -+ * one time, arch-specific global initialization -+ */ -+ if (pfm_arch_init()) -+ goto error_disable; -+ -+ if (pfm_init_hotplug()) -+ goto error_disable; -+ return 0; -+ -+error_disable: -+ PFM_ERR("perfmon is disabled due to initialization error"); -+ perfmon_disabled = 1; -+ return -1; -+} -+ -+/* -+ * must use subsys_initcall() to ensure that the perfmon2 core -+ * is initialized before any PMU description module when they are -+ * compiled in. -+ */ -+subsys_initcall(pfm_init); -diff --git a/perfmon/perfmon_intr.c b/perfmon/perfmon_intr.c -new file mode 100644 -index 0000000..c5e3cda ---- /dev/null -+++ b/perfmon/perfmon_intr.c -@@ -0,0 +1,648 @@ -+/* -+ * perfmon_intr.c: perfmon2 interrupt handling -+ * -+ * This file implements the perfmon2 interface which -+ * provides access to the hardware performance counters -+ * of the host processor. -+ * -+ * The initial version of perfmon.c was written by -+ * Ganesh Venkitachalam, IBM Corp. -+ * -+ * Then it was modified for perfmon-1.x by Stephane Eranian and -+ * David Mosberger, Hewlett Packard Co. 
-+ * -+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x -+ * by Stephane Eranian, Hewlett Packard Co. -+ * -+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * David Mosberger-Tang -+ * -+ * More information about perfmon available at: -+ * http://perfmon2.sf.net -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+#include "perfmon_priv.h" -+ -+/** -+ * pfm_intr_process_64bit_ovfls - handle 64-bit counter emulation -+ * @ctx: context to operate on -+ * @set: set to operate on -+ * -+ * The function returns the number of 64-bit overflows detected. 
-+ * -+ * 64-bit software pmds are updated for overflowed pmd registers -+ * the set->reset_pmds is updated to the list of pmds to reset -+ * -+ * In any case, set->npend_ovfls is cleared -+ */ -+static u16 pfm_intr_process_64bit_ovfls(struct pfm_context *ctx, -+ struct pfm_event_set *set, -+ u32 *ovfl_ctrl) -+{ -+ u16 i, num_ovfls, max_pmd, max_intr; -+ u16 num_64b_ovfls, has_ovfl_sw, must_switch; -+ u64 ovfl_thres, old_val, new_val, ovfl_mask; -+ -+ num_64b_ovfls = must_switch = 0; -+ -+ ovfl_mask = pfm_pmu_conf->ovfl_mask; -+ max_pmd = ctx->regs.max_pmd; -+ max_intr = ctx->regs.max_intr_pmd; -+ -+ num_ovfls = set->npend_ovfls; -+ has_ovfl_sw = set->flags & PFM_SETFL_OVFL_SWITCH; -+ -+ bitmap_zero(cast_ulp(set->reset_pmds), max_pmd); -+ -+ for (i = ctx->regs.first_intr_pmd; num_ovfls; i++) { -+ /* -+ * skip pmd which did not overflow -+ */ -+ if (!test_bit(i, cast_ulp(set->povfl_pmds))) -+ continue; -+ -+ num_ovfls--; -+ -+ /* -+ * Update software value for counters ONLY -+ * -+ * Note that the pmd is not necessarily 0 at this point as -+ * qualified events may have happened before the PMU was -+ * frozen. The residual count is not taken into consideration -+ * here but will be with any read of the pmd -+ */ -+ ovfl_thres = set->pmds[i].ovflsw_thres; -+ -+ if (likely(test_bit(i, cast_ulp(ctx->regs.cnt_pmds)))) { -+ old_val = new_val = set->pmds[i].value; -+ new_val += 1 + ovfl_mask; -+ set->pmds[i].value = new_val; -+ } else { -+ /* -+ * for non counters which interrupt, e.g., AMD IBS, -+ * we consider this equivalent to a 64-bit counter -+ * overflow. 
-+ */ -+ old_val = 1; new_val = 0; -+ } -+ -+ /* -+ * check for 64-bit overflow condition -+ */ -+ if (likely(old_val > new_val)) { -+ num_64b_ovfls++; -+ if (has_ovfl_sw && ovfl_thres > 0) { -+ if (ovfl_thres == 1) -+ must_switch = 1; -+ set->pmds[i].ovflsw_thres = ovfl_thres - 1; -+ } -+ -+ /* -+ * what to reset because of this overflow -+ * - the overflowed register -+ * - its reset_smpls -+ */ -+ __set_bit(i, cast_ulp(set->reset_pmds)); -+ -+ bitmap_or(cast_ulp(set->reset_pmds), -+ cast_ulp(set->reset_pmds), -+ cast_ulp(set->pmds[i].reset_pmds), -+ max_pmd); -+ } else { -+ /* -+ * only keep track of 64-bit overflows or -+ * assimilated -+ */ -+ __clear_bit(i, cast_ulp(set->povfl_pmds)); -+ -+ /* -+ * on some PMU, it may be necessary to re-arm the PMD -+ */ -+ pfm_arch_ovfl_reset_pmd(ctx, i); -+ } -+ -+ PFM_DBG_ovfl("ovfl=%s pmd%u new=0x%llx old=0x%llx " -+ "hw_pmd=0x%llx o_pmds=0x%llx must_switch=%u " -+ "o_thres=%llu o_thres_ref=%llu", -+ old_val > new_val ? "64-bit" : "HW", -+ i, -+ (unsigned long long)new_val, -+ (unsigned long long)old_val, -+ (unsigned long long)pfm_read_pmd(ctx, i), -+ (unsigned long long)set->povfl_pmds[0], -+ must_switch, -+ (unsigned long long)set->pmds[i].ovflsw_thres, -+ (unsigned long long)set->pmds[i].ovflsw_ref_thres); -+ } -+ /* -+ * update public bitmask of 64-bit overflowed pmds -+ */ -+ if (num_64b_ovfls) -+ bitmap_copy(cast_ulp(set->ovfl_pmds), cast_ulp(set->povfl_pmds), -+ max_intr); -+ -+ if (must_switch) -+ *ovfl_ctrl |= PFM_OVFL_CTRL_SWITCH; -+ -+ /* -+ * mark the overflows as consumed -+ */ -+ set->npend_ovfls = 0; -+ bitmap_zero(cast_ulp(set->povfl_pmds), max_intr); -+ -+ return num_64b_ovfls; -+} -+ -+/** -+ * pfm_intr_get_smpl_pmds_values - copy 64-bit pmd values for sampling format -+ * @ctx: context to work on -+ * @set: current event set -+ * @arg: overflow arg to be passed to format -+ * @smpl_pmds: list of PMDs of interest for the overflowed register -+ * -+ * build an array of 46-bit PMD values based on 
smpl_pmds. Values are -+ * stored in increasing order of the PMD indexes -+ */ -+static void pfm_intr_get_smpl_pmds_values(struct pfm_context *ctx, -+ struct pfm_event_set *set, -+ struct pfm_ovfl_arg *arg, -+ u64 *smpl_pmds) -+{ -+ u16 j, k, max_pmd; -+ u64 new_val, ovfl_mask; -+ u64 *cnt_pmds; -+ -+ cnt_pmds = ctx->regs.cnt_pmds; -+ max_pmd = ctx->regs.max_pmd; -+ ovfl_mask = pfm_pmu_conf->ovfl_mask; -+ -+ for (j = k = 0; j < max_pmd; j++) { -+ -+ if (!test_bit(j, cast_ulp(smpl_pmds))) -+ continue; -+ -+ new_val = pfm_read_pmd(ctx, j); -+ -+ /* for counters, build 64-bit value */ -+ if (test_bit(j, cast_ulp(cnt_pmds))) -+ new_val = (set->pmds[j].value & ~ovfl_mask) -+ | (new_val & ovfl_mask); -+ -+ arg->smpl_pmds_values[k++] = new_val; -+ -+ PFM_DBG_ovfl("s_pmd_val[%u]=pmd%u=0x%llx", k, j, -+ (unsigned long long)new_val); -+ } -+ arg->num_smpl_pmds = k; -+} -+ -+/** -+ * pfm_intr_process_smpl_fmt -- handle sampling format callback -+ * @ctx: context to work on -+ * @set: current event set -+ * @ip: interrupted instruction pointer -+ * @now: timestamp -+ * @num_ovfls: number of 64-bit overflows -+ * @ovfl_ctrl: set of controls for interrupt handler tail processing -+ * @regs: register state -+ * -+ * Prepare argument (ovfl_arg) to be passed to sampling format callback, then -+ * invoke the callback (fmt_handler) -+ */ -+static int pfm_intr_process_smpl_fmt(struct pfm_context *ctx, -+ struct pfm_event_set *set, -+ unsigned long ip, -+ u64 now, -+ u64 num_ovfls, -+ u32 *ovfl_ctrl, -+ struct pt_regs *regs) -+{ -+ struct pfm_ovfl_arg *ovfl_arg; -+ u64 start_cycles, end_cycles; -+ u16 i, max_pmd; -+ int ret = 0; -+ -+ ovfl_arg = &ctx->ovfl_arg; -+ -+ ovfl_arg->active_set = set->id; -+ max_pmd = ctx->regs.max_pmd; -+ -+ /* -+ * first_intr_pmd: first PMD which can generate PMU interrupts -+ */ -+ for (i = ctx->regs.first_intr_pmd; num_ovfls; i++) { -+ /* -+ * skip pmd which did not have 64-bit overflows -+ */ -+ if (!test_bit(i, cast_ulp(set->ovfl_pmds))) -+ continue; -+ 
-+ num_ovfls--; -+ -+ /* -+ * prepare argument to fmt_handler -+ */ -+ ovfl_arg->ovfl_pmd = i; -+ ovfl_arg->ovfl_ctrl = 0; -+ -+ ovfl_arg->pmd_last_reset = set->pmds[i].lval; -+ ovfl_arg->pmd_eventid = set->pmds[i].eventid; -+ ovfl_arg->num_smpl_pmds = 0; -+ -+ /* -+ * copy values of pmds of interest, if any -+ * Sampling format may use them -+ * We do not initialize the unused smpl_pmds_values -+ */ -+ if (!bitmap_empty(cast_ulp(set->pmds[i].smpl_pmds), max_pmd)) -+ pfm_intr_get_smpl_pmds_values(ctx, set, ovfl_arg, -+ set->pmds[i].smpl_pmds); -+ -+ pfm_stats_inc(fmt_handler_calls); -+ -+ /* -+ * call format record (handler) routine -+ */ -+ start_cycles = sched_clock(); -+ ret = (*ctx->smpl_fmt->fmt_handler)(ctx, ip, now, regs); -+ end_cycles = sched_clock(); -+ -+ /* -+ * The reset_pmds mask is constructed automatically -+ * on overflow. When the actual reset takes place -+ * depends on the masking, switch and notification -+ * status. It may be deferred until pfm_restart(). -+ */ -+ *ovfl_ctrl |= ovfl_arg->ovfl_ctrl; -+ -+ pfm_stats_add(fmt_handler_ns, end_cycles - start_cycles); -+ } -+ /* -+ * when the format cannot handle the rest of the overflow, we abort -+ */ -+ if (ret) -+ PFM_DBG_ovfl("handler aborted at PMD%u ret=%d", i, ret); -+ return ret; -+} -+/** -+ * pfm_overflow_handler - main overflow processing routine. -+ * @ctx: context to work on (always current context) -+ * @set: current event set -+ * @ip: interrupt instruction pointer -+ * @regs: machine state -+ * -+ * set->num_ovfl_pmds is 0 when returning from this function even though -+ * set->ovfl_pmds[] may have bits set. When leaving set->num_ovfl_pmds -+ * must never be used to determine if there was a pending overflow. 
-+ */ -+static void pfm_overflow_handler(struct pfm_context *ctx, -+ struct pfm_event_set *set, -+ unsigned long ip, -+ struct pt_regs *regs) -+{ -+ struct pfm_event_set *set_orig; -+ u64 now; -+ u32 ovfl_ctrl; -+ u16 max_intr, max_pmd; -+ u16 num_ovfls; -+ int ret, has_notify; -+ -+ /* -+ * take timestamp -+ */ -+ now = sched_clock(); -+ -+ max_pmd = ctx->regs.max_pmd; -+ max_intr = ctx->regs.max_intr_pmd; -+ -+ set_orig = set; -+ ovfl_ctrl = 0; -+ -+ /* -+ * skip ZOMBIE case -+ */ -+ if (unlikely(ctx->state == PFM_CTX_ZOMBIE)) -+ goto stop_monitoring; -+ -+ PFM_DBG_ovfl("intr_pmds=0x%llx npend=%u ip=%p, blocking=%d " -+ "u_pmds=0x%llx use_fmt=%u", -+ (unsigned long long)set->povfl_pmds[0], -+ set->npend_ovfls, -+ (void *)ip, -+ ctx->flags.block, -+ (unsigned long long)set->used_pmds[0], -+ !!ctx->smpl_fmt); -+ -+ /* -+ * return number of 64-bit overflows -+ */ -+ num_ovfls = pfm_intr_process_64bit_ovfls(ctx, set, &ovfl_ctrl); -+ -+ /* -+ * there were no 64-bit overflows -+ * nothing else to do -+ */ -+ if (!num_ovfls) -+ return; -+ -+ /* -+ * tmp_ovfl_notify = ovfl_pmds & ovfl_notify -+ * with: -+ * - ovfl_pmds: last 64-bit overflowed pmds -+ * - ovfl_notify: notify on overflow registers -+ */ -+ bitmap_and(cast_ulp(ctx->tmp_ovfl_notify), -+ cast_ulp(set->ovfl_pmds), -+ cast_ulp(set->ovfl_notify), -+ max_intr); -+ -+ has_notify = !bitmap_empty(cast_ulp(ctx->tmp_ovfl_notify), max_intr); -+ -+ /* -+ * check for sampling format and invoke fmt_handler -+ */ -+ if (likely(ctx->smpl_fmt)) { -+ pfm_intr_process_smpl_fmt(ctx, set, ip, now, num_ovfls, -+ &ovfl_ctrl, regs); -+ } else { -+ /* -+ * When no sampling format is used, the default -+ * is: -+ * - mask monitoring if not switching -+ * - notify user if requested -+ * -+ * If notification is not requested, monitoring is masked -+ * and overflowed registers are not reset (saturation). -+ * This mimics the behavior of the default sampling format. 
-+ */ -+ ovfl_ctrl |= PFM_OVFL_CTRL_NOTIFY; -+ if (has_notify || !(ovfl_ctrl & PFM_OVFL_CTRL_SWITCH)) -+ ovfl_ctrl |= PFM_OVFL_CTRL_MASK; -+ } -+ -+ PFM_DBG_ovfl("set%u o_notify=0x%llx o_pmds=0x%llx " -+ "r_pmds=0x%llx ovfl_ctrl=0x%x", -+ set->id, -+ (unsigned long long)ctx->tmp_ovfl_notify[0], -+ (unsigned long long)set->ovfl_pmds[0], -+ (unsigned long long)set->reset_pmds[0], -+ ovfl_ctrl); -+ -+ /* -+ * execute the various controls -+ * ORDER MATTERS -+ */ -+ -+ -+ /* -+ * mask monitoring -+ */ -+ if (ovfl_ctrl & PFM_OVFL_CTRL_MASK) { -+ pfm_mask_monitoring(ctx, set); -+ /* -+ * when masking, reset is deferred until -+ * pfm_restart() -+ */ -+ ovfl_ctrl &= ~PFM_OVFL_CTRL_RESET; -+ -+ /* -+ * when masking, switching is deferred until -+ * pfm_restart and we need to remember it -+ */ -+ if (ovfl_ctrl & PFM_OVFL_CTRL_SWITCH) { -+ set->priv_flags |= PFM_SETFL_PRIV_SWITCH; -+ ovfl_ctrl &= ~PFM_OVFL_CTRL_SWITCH; -+ } -+ } -+ -+ /* -+ * switch event set -+ */ -+ if (ovfl_ctrl & PFM_OVFL_CTRL_SWITCH) { -+ pfm_switch_sets_from_intr(ctx); -+ /* update view of active set */ -+ set = ctx->active_set; -+ } -+ /* -+ * send overflow notification -+ * -+ * only necessary if at least one overflowed -+ * register had the notify flag set -+ */ -+ if (has_notify && (ovfl_ctrl & PFM_OVFL_CTRL_NOTIFY)) { -+ /* -+ * block on notify, not on masking -+ */ -+ if (ctx->flags.block) -+ pfm_post_work(current, ctx, PFM_WORK_BLOCK); -+ -+ /* -+ * send notification and passed original set id -+ * if error, queue full, for instance, then default -+ * to masking monitoring, i.e., saturate -+ */ -+ ret = pfm_ovfl_notify(ctx, set_orig, ip); -+ if (unlikely(ret)) { -+ if (ctx->state == PFM_CTX_LOADED) { -+ pfm_mask_monitoring(ctx, set); -+ ovfl_ctrl &= ~PFM_OVFL_CTRL_RESET; -+ } -+ } else { -+ ctx->flags.can_restart++; -+ PFM_DBG_ovfl("can_restart=%u", ctx->flags.can_restart); -+ } -+ } -+ -+ /* -+ * reset overflowed registers -+ */ -+ if (ovfl_ctrl & PFM_OVFL_CTRL_RESET) { -+ u16 nn; -+ nn = 
bitmap_weight(cast_ulp(set->reset_pmds), max_pmd); -+ if (nn) -+ pfm_reset_pmds(ctx, set, nn, PFM_PMD_RESET_SHORT); -+ } -+ return; -+ -+stop_monitoring: -+ /* -+ * Does not happen for a system-wide context nor for a -+ * self-monitored context. We cannot attach to kernel-only -+ * thread, thus it is safe to set TIF bits, i.e., the thread -+ * will eventually leave the kernel or die and either we will -+ * catch the context and clean it up in pfm_handler_work() or -+ * pfm_exit_thread(). -+ * -+ * Mask until we get to pfm_handle_work() -+ */ -+ pfm_mask_monitoring(ctx, set); -+ -+ PFM_DBG_ovfl("ctx is zombie, converted to spurious"); -+ pfm_post_work(current, ctx, PFM_WORK_ZOMBIE); -+} -+ -+/** -+ * __pfm_interrupt_handler - 1st level interrupt handler -+ * @ip: interrupted instruction pointer -+ * @regs: machine state -+ * -+ * Function is static because we use a wrapper to easily capture timing infos. -+ * -+ * -+ * Context locking necessary to avoid concurrent accesses from other CPUs -+ * - For per-thread, we must prevent pfm_restart() which works when -+ * context is LOADED or MASKED -+ */ -+static void __pfm_interrupt_handler(unsigned long ip, struct pt_regs *regs) -+{ -+ struct task_struct *task; -+ struct pfm_context *ctx; -+ struct pfm_event_set *set; -+ -+ -+ task = __get_cpu_var(pmu_owner); -+ ctx = __get_cpu_var(pmu_ctx); -+ -+ /* -+ * verify if there is a context on this CPU -+ */ -+ if (unlikely(ctx == NULL)) { -+ PFM_DBG_ovfl("no ctx"); -+ goto spurious; -+ } -+ -+ /* -+ * we need to lock context because it could be accessed -+ * from another CPU. Depending on the priority level of -+ * the PMU interrupt or the arch, it may be necessary to -+ * mask interrupts alltogether to avoid race condition with -+ * the timer interrupt in case of time-based set switching, -+ * for instance. -+ */ -+ spin_lock(&ctx->lock); -+ -+ set = ctx->active_set; -+ -+ /* -+ * For SMP per-thread, it is not possible to have -+ * owner != NULL && task != current. 
-+ * -+ * For UP per-thread, because of lazy save, it -+ * is possible to receive an interrupt in another task -+ * which is not using the PMU. This means -+ * that the interrupt was in-flight at the -+ * time of pfm_ctxswout_thread(). In that -+ * case, it will be replayed when the task -+ * is scheduled again. Hence we convert to spurious. -+ * -+ * The basic rule is that an overflow is always -+ * processed in the context of the task that -+ * generated it for all per-thread contexts. -+ * -+ * for system-wide, task is always NULL -+ */ -+#ifndef CONFIG_SMP -+ if (unlikely((task && current->pfm_context != ctx))) { -+ PFM_DBG_ovfl("spurious: not owned by current task"); -+ goto spurious; -+ } -+#endif -+ if (unlikely(ctx->state == PFM_CTX_MASKED)) { -+ PFM_DBG_ovfl("spurious: monitoring masked"); -+ goto spurious; -+ } -+ -+ /* -+ * check that monitoring is active, otherwise convert -+ * to spurious -+ */ -+ if (unlikely(!pfm_arch_is_active(ctx))) { -+ PFM_DBG_ovfl("spurious: monitoring non active"); -+ goto spurious; -+ } -+ -+ /* -+ * freeze PMU and collect overflowed PMD registers -+ * into set->povfl_pmds. 
Number of overflowed PMDs -+ * reported in set->npend_ovfls -+ */ -+ pfm_arch_intr_freeze_pmu(ctx, set); -+ -+ /* -+ * no overflow detected, interrupt may have come -+ * from the previous thread running on this CPU -+ */ -+ if (unlikely(!set->npend_ovfls)) { -+ PFM_DBG_ovfl("no npend_ovfls"); -+ goto spurious; -+ } -+ -+ pfm_stats_inc(ovfl_intr_regular_count); -+ -+ /* -+ * invoke actual handler -+ */ -+ pfm_overflow_handler(ctx, set, ip, regs); -+ -+ /* -+ * unfreeze PMU, monitoring may not actual be restarted -+ * if context is MASKED -+ */ -+ pfm_arch_intr_unfreeze_pmu(ctx); -+ -+ spin_unlock(&ctx->lock); -+ -+ return; -+ -+spurious: -+ /* ctx may be NULL */ -+ pfm_arch_intr_unfreeze_pmu(ctx); -+ if (ctx) -+ spin_unlock(&ctx->lock); -+ -+ pfm_stats_inc(ovfl_intr_spurious_count); -+} -+ -+ -+/** -+ * pfm_interrupt_handler - 1st level interrupt handler -+ * @ip: interrupt instruction pointer -+ * @regs: machine state -+ * -+ * Function called from the low-level assembly code or arch-specific perfmon -+ * code. Simple wrapper used for timing purpose. Actual work done in -+ * __pfm_overflow_handler() -+ */ -+void pfm_interrupt_handler(unsigned long ip, struct pt_regs *regs) -+{ -+ u64 start; -+ -+ pfm_stats_inc(ovfl_intr_all_count); -+ -+ BUG_ON(!irqs_disabled()); -+ -+ start = sched_clock(); -+ -+ __pfm_interrupt_handler(ip, regs); -+ -+ pfm_stats_add(ovfl_intr_ns, sched_clock() - start); -+} -+EXPORT_SYMBOL(pfm_interrupt_handler); -+ -diff --git a/perfmon/perfmon_msg.c b/perfmon/perfmon_msg.c -new file mode 100644 -index 0000000..b8a1e4c ---- /dev/null -+++ b/perfmon/perfmon_msg.c -@@ -0,0 +1,229 @@ -+/* -+ * perfmon_msg.c: perfmon2 notification message queue management -+ * -+ * This file implements the perfmon2 interface which -+ * provides access to the hardware performance counters -+ * of the host processor. -+ * -+ * The initial version of perfmon.c was written by -+ * Ganesh Venkitachalam, IBM Corp. 
-+ * -+ * Then it was modified for perfmon-1.x by Stephane Eranian and -+ * David Mosberger, Hewlett Packard Co. -+ * -+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x -+ * by Stephane Eranian, Hewlett Packard Co. -+ * -+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * David Mosberger-Tang -+ * -+ * More information about perfmon available at: -+ * http://perfmon2.sf.net -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+ -+/** -+ * pfm_get_new_msg - get a new message slot from the queue -+ * @ctx: context to operate on -+ * -+ * if queue if full NULL is returned -+ */ -+static union pfarg_msg *pfm_get_new_msg(struct pfm_context *ctx) -+{ -+ int next; -+ -+ next = ctx->msgq_head & PFM_MSGQ_MASK; -+ -+ if ((ctx->msgq_head - ctx->msgq_tail) == PFM_MSGS_COUNT) -+ return NULL; -+ -+ /* -+ * move to next possible slot -+ */ -+ ctx->msgq_head++; -+ -+ PFM_DBG_ovfl("head=%d tail=%d msg=%d", -+ ctx->msgq_head & PFM_MSGQ_MASK, -+ ctx->msgq_tail & PFM_MSGQ_MASK, -+ next); -+ -+ return ctx->msgq+next; -+} -+ -+/** -+ * pfm_notify_user - wakeup any thread wiating on msg queue, post SIGIO -+ * @ctx: context to operate on -+ * -+ * message is already enqueued -+ */ -+static void pfm_notify_user(struct pfm_context *ctx) -+{ -+ if (ctx->state == 
PFM_CTX_ZOMBIE) { -+ PFM_DBG("no notification, context is zombie"); -+ return; -+ } -+ -+ PFM_DBG_ovfl("waking up"); -+ -+ wake_up_interruptible(&ctx->msgq_wait); -+ -+ /* -+ * it is safe to call kill_fasync() from an interrupt -+ * handler. kill_fasync() grabs two RW locks (fasync_lock, -+ * tasklist_lock) in read mode. There is conflict only in -+ * case the PMU interrupt occurs during a write mode critical -+ * section. This cannot happen because for both locks, the -+ * write mode is always using interrupt masking (write_lock_irq). -+ */ -+ kill_fasync(&ctx->async_queue, SIGIO, POLL_IN); -+} -+ -+/** -+ * pfm_ovfl_notify - send overflow notification -+ * @ctx: context to operate on -+ * @set: which set the overflow comes from -+ * @ip: overflow interrupt instruction address (IIP) -+ * -+ * Appends an overflow notification message to context queue. -+ * call pfm_notify() to wakeup any threads and/or send a signal -+ * -+ * Context is locked and interrupts are disabled (no preemption). -+ */ -+int pfm_ovfl_notify(struct pfm_context *ctx, -+ struct pfm_event_set *set, -+ unsigned long ip) -+{ -+ union pfarg_msg *msg = NULL; -+ u64 *ovfl_pmds; -+ -+ if (!ctx->flags.no_msg) { -+ msg = pfm_get_new_msg(ctx); -+ if (msg == NULL) { -+ /* -+ * when message queue fills up it is because the user -+ * did not extract the message, yet issued -+ * pfm_restart(). At this point, we stop sending -+ * notification, thus the user will not be able to get -+ * new samples when using the default format. 
-+ */ -+ PFM_DBG_ovfl("no more notification msgs"); -+ return -1; -+ } -+ -+ msg->pfm_ovfl_msg.msg_type = PFM_MSG_OVFL; -+ msg->pfm_ovfl_msg.msg_ovfl_pid = current->pid; -+ msg->pfm_ovfl_msg.msg_active_set = set->id; -+ -+ ovfl_pmds = msg->pfm_ovfl_msg.msg_ovfl_pmds; -+ -+ /* -+ * copy bitmask of all pmd that interrupted last -+ */ -+ bitmap_copy(cast_ulp(ovfl_pmds), cast_ulp(set->ovfl_pmds), -+ ctx->regs.max_intr_pmd); -+ -+ msg->pfm_ovfl_msg.msg_ovfl_cpu = smp_processor_id(); -+ msg->pfm_ovfl_msg.msg_ovfl_tid = current->tgid; -+ msg->pfm_ovfl_msg.msg_ovfl_ip = ip; -+ -+ pfm_stats_inc(ovfl_notify_count); -+ } -+ -+ PFM_DBG_ovfl("ip=0x%lx o_pmds=0x%llx", -+ ip, -+ (unsigned long long)set->ovfl_pmds[0]); -+ -+ pfm_notify_user(ctx); -+ return 0; -+} -+ -+/** -+ * pfm_end_notify_user - notify of thread termination -+ * @ctx: context to operate on -+ * -+ * In per-thread mode, when not self-monitoring, perfmon -+ * sends a 'end' notification message when the monitored -+ * thread where the context is attached is exiting. -+ * -+ * This helper message alleviates the need to track the activity -+ * of the thread/process when it is not directly related, i.e., -+ * was attached. In other words, no needto keep the thread -+ * ptraced. -+ * -+ * The context must be locked and interrupts disabled. 
-+ */ -+int pfm_end_notify(struct pfm_context *ctx) -+{ -+ union pfarg_msg *msg; -+ -+ msg = pfm_get_new_msg(ctx); -+ if (msg == NULL) { -+ PFM_ERR("%s no more msgs", __func__); -+ return -1; -+ } -+ /* no leak */ -+ memset(msg, 0, sizeof(*msg)); -+ -+ msg->type = PFM_MSG_END; -+ -+ PFM_DBG("end msg: msg=%p no_msg=%d", -+ msg, -+ ctx->flags.no_msg); -+ -+ pfm_notify_user(ctx); -+ return 0; -+} -+ -+/** -+ * pfm_get_next_msg - copy the oldest message from the queue and move tail -+ * @ctx: context to use -+ * @m: where to copy the message into -+ * -+ * The tail of the queue is moved as a consequence of this call -+ */ -+void pfm_get_next_msg(struct pfm_context *ctx, union pfarg_msg *m) -+{ -+ union pfarg_msg *next; -+ -+ PFM_DBG_ovfl("in head=%d tail=%d", -+ ctx->msgq_head & PFM_MSGQ_MASK, -+ ctx->msgq_tail & PFM_MSGQ_MASK); -+ -+ /* -+ * get oldest message -+ */ -+ next = ctx->msgq + (ctx->msgq_tail & PFM_MSGQ_MASK); -+ -+ /* -+ * move tail forward -+ */ -+ ctx->msgq_tail++; -+ -+ /* -+ * copy message, we cannot simply point to it -+ * as it may be re-used before we copy it out -+ */ -+ *m = *next; -+ -+ PFM_DBG_ovfl("out head=%d tail=%d type=%d", -+ ctx->msgq_head & PFM_MSGQ_MASK, -+ ctx->msgq_tail & PFM_MSGQ_MASK, -+ m->type); -+} -diff --git a/perfmon/perfmon_pmu.c b/perfmon/perfmon_pmu.c -new file mode 100644 -index 0000000..df7a9c9 ---- /dev/null -+++ b/perfmon/perfmon_pmu.c -@@ -0,0 +1,590 @@ -+/* -+ * perfmon_pmu.c: perfmon2 PMU configuration management -+ * -+ * This file implements the perfmon2 interface which -+ * provides access to the hardware performance counters -+ * of the host processor. -+ * -+ * The initial version of perfmon.c was written by -+ * Ganesh Venkitachalam, IBM Corp. -+ * -+ * Then it was modified for perfmon-1.x by Stephane Eranian and -+ * David Mosberger, Hewlett Packard Co. -+ * -+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x -+ * by Stephane Eranian, Hewlett Packard Co. 
-+ * -+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * David Mosberger-Tang -+ * -+ * More information about perfmon available at: -+ * http://perfmon2.sf.net -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include "perfmon_priv.h" -+ -+#ifndef CONFIG_MODULE_UNLOAD -+#define module_refcount(n) 1 -+#endif -+ -+static __cacheline_aligned_in_smp int request_mod_in_progress; -+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_pmu_conf_lock); -+ -+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_pmu_acq_lock); -+static u32 pfm_pmu_acquired; -+ -+/* -+ * perfmon core must acces PMU information ONLY through pfm_pmu_conf -+ * if pfm_pmu_conf is NULL, then no description is registered -+ */ -+struct pfm_pmu_config *pfm_pmu_conf; -+EXPORT_SYMBOL(pfm_pmu_conf); -+ -+static inline int pmu_is_module(struct pfm_pmu_config *c) -+{ -+ return !(c->flags & PFM_PMUFL_IS_BUILTIN); -+} -+/** -+ * pfm_pmu_regdesc_init -- initialize regdesc structure from PMU table -+ * @regs: the regdesc structure to initialize -+ * @excl_type: the register type(s) to exclude from this regdesc -+ * @unvail_pmcs: unavailable PMC registers -+ * @unavail_pmds: unavailable PMD registers -+ * -+ * Return: -+ * 0 success -+ * errno in case of error -+ */ -+static int 
pfm_pmu_regdesc_init(struct pfm_regdesc *regs, int excl_type, -+ u64 *unavail_pmcs, u64 *unavail_pmds) -+{ -+ struct pfm_regmap_desc *d; -+ u16 n, n2, n_counters, i; -+ int first_intr_pmd = -1, max1, max2, max3; -+ -+ /* -+ * compute the number of implemented PMC from the -+ * description table -+ */ -+ n = 0; -+ max1 = max2 = -1; -+ d = pfm_pmu_conf->pmc_desc; -+ for (i = 0; i < pfm_pmu_conf->num_pmc_entries; i++, d++) { -+ if (!(d->type & PFM_REG_I)) -+ continue; -+ -+ if (test_bit(i, cast_ulp(unavail_pmcs))) -+ continue; -+ -+ if (d->type & excl_type) -+ continue; -+ -+ __set_bit(i, cast_ulp(regs->pmcs)); -+ -+ max1 = i; -+ n++; -+ } -+ -+ if (!n) { -+ PFM_INFO("%s PMU description has no PMC registers", -+ pfm_pmu_conf->pmu_name); -+ return -EINVAL; -+ } -+ -+ regs->max_pmc = max1 + 1; -+ regs->num_pmcs = n; -+ -+ n = n_counters = n2 = 0; -+ max1 = max2 = max3 = -1; -+ d = pfm_pmu_conf->pmd_desc; -+ for (i = 0; i < pfm_pmu_conf->num_pmd_entries; i++, d++) { -+ if (!(d->type & PFM_REG_I)) -+ continue; -+ -+ if (test_bit(i, cast_ulp(unavail_pmds))) -+ continue; -+ -+ if (d->type & excl_type) -+ continue; -+ -+ __set_bit(i, cast_ulp(regs->pmds)); -+ max1 = i; -+ n++; -+ -+ /* -+ * read-write registers -+ */ -+ if (!(d->type & PFM_REG_RO)) { -+ __set_bit(i, cast_ulp(regs->rw_pmds)); -+ max3 = i; -+ n2++; -+ } -+ -+ /* -+ * counter registers -+ */ -+ if (d->type & PFM_REG_C64) { -+ __set_bit(i, cast_ulp(regs->cnt_pmds)); -+ n_counters++; -+ } -+ -+ /* -+ * PMD with intr capabilities -+ */ -+ if (d->type & PFM_REG_INTR) { -+ __set_bit(i, cast_ulp(regs->intr_pmds)); -+ if (first_intr_pmd == -1) -+ first_intr_pmd = i; -+ max2 = i; -+ } -+ } -+ -+ if (!n) { -+ PFM_INFO("%s PMU description has no PMD registers", -+ pfm_pmu_conf->pmu_name); -+ return -EINVAL; -+ } -+ -+ regs->max_pmd = max1 + 1; -+ regs->first_intr_pmd = first_intr_pmd; -+ regs->max_intr_pmd = max2 + 1; -+ -+ regs->num_counters = n_counters; -+ regs->num_pmds = n; -+ regs->max_rw_pmd = max3 + 1; -+ 
regs->num_rw_pmd = n2; -+ -+ return 0; -+} -+ -+/** -+ * pfm_pmu_regdesc_init_all -- initialize all regdesc structures -+ * @una_pmcs : unavailable PMC registers -+ * @una_pmds : unavailable PMD registers -+ * -+ * Return: -+ * 0 sucess -+ * errno if error -+ * -+ * We maintain 3 regdesc: -+ * regs_all: all available registers -+ * regs_sys: registers available to system-wide contexts only -+ * regs_thr: registers available to per-thread contexts only -+ */ -+static int pfm_pmu_regdesc_init_all(u64 *una_pmcs, u64 *una_pmds) -+{ -+ int ret; -+ -+ memset(&pfm_pmu_conf->regs_all, 0, sizeof(struct pfm_regdesc)); -+ memset(&pfm_pmu_conf->regs_thr, 0, sizeof(struct pfm_regdesc)); -+ memset(&pfm_pmu_conf->regs_sys, 0, sizeof(struct pfm_regdesc)); -+ -+ ret = pfm_pmu_regdesc_init(&pfm_pmu_conf->regs_all, -+ 0, -+ una_pmcs, una_pmds); -+ if (ret) -+ return ret; -+ -+ PFM_DBG("regs_all.pmcs=0x%llx", -+ (unsigned long long)pfm_pmu_conf->regs_all.pmcs[0]); -+ -+ ret = pfm_pmu_regdesc_init(&pfm_pmu_conf->regs_thr, -+ PFM_REG_SYS, -+ una_pmcs, una_pmds); -+ if (ret) -+ return ret; -+ PFM_DBG("regs.thr.pmcs=0x%llx", -+ (unsigned long long)pfm_pmu_conf->regs_thr.pmcs[0]); -+ -+ ret = pfm_pmu_regdesc_init(&pfm_pmu_conf->regs_sys, -+ PFM_REG_THR, -+ una_pmcs, una_pmds); -+ -+ PFM_DBG("regs_sys.pmcs=0x%llx", -+ (unsigned long long)pfm_pmu_conf->regs_sys.pmcs[0]); -+ -+ return ret; -+} -+ -+int pfm_pmu_register(struct pfm_pmu_config *cfg) -+{ -+ u16 i, nspec, nspec_ro, num_pmcs, num_pmds, num_wc = 0; -+ int type, ret = -EBUSY; -+ -+ if (perfmon_disabled) { -+ PFM_INFO("perfmon disabled, cannot add PMU description"); -+ return -ENOSYS; -+ } -+ -+ nspec = nspec_ro = num_pmds = num_pmcs = 0; -+ -+ /* some sanity checks */ -+ if (cfg == NULL || cfg->pmu_name == NULL) { -+ PFM_INFO("PMU config descriptor is invalid"); -+ return -EINVAL; -+ } -+ -+ /* must have a probe */ -+ if (cfg->probe_pmu == NULL) { -+ PFM_INFO("PMU config has no probe routine"); -+ return -EINVAL; -+ } -+ -+ /* -+ * 
execute probe routine before anything else as it -+ * may update configuration tables -+ */ -+ if ((*cfg->probe_pmu)() == -1) { -+ PFM_INFO("%s PMU detection failed", cfg->pmu_name); -+ return -EINVAL; -+ } -+ -+ if (!(cfg->flags & PFM_PMUFL_IS_BUILTIN) && cfg->owner == NULL) { -+ PFM_INFO("PMU config %s is missing owner", cfg->pmu_name); -+ return -EINVAL; -+ } -+ -+ if (!cfg->num_pmd_entries) { -+ PFM_INFO("%s needs to define num_pmd_entries", cfg->pmu_name); -+ return -EINVAL; -+ } -+ -+ if (!cfg->num_pmc_entries) { -+ PFM_INFO("%s needs to define num_pmc_entries", cfg->pmu_name); -+ return -EINVAL; -+ } -+ -+ if (!cfg->counter_width) { -+ PFM_INFO("PMU config %s, zero width counters", cfg->pmu_name); -+ return -EINVAL; -+ } -+ -+ /* -+ * REG_RO, REG_V not supported on PMC registers -+ */ -+ for (i = 0; i < cfg->num_pmc_entries; i++) { -+ -+ type = cfg->pmc_desc[i].type; -+ -+ if (type & PFM_REG_I) -+ num_pmcs++; -+ -+ if (type & PFM_REG_WC) -+ num_wc++; -+ -+ if (type & PFM_REG_V) { -+ PFM_INFO("PFM_REG_V is not supported on " -+ "PMCs (PMC%d)", i); -+ return -EINVAL; -+ } -+ if (type & PFM_REG_RO) { -+ PFM_INFO("PFM_REG_RO meaningless on " -+ "PMCs (PMC%u)", i); -+ return -EINVAL; -+ } -+ } -+ -+ if (num_wc && cfg->pmc_write_check == NULL) { -+ PFM_INFO("some PMCs have write-checker but no callback provided\n"); -+ return -EINVAL; -+ } -+ -+ /* -+ * check virtual PMD registers -+ */ -+ num_wc = 0; -+ for (i = 0; i < cfg->num_pmd_entries; i++) { -+ -+ type = cfg->pmd_desc[i].type; -+ -+ if (type & PFM_REG_I) -+ num_pmds++; -+ -+ if (type & PFM_REG_V) { -+ nspec++; -+ if (type & PFM_REG_RO) -+ nspec_ro++; -+ } -+ -+ if (type & PFM_REG_WC) -+ num_wc++; -+ } -+ -+ if (num_wc && cfg->pmd_write_check == NULL) { -+ PFM_INFO("PMD have write-checker but no callback provided\n"); -+ return -EINVAL; -+ } -+ -+ if (nspec && cfg->pmd_sread == NULL) { -+ PFM_INFO("PMU config is missing pmd_sread()"); -+ return -EINVAL; -+ } -+ -+ nspec = nspec - nspec_ro; -+ if (nspec && 
cfg->pmd_swrite == NULL) { -+ PFM_INFO("PMU config is missing pmd_swrite()"); -+ return -EINVAL; -+ } -+ -+ if (num_pmcs >= PFM_MAX_PMCS) { -+ PFM_INFO("%s PMCS registers exceed name space [0-%u]", -+ cfg->pmu_name, -+ PFM_MAX_PMCS); -+ return -EINVAL; -+ } -+ if (num_pmds >= PFM_MAX_PMDS) { -+ PFM_INFO("%s PMDS registers exceed name space [0-%u]", -+ cfg->pmu_name, -+ PFM_MAX_PMDS); -+ return -EINVAL; -+ } -+ spin_lock(&pfm_pmu_conf_lock); -+ -+ if (pfm_pmu_conf) -+ goto unlock; -+ -+ if (!cfg->version) -+ cfg->version = "0.0"; -+ -+ pfm_pmu_conf = cfg; -+ pfm_pmu_conf->ovfl_mask = (1ULL << cfg->counter_width) - 1; -+ -+ ret = pfm_arch_pmu_config_init(cfg); -+ if (ret) -+ goto unlock; -+ -+ ret = pfm_sysfs_add_pmu(pfm_pmu_conf); -+ if (ret) -+ pfm_pmu_conf = NULL; -+ -+unlock: -+ spin_unlock(&pfm_pmu_conf_lock); -+ -+ if (ret) { -+ PFM_INFO("register %s PMU error %d", cfg->pmu_name, ret); -+ } else { -+ PFM_INFO("%s PMU installed", cfg->pmu_name); -+ /* -+ * (re)initialize PMU on each PMU now that we have a description -+ */ -+ on_each_cpu(__pfm_init_percpu, cfg, 0); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(pfm_pmu_register); -+ -+/* -+ * remove PMU description. Caller must pass address of current -+ * configuration. This is mostly for sanity checking as only -+ * one config can exist at any time. -+ * -+ * We are using the module refcount mechanism to protect against -+ * removal while the configuration is being used. As long as there is -+ * one context, a PMU configuration cannot be removed. The protection is -+ * managed in module logic. 
-+ */ -+void pfm_pmu_unregister(struct pfm_pmu_config *cfg) -+{ -+ if (!(cfg || pfm_pmu_conf)) -+ return; -+ -+ spin_lock(&pfm_pmu_conf_lock); -+ -+ BUG_ON(module_refcount(pfm_pmu_conf->owner)); -+ -+ if (cfg->owner == pfm_pmu_conf->owner) { -+ pfm_sysfs_remove_pmu(pfm_pmu_conf); -+ pfm_pmu_conf = NULL; -+ } -+ -+ spin_unlock(&pfm_pmu_conf_lock); -+} -+EXPORT_SYMBOL(pfm_pmu_unregister); -+ -+static int pfm_pmu_request_module(void) -+{ -+ char *mod_name; -+ int ret; -+ -+ mod_name = pfm_arch_get_pmu_module_name(); -+ if (mod_name == NULL) -+ return -ENOSYS; -+ -+ ret = request_module(mod_name); -+ -+ PFM_DBG("mod=%s ret=%d\n", mod_name, ret); -+ return ret; -+} -+ -+/* -+ * autoload: -+ * 0 : do not try to autoload the PMU description module -+ * not 0 : try to autoload the PMU description module -+ */ -+int pfm_pmu_conf_get(int autoload) -+{ -+ int ret; -+ -+ spin_lock(&pfm_pmu_conf_lock); -+ -+ if (request_mod_in_progress) { -+ ret = -ENOSYS; -+ goto skip; -+ } -+ -+ if (autoload && pfm_pmu_conf == NULL) { -+ -+ request_mod_in_progress = 1; -+ -+ spin_unlock(&pfm_pmu_conf_lock); -+ -+ pfm_pmu_request_module(); -+ -+ spin_lock(&pfm_pmu_conf_lock); -+ -+ request_mod_in_progress = 0; -+ -+ /* -+ * request_module() may succeed but the module -+ * may not have registered properly so we need -+ * to check -+ */ -+ } -+ -+ ret = pfm_pmu_conf == NULL ? -ENOSYS : 0; -+ if (!ret && pmu_is_module(pfm_pmu_conf) -+ && !try_module_get(pfm_pmu_conf->owner)) -+ ret = -ENOSYS; -+ -+skip: -+ spin_unlock(&pfm_pmu_conf_lock); -+ -+ return ret; -+} -+ -+void pfm_pmu_conf_put(void) -+{ -+ if (pfm_pmu_conf == NULL || !pmu_is_module(pfm_pmu_conf)) -+ return; -+ -+ spin_lock(&pfm_pmu_conf_lock); -+ module_put(pfm_pmu_conf->owner); -+ spin_unlock(&pfm_pmu_conf_lock); -+} -+ -+ -+/* -+ * acquire PMU resource from lower-level PMU register allocator -+ * (currently perfctr-watchdog.c) -+ * -+ * acquisition is done when the first context is created (and not -+ * when it is loaded). 
We grab all that is defined in the description -+ * module and then we make adjustments at the arch-specific level. -+ * -+ * The PMU resource is released when the last perfmon context is -+ * destroyed. -+ * -+ * interrupts are not masked -+ */ -+int pfm_pmu_acquire(struct pfm_context *ctx) -+{ -+ u64 unavail_pmcs[PFM_PMC_BV]; -+ u64 unavail_pmds[PFM_PMD_BV]; -+ int ret = 0; -+ -+ spin_lock(&pfm_pmu_acq_lock); -+ -+ PFM_DBG("pmu_acquired=%u", pfm_pmu_acquired); -+ -+ pfm_pmu_acquired++; -+ -+ /* -+ * we need to initialize regdesc each time we re-acquire -+ * the PMU for the first time as there may have been changes -+ * in the list of available registers, e.g., NMI may have -+ * been disabled. Checking on PMU module insert is not -+ * enough -+ */ -+ if (pfm_pmu_acquired == 1) { -+ memset(unavail_pmcs, 0, sizeof(unavail_pmcs)); -+ memset(unavail_pmds, 0, sizeof(unavail_pmds)); -+ -+ ret = pfm_arch_pmu_acquire(unavail_pmcs, unavail_pmds); -+ if (ret) { -+ pfm_pmu_acquired--; -+ } else { -+ pfm_pmu_regdesc_init_all(unavail_pmcs, unavail_pmds); -+ -+ /* available PMU ressources */ -+ PFM_DBG("PMU acquired: %u PMCs, %u PMDs, %u counters", -+ pfm_pmu_conf->regs_all.num_pmcs, -+ pfm_pmu_conf->regs_all.num_pmds, -+ pfm_pmu_conf->regs_all.num_counters); -+ } -+ } -+ spin_unlock(&pfm_pmu_acq_lock); -+ -+ /* -+ * copy the regdesc that corresponds to the context -+ * we copy and not just point because it helps with -+ * memory locality. the regdesc structure is accessed -+ * very frequently in performance critical code such -+ * as context switch and interrupt handling. By using -+ * a local copy, we increase memory footprint, but -+ * increase chance to have local memory access, -+ * especially for system-wide contexts. 
-+ */ -+ if (ctx->flags.system) -+ ctx->regs = pfm_pmu_conf->regs_sys; -+ else -+ ctx->regs = pfm_pmu_conf->regs_thr; -+ -+ return ret; -+} -+ -+/* -+ * release the PMU resource -+ * -+ * actual release happens when last context is destroyed -+ * -+ * interrupts are not masked -+ */ -+void pfm_pmu_release(void) -+{ -+ BUG_ON(irqs_disabled()); -+ -+ /* -+ * we need to use a spinlock because release takes some time -+ * and we may have a race with pfm_pmu_acquire() -+ */ -+ spin_lock(&pfm_pmu_acq_lock); -+ -+ PFM_DBG("pmu_acquired=%d", pfm_pmu_acquired); -+ -+ /* -+ * we decouple test and decrement because if we had errors -+ * in pfm_pmu_acquire(), we still come here on pfm_context_free() -+ * but with pfm_pmu_acquire=0 -+ */ -+ if (pfm_pmu_acquired > 0 && --pfm_pmu_acquired == 0) { -+ pfm_arch_pmu_release(); -+ PFM_DBG("PMU released"); -+ } -+ spin_unlock(&pfm_pmu_acq_lock); -+} -diff --git a/perfmon/perfmon_priv.h b/perfmon/perfmon_priv.h -new file mode 100644 -index 0000000..5b485de ---- /dev/null -+++ b/perfmon/perfmon_priv.h -@@ -0,0 +1,182 @@ -+/* -+ * Copyright (c) 2001-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+ -+#ifndef __PERFMON_PRIV_H__ -+#define __PERFMON_PRIV_H__ -+/* -+ * This file contains all the definitions of data structures, variables, macros -+ * that are to private to the generic code, i.e., not shared with any code that -+ * lives under arch/ or include/asm-XX -+ * -+ * For shared definitions, use include/linux/perfmon_kern.h -+ */ -+ -+#ifdef CONFIG_PERFMON -+ -+/* -+ * type of PMD reset for pfm_reset_pmds() or pfm_switch_sets*() -+ */ -+#define PFM_PMD_RESET_SHORT 1 /* use short reset value */ -+#define PFM_PMD_RESET_LONG 2 /* use long reset value */ -+ -+/* -+ * context lazy save/restore activation count -+ */ -+#define PFM_INVALID_ACTIVATION ((u64)~0) -+ -+DECLARE_PER_CPU(u64, pmu_activation_number); -+DECLARE_PER_CPU(struct hrtimer, pfm_hrtimer); -+ -+static inline void pfm_set_pmu_owner(struct task_struct *task, -+ struct pfm_context *ctx) -+{ -+ __get_cpu_var(pmu_owner) = task; -+ __get_cpu_var(pmu_ctx) = ctx; -+} -+ -+static inline int pfm_msgq_is_empty(struct pfm_context *ctx) -+{ -+ return ctx->msgq_head == ctx->msgq_tail; -+} -+ -+void pfm_get_next_msg(struct pfm_context *ctx, union pfarg_msg *m); -+int pfm_end_notify(struct pfm_context *ctx); -+int pfm_ovfl_notify(struct pfm_context *ctx, struct pfm_event_set *set, -+ unsigned long ip); -+ -+int pfm_alloc_fd(struct file **cfile); -+ -+int __pfm_delete_evtsets(struct pfm_context *ctx, void *arg, int count); -+int __pfm_getinfo_evtsets(struct pfm_context *ctx, struct pfarg_setinfo *req, -+ int count); -+int __pfm_create_evtsets(struct pfm_context *ctx, struct pfarg_setdesc *req, -+ int count); -+ -+ -+int pfm_init_ctx(void); -+ -+int pfm_pmu_acquire(struct pfm_context *ctx); -+void pfm_pmu_release(void); -+ -+int pfm_session_acquire(int is_system, u32 cpu); -+void 
pfm_session_release(int is_system, u32 cpu); -+ -+int pfm_smpl_buf_space_acquire(struct pfm_context *ctx, size_t size); -+int pfm_smpl_buf_load_context(struct pfm_context *ctx); -+void pfm_smpl_buf_unload_context(struct pfm_context *ctx); -+ -+int pfm_init_sysfs(void); -+ -+#ifdef CONFIG_PERFMON_DEBUG_FS -+int pfm_init_debugfs(void); -+int pfm_debugfs_add_cpu(int mycpu); -+void pfm_debugfs_del_cpu(int mycpu); -+#else -+static inline int pfm_init_debugfs(void) -+{ -+ return 0; -+} -+static inline int pfm_debugfs_add_cpu(int mycpu) -+{ -+ return 0; -+} -+ -+static inline void pfm_debugfs_del_cpu(int mycpu) -+{} -+#endif -+ -+ -+void pfm_reset_pmds(struct pfm_context *ctx, struct pfm_event_set *set, -+ int num_pmds, -+ int reset_mode); -+ -+struct pfm_event_set *pfm_prepare_sets(struct pfm_context *ctx, u16 load_set); -+int pfm_init_sets(void); -+ -+ssize_t pfm_sysfs_res_show(char *buf, size_t sz, int what); -+ -+void pfm_free_sets(struct pfm_context *ctx); -+int pfm_create_initial_set(struct pfm_context *ctx); -+void pfm_switch_sets_from_intr(struct pfm_context *ctx); -+void pfm_restart_timer(struct pfm_context *ctx, struct pfm_event_set *set); -+enum hrtimer_restart pfm_handle_switch_timeout(struct hrtimer *t); -+ -+enum hrtimer_restart pfm_switch_sets(struct pfm_context *ctx, -+ struct pfm_event_set *new_set, -+ int reset_mode, -+ int no_restart); -+ -+/** -+ * pfm_save_prev_ctx - check if previous context exists and save state -+ * -+ * called from pfm_load_ctx_thread() and __pfm_ctxsin_thread() to -+ * check if previous context exists. If so saved its PMU state. This is used -+ * only for UP kernels. -+ * -+ * PMU ownership is not cleared because the function is always called while -+ * trying to install a new owner. 
-+ */ -+static inline void pfm_check_save_prev_ctx(void) -+{ -+#ifdef CONFIG_SMP -+ struct pfm_event_set *set; -+ struct pfm_context *ctxp; -+ -+ ctxp = __get_cpu_var(pmu_ctx); -+ if (!ctxp) -+ return; -+ /* -+ * in UP per-thread, due to lazy save -+ * there could be a context from another -+ * task. We need to push it first before -+ * installing our new state -+ */ -+ set = ctxp->active_set; -+ pfm_save_pmds(ctxp, set); -+ /* -+ * do not clear ownership because we rewrite -+ * right away -+ */ -+#endif -+} -+ -+ -+int pfm_init_fs(void); -+ -+int pfm_init_hotplug(void); -+ -+void pfm_mask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set); -+void pfm_resume_after_ovfl(struct pfm_context *ctx); -+int pfm_setup_smpl_fmt(struct pfm_context *ctx, u32 ctx_flags, void *fmt_arg, -+ struct file *filp); -+ -+static inline void pfm_post_work(struct task_struct *task, -+ struct pfm_context *ctx, int type) -+{ -+ ctx->flags.work_type = type; -+ set_tsk_thread_flag(task, TIF_PERFMON_WORK); -+ pfm_arch_arm_handle_work(task); -+} -+ -+#define PFM_PMC_STK_ARG PFM_ARCH_PMC_STK_ARG -+#define PFM_PMD_STK_ARG PFM_ARCH_PMD_STK_ARG -+ -+#endif /* CONFIG_PERFMON */ -+ -+#endif /* __PERFMON_PRIV_H__ */ -diff --git a/perfmon/perfmon_res.c b/perfmon/perfmon_res.c -new file mode 100644 -index 0000000..7b0382b ---- /dev/null -+++ b/perfmon/perfmon_res.c -@@ -0,0 +1,450 @@ -+/* -+ * perfmon_res.c: perfmon2 resource allocations -+ * -+ * This file implements the perfmon2 interface which -+ * provides access to the hardware performance counters -+ * of the host processor. -+ * -+ * The initial version of perfmon.c was written by -+ * Ganesh Venkitachalam, IBM Corp. -+ * -+ * Then it was modified for perfmon-1.x by Stephane Eranian and -+ * David Mosberger, Hewlett Packard Co. -+ * -+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x -+ * by Stephane Eranian, Hewlett Packard Co. -+ * -+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. 
-+ * Contributed by Stephane Eranian -+ * David Mosberger-Tang -+ * -+ * More information about perfmon available at: -+ * http://perfmon2.sf.net -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+#include "perfmon_priv.h" -+ -+/* -+ * global information about all sessions -+ * mostly used to synchronize between system wide and per-process -+ */ -+struct pfm_resources { -+ size_t smpl_buf_mem_cur;/* current smpl buf mem usage */ -+ cpumask_t sys_cpumask; /* bitmask of used cpus */ -+ u32 thread_sessions; /* #num loaded per-thread sessions */ -+}; -+ -+static struct pfm_resources pfm_res; -+ -+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pfm_res_lock); -+ -+/** -+ * pfm_smpl_buf_space_acquire - check memory resource usage for sampling buffer -+ * @ctx: context of interest -+ * @size: size fo requested buffer -+ * -+ * sampling buffer allocated by perfmon must be -+ * checked against max locked memory usage thresholds -+ * for security reasons. -+ * -+ * The first level check is against the system wide limit -+ * as indicated by the system administrator in /sys/kernel/perfmon -+ * -+ * The second level check is on a per-process basis using -+ * RLIMIT_MEMLOCK limit. -+ * -+ * Operating on the current task only. 
-+ */ -+int pfm_smpl_buf_space_acquire(struct pfm_context *ctx, size_t size) -+{ -+ struct mm_struct *mm; -+ unsigned long locked; -+ unsigned long buf_mem, buf_mem_max; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&pfm_res_lock, flags); -+ -+ /* -+ * check against global buffer limit -+ */ -+ buf_mem_max = pfm_controls.smpl_buffer_mem_max; -+ buf_mem = pfm_res.smpl_buf_mem_cur + size; -+ -+ if (buf_mem <= buf_mem_max) { -+ pfm_res.smpl_buf_mem_cur = buf_mem; -+ -+ PFM_DBG("buf_mem_max=%lu current_buf_mem=%lu", -+ buf_mem_max, -+ buf_mem); -+ } -+ -+ spin_unlock_irqrestore(&pfm_res_lock, flags); -+ -+ if (buf_mem > buf_mem_max) { -+ PFM_DBG("smpl buffer memory threshold reached"); -+ return -ENOMEM; -+ } -+ -+ /* -+ * check against per-process RLIMIT_MEMLOCK -+ */ -+ mm = get_task_mm(current); -+ -+ down_write(&mm->mmap_sem); -+ -+ locked = mm->locked_vm << PAGE_SHIFT; -+ locked += size; -+ -+ if (locked > current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur) { -+ -+ PFM_DBG("RLIMIT_MEMLOCK reached ask_locked=%lu rlim_cur=%lu", -+ locked, -+ current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur); -+ -+ up_write(&mm->mmap_sem); -+ mmput(mm); -+ goto unres; -+ } -+ -+ mm->locked_vm = locked >> PAGE_SHIFT; -+ -+ up_write(&mm->mmap_sem); -+ -+ mmput(mm); -+ -+ return 0; -+ -+unres: -+ /* -+ * remove global buffer memory allocation -+ */ -+ spin_lock_irqsave(&pfm_res_lock, flags); -+ -+ pfm_res.smpl_buf_mem_cur -= size; -+ -+ spin_unlock_irqrestore(&pfm_res_lock, flags); -+ -+ return -ENOMEM; -+} -+/** -+ * pfm_smpl_buf_space_release - release resource usage for sampling buffer -+ * @ctx: perfmon context of interest -+ * -+ * There exist multiple paths leading to this function. We need to -+ * be very careful withlokcing on the mmap_sem as it may already be -+ * held by the time we come here. 
-+ * The following paths exist: -+ * -+ * exit path: -+ * sys_exit_group -+ * do_group_exit -+ * do_exit -+ * exit_mm -+ * mmput -+ * exit_mmap -+ * remove_vma -+ * fput -+ * __fput -+ * pfm_close -+ * __pfm_close -+ * pfm_context_free -+ * pfm_release_buf_space -+ * munmap path: -+ * sys_munmap -+ * do_munmap -+ * remove_vma -+ * fput -+ * __fput -+ * pfm_close -+ * __pfm_close -+ * pfm_context_free -+ * pfm_release_buf_space -+ * -+ * close path: -+ * sys_close -+ * filp_close -+ * fput -+ * __fput -+ * pfm_close -+ * __pfm_close -+ * pfm_context_free -+ * pfm_release_buf_space -+ * -+ * The issue is that on the munmap() path, the mmap_sem is already held -+ * in write-mode by the time we come here. To avoid the deadlock, we need -+ * to know where we are coming from and skip down_write(). If is fairly -+ * difficult to know this because of the lack of good hooks and -+ * the fact that, there may not have been any mmap() of the sampling buffer -+ * (i.e. create_context() followed by close() or exit()). -+ * -+ * We use a set flag ctx->flags.mmap_nlock which is toggled in the vm_ops -+ * callback in remove_vma() which is called systematically for the call, so -+ * on all but the pure close() path. The exit path does not already hold -+ * the lock but this is exit so there is no task->mm by the time we come here. -+ * -+ * The mmap_nlock is set only when unmapping and this is the LAST reference -+ * to the file (i.e., close() followed by munmap()). 
-+ */ -+void pfm_smpl_buf_space_release(struct pfm_context *ctx, size_t size) -+{ -+ unsigned long flags; -+ struct mm_struct *mm; -+ -+ mm = get_task_mm(current); -+ if (mm) { -+ if (ctx->flags.mmap_nlock == 0) { -+ PFM_DBG("doing down_write"); -+ down_write(&mm->mmap_sem); -+ } -+ -+ mm->locked_vm -= size >> PAGE_SHIFT; -+ -+ PFM_DBG("size=%zu locked_vm=%lu", size, mm->locked_vm); -+ -+ if (ctx->flags.mmap_nlock == 0) -+ up_write(&mm->mmap_sem); -+ -+ mmput(mm); -+ } -+ -+ spin_lock_irqsave(&pfm_res_lock, flags); -+ -+ pfm_res.smpl_buf_mem_cur -= size; -+ -+ spin_unlock_irqrestore(&pfm_res_lock, flags); -+} -+ -+/** -+ * pfm_session_acquire - reserve a per-thread or per-cpu session -+ * @is_system: true if per-cpu session -+ * @cpu: cpu number for per-cpu session -+ * -+ * return: -+ * 0 : success -+ * -EBUSY: if conflicting session exist -+ */ -+int pfm_session_acquire(int is_system, u32 cpu) -+{ -+ unsigned long flags; -+ u32 nsys_cpus; -+ int ret = 0; -+ -+ /* -+ * validy checks on cpu_mask have been done upstream -+ */ -+ spin_lock_irqsave(&pfm_res_lock, flags); -+ -+ nsys_cpus = cpus_weight(pfm_res.sys_cpumask); -+ -+ PFM_DBG("in sys=%u task=%u is_sys=%d cpu=%u", -+ nsys_cpus, -+ pfm_res.thread_sessions, -+ is_system, -+ cpu); -+ -+ if (is_system) { -+ /* -+ * cannot mix system wide and per-task sessions -+ */ -+ if (pfm_res.thread_sessions > 0) { -+ PFM_DBG("%u conflicting thread_sessions", -+ pfm_res.thread_sessions); -+ ret = -EBUSY; -+ goto abort; -+ } -+ -+ if (cpu_isset(cpu, pfm_res.sys_cpumask)) { -+ PFM_DBG("conflicting session on CPU%u", cpu); -+ ret = -EBUSY; -+ goto abort; -+ } -+ -+ PFM_DBG("reserved session on CPU%u", cpu); -+ -+ cpu_set(cpu, pfm_res.sys_cpumask); -+ nsys_cpus++; -+ } else { -+ if (nsys_cpus) { -+ ret = -EBUSY; -+ goto abort; -+ } -+ pfm_res.thread_sessions++; -+ } -+ -+ PFM_DBG("out sys=%u task=%u is_sys=%d cpu=%u", -+ nsys_cpus, -+ pfm_res.thread_sessions, -+ is_system, -+ cpu); -+ -+abort: -+ 
spin_unlock_irqrestore(&pfm_res_lock, flags); -+ -+ return ret; -+} -+ -+/** -+ * pfm_session_release - release a per-cpu or per-thread session -+ * @is_system: true if per-cpu session -+ * @cpu: cpu number for per-cpu session -+ * -+ * called from __pfm_unload_context() -+ */ -+void pfm_session_release(int is_system, u32 cpu) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave(&pfm_res_lock, flags); -+ -+ PFM_DBG("in sys_sessions=%u thread_sessions=%u syswide=%d cpu=%u", -+ cpus_weight(pfm_res.sys_cpumask), -+ pfm_res.thread_sessions, -+ is_system, cpu); -+ -+ if (is_system) -+ cpu_clear(cpu, pfm_res.sys_cpumask); -+ else -+ pfm_res.thread_sessions--; -+ -+ PFM_DBG("out sys_sessions=%u thread_sessions=%u syswide=%d cpu=%u", -+ cpus_weight(pfm_res.sys_cpumask), -+ pfm_res.thread_sessions, -+ is_system, cpu); -+ -+ spin_unlock_irqrestore(&pfm_res_lock, flags); -+} -+ -+/** -+ * pfm_session_allcpus_acquire - acquire per-cpu sessions on all available cpus -+ * -+ * currently used by Oprofile on X86 -+ */ -+int pfm_session_allcpus_acquire(void) -+{ -+ unsigned long flags; -+ u32 nsys_cpus, cpu; -+ int ret = -EBUSY; -+ -+ spin_lock_irqsave(&pfm_res_lock, flags); -+ -+ nsys_cpus = cpus_weight(pfm_res.sys_cpumask); -+ -+ PFM_DBG("in sys=%u task=%u", -+ nsys_cpus, -+ pfm_res.thread_sessions); -+ -+ if (nsys_cpus) { -+ PFM_DBG("already some system-wide sessions"); -+ goto abort; -+ } -+ -+ /* -+ * cannot mix system wide and per-task sessions -+ */ -+ if (pfm_res.thread_sessions) { -+ PFM_DBG("%u conflicting thread_sessions", -+ pfm_res.thread_sessions); -+ goto abort; -+ } -+ -+ for_each_online_cpu(cpu) { -+ cpu_set(cpu, pfm_res.sys_cpumask); -+ nsys_cpus++; -+ } -+ -+ PFM_DBG("out sys=%u task=%u", -+ nsys_cpus, -+ pfm_res.thread_sessions); -+ -+ ret = 0; -+abort: -+ spin_unlock_irqrestore(&pfm_res_lock, flags); -+ -+ return ret; -+} -+EXPORT_SYMBOL(pfm_session_allcpus_acquire); -+ -+/** -+ * pfm_session_allcpus_release - relase per-cpu sessions on all cpus -+ * -+ * 
currently used by Oprofile code -+ */ -+void pfm_session_allcpus_release(void) -+{ -+ unsigned long flags; -+ u32 nsys_cpus, cpu; -+ -+ spin_lock_irqsave(&pfm_res_lock, flags); -+ -+ nsys_cpus = cpus_weight(pfm_res.sys_cpumask); -+ -+ PFM_DBG("in sys=%u task=%u", -+ nsys_cpus, -+ pfm_res.thread_sessions); -+ -+ /* -+ * XXX: could use __cpus_clear() with nbits -+ */ -+ for_each_online_cpu(cpu) { -+ cpu_clear(cpu, pfm_res.sys_cpumask); -+ nsys_cpus--; -+ } -+ -+ PFM_DBG("out sys=%u task=%u", -+ nsys_cpus, -+ pfm_res.thread_sessions); -+ -+ spin_unlock_irqrestore(&pfm_res_lock, flags); -+} -+EXPORT_SYMBOL(pfm_session_allcpus_release); -+ -+/** -+ * pfm_sysfs_res_show - return currnt resourcde usage for sysfs -+ * @buf: buffer to hold string in return -+ * @sz: size of buf -+ * @what: what to produce -+ * what=0 : thread_sessions -+ * what=1 : cpus_weight(sys_cpumask) -+ * what=2 : smpl_buf_mem_cur -+ * what=3 : pmu model name -+ * -+ * called from perfmon_sysfs.c -+ * return number of bytes written into buf (up to sz) -+ */ -+ssize_t pfm_sysfs_res_show(char *buf, size_t sz, int what) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave(&pfm_res_lock, flags); -+ -+ switch (what) { -+ case 0: snprintf(buf, sz, "%u\n", pfm_res.thread_sessions); -+ break; -+ case 1: snprintf(buf, sz, "%d\n", cpus_weight(pfm_res.sys_cpumask)); -+ break; -+ case 2: snprintf(buf, sz, "%zu\n", pfm_res.smpl_buf_mem_cur); -+ break; -+ case 3: -+ snprintf(buf, sz, "%s\n", -+ pfm_pmu_conf ? pfm_pmu_conf->pmu_name -+ : "unknown\n"); -+ } -+ spin_unlock_irqrestore(&pfm_res_lock, flags); -+ return strlen(buf); -+} -diff --git a/perfmon/perfmon_rw.c b/perfmon/perfmon_rw.c -new file mode 100644 -index 0000000..3168eb7 ---- /dev/null -+++ b/perfmon/perfmon_rw.c -@@ -0,0 +1,733 @@ -+/* -+ * perfmon.c: perfmon2 PMC/PMD read/write system calls -+ * -+ * This file implements the perfmon2 interface which -+ * provides access to the hardware performance counters -+ * of the host processor. 
-+ * -+ * The initial version of perfmon.c was written by -+ * Ganesh Venkitachalam, IBM Corp. -+ * -+ * Then it was modified for perfmon-1.x by Stephane Eranian and -+ * David Mosberger, Hewlett Packard Co. -+ * -+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x -+ * by Stephane Eranian, Hewlett Packard Co. -+ * -+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * David Mosberger-Tang -+ * -+ * More information about perfmon available at: -+ * http://perfmon2.sf.net/ -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+#include "perfmon_priv.h" -+ -+#define PFM_REGFL_PMC_ALL (PFM_REGFL_NO_EMUL64) -+#define PFM_REGFL_PMD_ALL (PFM_REGFL_RANDOM|PFM_REGFL_OVFL_NOTIFY) -+ -+/** -+ * update_used_reg -- updated used_pmcs for a single PMD -+ * @set: set to update -+ * @cnum: new PMD to add -+ * -+ * This function adds the pmds and pmcs depending on PMD cnum -+ */ -+static inline void update_used_reg(struct pfm_context *ctx, -+ struct pfm_event_set *set, u16 cnum) -+{ -+ bitmap_or(cast_ulp(set->used_pmcs), -+ cast_ulp(set->used_pmcs), -+ cast_ulp(pfm_pmu_conf->pmd_desc[cnum].dep_pmcs), -+ ctx->regs.max_pmc); -+} -+ -+/** -+ * update_used -- update used_pmcs bitmask -+ * @set: event set to update -+ * @bv: bitmask to inspect for new PMD registers -+ * -+ * This function updates the used_pmcs bitmask for -+ * the set using bv, a bitmask of pmds. For each pmd in bv, -+ * its depending pmcs are added to used_pmcs. -+ */ -+static void update_used_pmcs(struct pfm_context *ctx, -+ struct pfm_event_set *set, unsigned long *bv) -+{ -+ u16 max_pmd; -+ int n, p, q; -+ -+ max_pmd = ctx->regs.max_pmd; -+ -+ n = bitmap_weight(bv, max_pmd); -+ for(p = 0; n; n--, p = q+1) { -+ q = find_next_bit(bv, max_pmd, p); -+ update_used_reg(ctx, set, q); -+ } -+} -+ -+/** -+ * update_changes -- update nused_pmcs, nused_pmds, write newly touched pmcs -+ * @ctx: context to use -+ * @set: event set to use -+ * @old_used_pmcs: former used_pmc bitmask -+ * @can_access: non-zero if PMU is accessible, i.e., can be written to -+ * -+ * This function updates nused_pmcs and nused_pmds after the last modificiation -+ * to an event set. When new pmcs are used, then they must be initialized such -+ * that we do not pick up stale values from another session. 
-+ */ -+static inline int update_changes(struct pfm_context *ctx, struct pfm_event_set *set, -+ unsigned long *old_used_pmcs) -+{ -+ struct pfarg_pmc req; -+ u16 max_pmc, max_pmd; -+ int n, p, q, ret = 0; -+ -+ max_pmd = ctx->regs.max_pmd; -+ max_pmc = ctx->regs.max_pmc; -+ -+ /* -+ * update used counts -+ */ -+ set->nused_pmds = bitmap_weight(cast_ulp(set->used_pmds), max_pmd); -+ set->nused_pmcs = bitmap_weight(cast_ulp(set->used_pmcs), max_pmc); -+ -+ PFM_DBG("set%u u_pmds=0x%llx nu_pmds=%u u_pmcs=0x%llx nu_pmcs=%u", -+ set->id, -+ (unsigned long long)set->used_pmds[0], -+ set->nused_pmds, -+ (unsigned long long)set->used_pmcs[0], -+ set->nused_pmcs); -+ -+ memset(&req, 0, sizeof(req)); -+ -+ n = bitmap_weight(cast_ulp(set->used_pmcs), max_pmc); -+ for(p = 0; n; n--, p = q+1) { -+ q = find_next_bit(cast_ulp(set->used_pmcs), max_pmc, p); -+ -+ if (test_bit(q, cast_ulp(old_used_pmcs))) -+ continue; -+ -+ req.reg_num = q; -+ req.reg_value = set->pmcs[q]; -+ -+ ret = __pfm_write_pmcs(ctx, &req, 1); -+ if (ret) -+ break; -+ } -+ return ret; -+} -+ -+/** -+ * handle_smpl_bv - checks sampling bitmasks for new PMDs -+ * @ctx: context to use -+ * @set: set to use -+ * @bv: sampling bitmask -+ * -+ * scans the smpl bitmask looking for new PMDs (not yet used), if found -+ * invoke pfm_write_pmds() on them to get them initialized and marked used -+ */ -+static int handle_smpl_bv(struct pfm_context *ctx, struct pfm_event_set *set, -+ unsigned long *bv) -+{ -+ struct pfarg_pmd req; -+ int p, q, n, ret = 0; -+ u16 max_pmd; -+ -+ memset(&req, 0, sizeof(req)); -+ -+ max_pmd = ctx->regs.max_pmd; -+ -+ n = bitmap_weight(cast_ulp(bv), max_pmd); -+ -+ for(p = 0; n; n--, p = q+1) { -+ q = find_next_bit(cast_ulp(bv), max_pmd, p); -+ -+ if (test_bit(q, cast_ulp(set->used_pmds))) -+ continue; -+ -+ req.reg_num = q; -+ req.reg_value = 0; -+ -+ ret = __pfm_write_pmds(ctx, &req, 1, 0); -+ if (ret) -+ break; -+ } -+ return ret; -+} -+ -+/** -+ * is_invalid -- check if register index is 
within limits -+ * @cnum: register index -+ * @impl: bitmask of implemented registers -+ * @max: highest implemented registers + 1 -+ * -+ * return: -+ * 0 is register index is valid -+ * 1 if invalid -+ */ -+static inline int is_invalid(u16 cnum, unsigned long *impl, u16 max) -+{ -+ return cnum >= max || !test_bit(cnum, impl); -+} -+ -+/** -+ * __pfm_write_pmds - modified data registers -+ * @ctx: context to operate on -+ * @req: pfarg_pmd_t request from user -+ * @count: number of element in the pfarg_pmd_t vector -+ * @compat: used only on IA-64 to maintain backward compatibility with v2.0 -+ * -+ * The function succeeds whether the context is attached or not. -+ * When attached to another thread, that thread must be stopped. -+ * -+ * The context is locked and interrupts are disabled. -+ */ -+int __pfm_write_pmds(struct pfm_context *ctx, struct pfarg_pmd *req, int count, -+ int compat) -+{ -+ struct pfm_event_set *set, *active_set; -+ u64 old_used_pmcs[PFM_PMC_BV]; -+ unsigned long *smpl_pmds, *reset_pmds, *impl_pmds, *impl_rw_pmds; -+ u32 req_flags, flags; -+ u16 cnum, pmd_type, max_pmd; -+ u16 set_id; -+ int i, can_access_pmu; -+ int ret; -+ pfm_pmd_check_t wr_func; -+ -+ active_set = ctx->active_set; -+ max_pmd = ctx->regs.max_pmd; -+ impl_pmds = cast_ulp(ctx->regs.pmds); -+ impl_rw_pmds = cast_ulp(ctx->regs.rw_pmds); -+ wr_func = pfm_pmu_conf->pmd_write_check; -+ set = list_first_entry(&ctx->set_list, struct pfm_event_set, list); -+ -+ can_access_pmu = 0; -+ -+ /* -+ * we cannot access the actual PMD registers when monitoring is masked -+ */ -+ if (unlikely(ctx->state == PFM_CTX_LOADED)) -+ can_access_pmu = __get_cpu_var(pmu_owner) == ctx->task -+ || ctx->flags.system; -+ -+ bitmap_copy(cast_ulp(old_used_pmcs), -+ cast_ulp(set->used_pmcs), -+ ctx->regs.max_pmc); -+ -+ ret = -EINVAL; -+ for (i = 0; i < count; i++, req++) { -+ -+ cnum = req->reg_num; -+ set_id = req->reg_set; -+ req_flags = req->reg_flags; -+ smpl_pmds = cast_ulp(req->reg_smpl_pmds); -+ 
reset_pmds = cast_ulp(req->reg_reset_pmds); -+ flags = 0; -+ -+ /* -+ * cannot write to unexisting -+ * writes to read-only register are ignored -+ */ -+ if (unlikely(is_invalid(cnum, impl_pmds, max_pmd))) { -+ PFM_DBG("pmd%u is not available", cnum); -+ goto error; -+ } -+ -+ pmd_type = pfm_pmu_conf->pmd_desc[cnum].type; -+ -+ /* -+ * ensure only valid flags are set -+ */ -+ if (req_flags & ~(PFM_REGFL_PMD_ALL)) { -+ PFM_DBG("pmd%u: invalid flags=0x%x", -+ cnum, req_flags); -+ goto error; -+ } -+ -+ /* -+ * OVFL_NOTIFY is valid for all types of PMD. -+ * non counting PMD may trigger PMU interrupt -+ * and thus may trigger recording of a sample. -+ * This is true with IBS on AMD family 16. -+ */ -+ if (req_flags & PFM_REGFL_OVFL_NOTIFY) -+ flags |= PFM_REGFL_OVFL_NOTIFY; -+ -+ /* -+ * We allow randomization to non counting PMD -+ */ -+ if (req_flags & PFM_REGFL_RANDOM) -+ flags |= PFM_REGFL_RANDOM; -+ -+ /* -+ * verify validity of smpl_pmds -+ */ -+ if (unlikely(!bitmap_subset(smpl_pmds, impl_pmds, PFM_MAX_PMDS))) { -+ PFM_DBG("invalid smpl_pmds=0x%llx for pmd%u", -+ (unsigned long long)req->reg_smpl_pmds[0], -+ cnum); -+ goto error; -+ } -+ -+ /* -+ * verify validity of reset_pmds -+ * check against impl_rw_pmds because it is not -+ * possible to reset read-only PMDs -+ */ -+ if (unlikely(!bitmap_subset(reset_pmds, impl_rw_pmds, PFM_MAX_PMDS))) { -+ PFM_DBG("invalid reset_pmds=0x%llx for pmd%u", -+ (unsigned long long)req->reg_reset_pmds[0], -+ cnum); -+ goto error; -+ } -+ -+ /* -+ * locate event set -+ */ -+ if (set_id != set->id) { -+ /* update number of used register for previous set */ -+ if (i) { -+ ret = update_changes(ctx, set, cast_ulp(old_used_pmcs)); -+ if (ret) -+ goto error; -+ } -+ -+ set = pfm_find_set(ctx, set_id, 0); -+ if (set == NULL) { -+ PFM_DBG("event set%u does not exist", -+ set_id); -+ goto error; -+ } -+ bitmap_copy(cast_ulp(old_used_pmcs), -+ cast_ulp(set->used_pmcs), -+ ctx->regs.max_pmc); -+ } -+ -+ /* -+ * execute write checker, if 
any -+ */ -+ if (unlikely(wr_func && (pmd_type & PFM_REG_WC))) { -+ ret = (*wr_func)(ctx, set, req); -+ if (ret) -+ goto error; -+ -+ } -+ -+ -+ /* -+ * now commit changes to software state -+ */ -+ -+ if (unlikely(compat)) -+ goto skip_set; -+ -+ if (bitmap_weight(smpl_pmds, max_pmd)) { -+ ret = handle_smpl_bv(ctx, set, smpl_pmds); -+ if (ret) -+ goto error; -+ update_used_pmcs(ctx, set, cast_ulp(smpl_pmds)); -+ } -+ -+ bitmap_copy(cast_ulp(set->pmds[cnum].smpl_pmds), -+ smpl_pmds, -+ max_pmd); -+ -+ -+ if (bitmap_weight(reset_pmds, max_pmd)) { -+ ret = handle_smpl_bv(ctx, set, reset_pmds); -+ if (ret) -+ goto error; -+ update_used_pmcs(ctx, set, cast_ulp(reset_pmds)); -+ } -+ -+ bitmap_copy(cast_ulp(set->pmds[cnum].reset_pmds), -+ reset_pmds, -+ max_pmd); -+ -+ set->pmds[cnum].flags = flags; -+ -+ __set_bit(cnum, cast_ulp(set->used_pmds)); -+ update_used_reg(ctx, set, cnum); -+ -+ /* -+ * we reprogram the PMD hence, we clear any pending -+ * ovfl. Does affect ovfl switch on restart but new -+ * value has already been established here -+ */ -+ if (test_bit(cnum, cast_ulp(set->povfl_pmds))) { -+ set->npend_ovfls--; -+ __clear_bit(cnum, cast_ulp(set->povfl_pmds)); -+ } -+ __clear_bit(cnum, cast_ulp(set->ovfl_pmds)); -+ -+ /* -+ * update ovfl_notify -+ */ -+ if (flags & PFM_REGFL_OVFL_NOTIFY) -+ __set_bit(cnum, cast_ulp(set->ovfl_notify)); -+ else -+ __clear_bit(cnum, cast_ulp(set->ovfl_notify)); -+ -+ /* -+ * establish new switch count -+ */ -+ set->pmds[cnum].ovflsw_thres = req->reg_ovfl_switch_cnt; -+ set->pmds[cnum].ovflsw_ref_thres = req->reg_ovfl_switch_cnt; -+skip_set: -+ -+ /* -+ * set last value to new value for all types of PMD -+ */ -+ set->pmds[cnum].lval = req->reg_value; -+ set->pmds[cnum].value = req->reg_value; -+ -+ /* -+ * update reset values (not just for counters) -+ */ -+ set->pmds[cnum].long_reset = req->reg_long_reset; -+ set->pmds[cnum].short_reset = req->reg_short_reset; -+ -+ /* -+ * update randomization mask -+ */ -+ set->pmds[cnum].mask = 
req->reg_random_mask; -+ -+ set->pmds[cnum].eventid = req->reg_smpl_eventid; -+ -+ if (set == active_set) { -+ set->priv_flags |= PFM_SETFL_PRIV_MOD_PMDS; -+ if (can_access_pmu) -+ pfm_write_pmd(ctx, cnum, req->reg_value); -+ } -+ -+ -+ PFM_DBG("set%u pmd%u=0x%llx flags=0x%x a_pmu=%d " -+ "ctx_pmd=0x%llx s_reset=0x%llx " -+ "l_reset=0x%llx s_pmds=0x%llx " -+ "r_pmds=0x%llx o_pmds=0x%llx " -+ "o_thres=%llu compat=%d eventid=%llx", -+ set->id, -+ cnum, -+ (unsigned long long)req->reg_value, -+ set->pmds[cnum].flags, -+ can_access_pmu, -+ (unsigned long long)set->pmds[cnum].value, -+ (unsigned long long)set->pmds[cnum].short_reset, -+ (unsigned long long)set->pmds[cnum].long_reset, -+ (unsigned long long)set->pmds[cnum].smpl_pmds[0], -+ (unsigned long long)set->pmds[cnum].reset_pmds[0], -+ (unsigned long long)set->ovfl_pmds[0], -+ (unsigned long long)set->pmds[cnum].ovflsw_thres, -+ compat, -+ (unsigned long long)set->pmds[cnum].eventid); -+ } -+ ret = 0; -+ -+error: -+ update_changes(ctx, set, cast_ulp(old_used_pmcs)); -+ -+ /* -+ * make changes visible -+ */ -+ if (can_access_pmu) -+ pfm_arch_serialize(); -+ -+ return ret; -+} -+ -+/** -+ * __pfm_write_pmcs - modified config registers -+ * @ctx: context to operate on -+ * @req: pfarg_pmc_t request from user -+ * @count: number of element in the pfarg_pmc_t vector -+ * -+ * -+ * The function succeeds whether the context is * attached or not. -+ * When attached to another thread, that thread must be stopped. -+ * -+ * The context is locked and interrupts are disabled. 
-+ */ -+int __pfm_write_pmcs(struct pfm_context *ctx, struct pfarg_pmc *req, int count) -+{ -+ struct pfm_event_set *set, *active_set; -+ u64 value, dfl_val, rsvd_msk; -+ unsigned long *impl_pmcs; -+ int i, can_access_pmu; -+ int ret; -+ u16 set_id; -+ u16 cnum, pmc_type, max_pmc; -+ u32 flags, expert; -+ pfm_pmc_check_t wr_func; -+ -+ active_set = ctx->active_set; -+ -+ wr_func = pfm_pmu_conf->pmc_write_check; -+ max_pmc = ctx->regs.max_pmc; -+ impl_pmcs = cast_ulp(ctx->regs.pmcs); -+ set = list_first_entry(&ctx->set_list, struct pfm_event_set, list); -+ -+ expert = pfm_controls.flags & PFM_CTRL_FL_RW_EXPERT; -+ -+ can_access_pmu = 0; -+ -+ /* -+ * we cannot access the actual PMC registers when monitoring is masked -+ */ -+ if (unlikely(ctx->state == PFM_CTX_LOADED)) -+ can_access_pmu = __get_cpu_var(pmu_owner) == ctx->task -+ || ctx->flags.system; -+ -+ ret = -EINVAL; -+ -+ for (i = 0; i < count; i++, req++) { -+ -+ cnum = req->reg_num; -+ set_id = req->reg_set; -+ value = req->reg_value; -+ flags = req->reg_flags; -+ -+ /* -+ * no access to unavailable PMC register -+ */ -+ if (unlikely(is_invalid(cnum, impl_pmcs, max_pmc))) { -+ PFM_DBG("pmc%u is not available", cnum); -+ goto error; -+ } -+ -+ pmc_type = pfm_pmu_conf->pmc_desc[cnum].type; -+ dfl_val = pfm_pmu_conf->pmc_desc[cnum].dfl_val; -+ rsvd_msk = pfm_pmu_conf->pmc_desc[cnum].rsvd_msk; -+ -+ /* -+ * ensure only valid flags are set -+ */ -+ if (flags & ~PFM_REGFL_PMC_ALL) { -+ PFM_DBG("pmc%u: invalid flags=0x%x", cnum, flags); -+ goto error; -+ } -+ -+ /* -+ * locate event set -+ */ -+ if (set_id != set->id) { -+ set = pfm_find_set(ctx, set_id, 0); -+ if (set == NULL) { -+ PFM_DBG("event set%u does not exist", -+ set_id); -+ goto error; -+ } -+ } -+ -+ /* -+ * set reserved bits to default values -+ * (reserved bits must be 1 in rsvd_msk) -+ * -+ * bypass via /sys/kernel/perfmon/mode = 1 -+ */ -+ if (likely(!expert)) -+ value = (value & ~rsvd_msk) | (dfl_val & rsvd_msk); -+ -+ if (flags & 
PFM_REGFL_NO_EMUL64) { -+ if (!(pmc_type & PFM_REG_NO64)) { -+ PFM_DBG("pmc%u no support for " -+ "PFM_REGFL_NO_EMUL64", cnum); -+ goto error; -+ } -+ value &= ~pfm_pmu_conf->pmc_desc[cnum].no_emul64_msk; -+ } -+ -+ /* -+ * execute write checker, if any -+ */ -+ if (likely(wr_func && (pmc_type & PFM_REG_WC))) { -+ req->reg_value = value; -+ ret = (*wr_func)(ctx, set, req); -+ if (ret) -+ goto error; -+ value = req->reg_value; -+ } -+ -+ /* -+ * Now we commit the changes -+ */ -+ -+ /* -+ * mark PMC register as used -+ * We do not track associated PMC register based on -+ * the fact that they will likely need to be written -+ * in order to become useful at which point the statement -+ * below will catch that. -+ * -+ * The used_pmcs bitmask is only useful on architectures where -+ * the PMC needs to be modified for particular bits, especially -+ * on overflow or to stop/start. -+ */ -+ if (!test_bit(cnum, cast_ulp(set->used_pmcs))) { -+ __set_bit(cnum, cast_ulp(set->used_pmcs)); -+ set->nused_pmcs++; -+ } -+ -+ set->pmcs[cnum] = value; -+ -+ if (set == active_set) { -+ set->priv_flags |= PFM_SETFL_PRIV_MOD_PMCS; -+ if (can_access_pmu) -+ pfm_arch_write_pmc(ctx, cnum, value); -+ } -+ -+ PFM_DBG("set%u pmc%u=0x%llx a_pmu=%d " -+ "u_pmcs=0x%llx nu_pmcs=%u", -+ set->id, -+ cnum, -+ (unsigned long long)value, -+ can_access_pmu, -+ (unsigned long long)set->used_pmcs[0], -+ set->nused_pmcs); -+ } -+ ret = 0; -+error: -+ /* -+ * make sure the changes are visible -+ */ -+ if (can_access_pmu) -+ pfm_arch_serialize(); -+ -+ return ret; -+} -+ -+/** -+ * __pfm_read_pmds - read data registers -+ * @ctx: context to operate on -+ * @req: pfarg_pmd_t request from user -+ * @count: number of element in the pfarg_pmd_t vector -+ * -+ * -+ * The function succeeds whether the context is attached or not. -+ * When attached to another thread, that thread must be stopped. -+ * -+ * The context is locked and interrupts are disabled. 
-+ */ -+int __pfm_read_pmds(struct pfm_context *ctx, struct pfarg_pmd *req, int count) -+{ -+ u64 val = 0, lval, ovfl_mask, hw_val; -+ u64 sw_cnt; -+ unsigned long *impl_pmds; -+ struct pfm_event_set *set, *active_set; -+ int i, ret, can_access_pmu = 0; -+ u16 cnum, pmd_type, set_id, max_pmd; -+ -+ ovfl_mask = pfm_pmu_conf->ovfl_mask; -+ impl_pmds = cast_ulp(ctx->regs.pmds); -+ max_pmd = ctx->regs.max_pmd; -+ active_set = ctx->active_set; -+ set = list_first_entry(&ctx->set_list, struct pfm_event_set, list); -+ -+ if (likely(ctx->state == PFM_CTX_LOADED)) { -+ can_access_pmu = __get_cpu_var(pmu_owner) == ctx->task -+ || ctx->flags.system; -+ -+ if (can_access_pmu) -+ pfm_arch_serialize(); -+ } -+ -+ /* -+ * on both UP and SMP, we can only read the PMD from the hardware -+ * register when the task is the owner of the local PMU. -+ */ -+ ret = -EINVAL; -+ for (i = 0; i < count; i++, req++) { -+ -+ cnum = req->reg_num; -+ set_id = req->reg_set; -+ -+ if (unlikely(is_invalid(cnum, impl_pmds, max_pmd))) { -+ PFM_DBG("pmd%u is not implemented/unaccessible", cnum); -+ goto error; -+ } -+ -+ pmd_type = pfm_pmu_conf->pmd_desc[cnum].type; -+ -+ /* -+ * locate event set -+ */ -+ if (set_id != set->id) { -+ set = pfm_find_set(ctx, set_id, 0); -+ if (set == NULL) { -+ PFM_DBG("event set%u does not exist", -+ set_id); -+ goto error; -+ } -+ } -+ /* -+ * it is not possible to read a PMD which was not requested: -+ * - explicitly written via pfm_write_pmds() -+ * - provided as a reg_smpl_pmds[] to another PMD during -+ * pfm_write_pmds() -+ * -+ * This is motivated by security and for optimization purposes: -+ * - on context switch restore, we can restore only what -+ * we use (except when regs directly readable at user -+ * level, e.g., IA-64 self-monitoring, I386 RDPMC). 
-+ * - do not need to maintain PMC -> PMD dependencies -+ */ -+ if (unlikely(!test_bit(cnum, cast_ulp(set->used_pmds)))) { -+ PFM_DBG("pmd%u cannot read, because not used", cnum); -+ goto error; -+ } -+ -+ val = set->pmds[cnum].value; -+ lval = set->pmds[cnum].lval; -+ -+ /* -+ * extract remaining ovfl to switch -+ */ -+ sw_cnt = set->pmds[cnum].ovflsw_thres; -+ -+ /* -+ * If the task is not the current one, then we check if the -+ * PMU state is still in the local live register due to lazy -+ * ctxsw. If true, then we read directly from the registers. -+ */ -+ if (set == active_set && can_access_pmu) { -+ hw_val = pfm_read_pmd(ctx, cnum); -+ if (pmd_type & PFM_REG_C64) -+ val = (val & ~ovfl_mask) | (hw_val & ovfl_mask); -+ else -+ val = hw_val; -+ } -+ -+ PFM_DBG("set%u pmd%u=0x%llx sw_thr=%llu lval=0x%llx", -+ set->id, -+ cnum, -+ (unsigned long long)val, -+ (unsigned long long)sw_cnt, -+ (unsigned long long)lval); -+ -+ req->reg_value = val; -+ req->reg_last_reset_val = lval; -+ req->reg_ovfl_switch_cnt = sw_cnt; -+ } -+ ret = 0; -+error: -+ return ret; -+} -diff --git a/perfmon/perfmon_sets.c b/perfmon/perfmon_sets.c -new file mode 100644 -index 0000000..24534cb ---- /dev/null -+++ b/perfmon/perfmon_sets.c -@@ -0,0 +1,873 @@ -+/* -+ * perfmon_sets.c: perfmon2 event sets and multiplexing functions -+ * -+ * This file implements the perfmon2 interface which -+ * provides access to the hardware performance counters -+ * of the host processor. -+ * -+ * The initial version of perfmon.c was written by -+ * Ganesh Venkitachalam, IBM Corp. -+ * -+ * Then it was modified for perfmon-1.x by Stephane Eranian and -+ * David Mosberger, Hewlett Packard Co. -+ * -+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x -+ * by Stephane Eranian, Hewlett Packard Co. -+ * -+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. 
-+ * Contributed by Stephane Eranian -+ * David Mosberger-Tang -+ * -+ * More information about perfmon available at: -+ * http://perfmon2.sf.net -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include "perfmon_priv.h" -+ -+static struct kmem_cache *pfm_set_cachep; -+ -+/** -+ * pfm_reload_switch_thresholds - reload overflow-based switch thresholds per set -+ * @set: the set for which to reload thresholds -+ * -+ */ -+static void pfm_reload_switch_thresholds(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ u64 *used_pmds; -+ u16 i, max, first; -+ -+ used_pmds = set->used_pmds; -+ first = ctx->regs.first_intr_pmd; -+ max = ctx->regs.max_intr_pmd; -+ -+ for (i = first; i < max; i++) { -+ if (test_bit(i, cast_ulp(used_pmds))) { -+ set->pmds[i].ovflsw_thres = set->pmds[i].ovflsw_ref_thres; -+ -+ PFM_DBG("set%u pmd%u ovflsw_thres=%llu", -+ set->id, -+ i, -+ (unsigned long long)set->pmds[i].ovflsw_thres); -+ } -+ } -+} -+ -+/** -+ * pfm_prepare_sets - initialize sets on pfm_load_context -+ * @ctx : context to operate on -+ * @load_set: set to activate first -+ * -+ * connect all sets, reset internal fields -+ */ -+struct pfm_event_set *pfm_prepare_sets(struct pfm_context *ctx, u16 load_set) -+{ -+ struct pfm_event_set *set, *p; -+ u16 max; -+ -+ /* -+ * locate first set to activate -+ */ -+ set = 
pfm_find_set(ctx, load_set, 0); -+ if (!set) -+ return NULL; -+ -+ if (set->flags & PFM_SETFL_OVFL_SWITCH) -+ pfm_reload_switch_thresholds(ctx, set); -+ -+ max = ctx->regs.max_intr_pmd; -+ -+ list_for_each_entry(p, &ctx->set_list, list) { -+ /* -+ * cleanup bitvectors -+ */ -+ bitmap_zero(cast_ulp(p->ovfl_pmds), max); -+ bitmap_zero(cast_ulp(p->povfl_pmds), max); -+ -+ p->npend_ovfls = 0; -+ -+ /* -+ * we cannot just use plain clear because of arch-specific flags -+ */ -+ p->priv_flags &= ~(PFM_SETFL_PRIV_MOD_BOTH|PFM_SETFL_PRIV_SWITCH); -+ /* -+ * neither duration nor runs are reset because typically loading/unloading -+ * does not mean counts are reset. To reset, the set must be modified -+ */ -+ } -+ return set; -+} -+ -+/* -+ * called by hrtimer_interrupt() -+ * -+ * This is the only function where we come with -+ * cpu_base->lock held before ctx->lock -+ * -+ * interrupts are disabled -+ */ -+enum hrtimer_restart pfm_handle_switch_timeout(struct hrtimer *t) -+{ -+ struct pfm_event_set *set; -+ struct pfm_context *ctx; -+ unsigned long flags; -+ enum hrtimer_restart ret = HRTIMER_NORESTART; -+ -+ /* -+ * prevent against race with unload -+ */ -+ ctx = __get_cpu_var(pmu_ctx); -+ if (!ctx) -+ return HRTIMER_NORESTART; -+ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ set = ctx->active_set; -+ -+ /* -+ * switching occurs only when context is attached -+ */ -+ if (ctx->state != PFM_CTX_LOADED) -+ goto done; -+ /* -+ * timer does not run while monitoring is inactive (not started) -+ */ -+ if (!pfm_arch_is_active(ctx)) -+ goto done; -+ -+ pfm_stats_inc(handle_timeout_count); -+ -+ ret = pfm_switch_sets(ctx, NULL, PFM_PMD_RESET_SHORT, 0); -+done: -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ return ret; -+} -+ -+/* -+ * -+ * always operating on the current task -+ * interrupts are masked -+ * -+ * input: -+ * - new_set: new set to switch to, if NULL follow normal chain -+ */ -+enum hrtimer_restart pfm_switch_sets(struct pfm_context *ctx, -+ struct pfm_event_set 
*new_set, -+ int reset_mode, -+ int no_restart) -+{ -+ struct pfm_event_set *set; -+ u64 now, end; -+ u32 new_flags; -+ int is_system, is_active, nn; -+ enum hrtimer_restart ret = HRTIMER_NORESTART; -+ -+ now = sched_clock(); -+ set = ctx->active_set; -+ is_active = pfm_arch_is_active(ctx); -+ -+ /* -+ * if no set is explicitly requested, -+ * use the set_switch_next field -+ */ -+ if (!new_set) { -+ /* -+ * we use round-robin unless the user specified -+ * a particular set to go to. -+ */ -+ new_set = list_first_entry(&set->list, struct pfm_event_set, list); -+ if (&new_set->list == &ctx->set_list) -+ new_set = list_first_entry(&ctx->set_list, struct pfm_event_set, list); -+ } -+ -+ PFM_DBG_ovfl("state=%d act=%d cur_set=%u cur_runs=%llu cur_npend=%d next_set=%u " -+ "next_runs=%llu new_npend=%d reset_mode=%d reset_pmds=%llx", -+ ctx->state, -+ is_active, -+ set->id, -+ (unsigned long long)set->runs, -+ set->npend_ovfls, -+ new_set->id, -+ (unsigned long long)new_set->runs, -+ new_set->npend_ovfls, -+ reset_mode, -+ (unsigned long long)new_set->reset_pmds[0]); -+ -+ is_system = ctx->flags.system; -+ new_flags = new_set->flags; -+ -+ /* -+ * nothing more to do -+ */ -+ if (new_set == set) -+ goto skip_same_set; -+ -+ if (is_active) { -+ pfm_arch_stop(current, ctx); -+ pfm_save_pmds(ctx, set); -+ /* -+ * compute elapsed ns for active set -+ */ -+ set->duration += now - set->duration_start; -+ } -+ -+ pfm_arch_restore_pmds(ctx, new_set); -+ /* -+ * if masked, we must restore the pmcs such that they -+ * do not capture anything. 
-+ */ -+ pfm_arch_restore_pmcs(ctx, new_set); -+ -+ if (new_set->npend_ovfls) { -+ pfm_arch_resend_irq(ctx); -+ pfm_stats_inc(ovfl_intr_replay_count); -+ } -+ -+ new_set->priv_flags &= ~PFM_SETFL_PRIV_MOD_BOTH; -+ -+skip_same_set: -+ new_set->runs++; -+ /* -+ * reset switch threshold -+ */ -+ if (new_flags & PFM_SETFL_OVFL_SWITCH) -+ pfm_reload_switch_thresholds(ctx, new_set); -+ -+ /* -+ * reset overflowed PMD registers in new set -+ */ -+ nn = bitmap_weight(cast_ulp(new_set->reset_pmds), ctx->regs.max_pmd); -+ if (nn) -+ pfm_reset_pmds(ctx, new_set, nn, reset_mode); -+ -+ -+ /* -+ * This is needed when coming from pfm_start() -+ * -+ * When switching to the same set, there is no -+ * need to restart -+ */ -+ if (no_restart) -+ goto skip_restart; -+ -+ if (is_active) { -+ /* -+ * do not need to restart when same set -+ */ -+ if (new_set != set) { -+ ctx->active_set = new_set; -+ new_set->duration_start = now; -+ pfm_arch_start(current, ctx); -+ } -+ /* -+ * install new timeout if necessary -+ */ -+ if (new_flags & PFM_SETFL_TIME_SWITCH) { -+ struct hrtimer *h; -+ h = &__get_cpu_var(pfm_hrtimer); -+ hrtimer_forward(h, h->base->get_time(), new_set->hrtimer_exp); -+ new_set->hrtimer_rem = new_set->hrtimer_exp; -+ ret = HRTIMER_RESTART; -+ } -+ } -+ -+skip_restart: -+ ctx->active_set = new_set; -+ -+ end = sched_clock(); -+ -+ pfm_stats_inc(set_switch_count); -+ pfm_stats_add(set_switch_ns, end - now); -+ -+ return ret; -+} -+ -+/* -+ * called from __pfm_overflow_handler() to switch event sets. -+ * monitoring is stopped, task is current, interrupts are masked. -+ * compared to pfm_switch_sets(), this version is simplified because -+ * it knows about the call path. There is no need to stop monitoring -+ * because it is already frozen by PMU handler. 
-+ */ -+void pfm_switch_sets_from_intr(struct pfm_context *ctx) -+{ -+ struct pfm_event_set *set, *new_set; -+ u64 now, end; -+ u32 new_flags; -+ int is_system, n; -+ -+ now = sched_clock(); -+ set = ctx->active_set; -+ new_set = list_first_entry(&set->list, struct pfm_event_set, list); -+ if (&new_set->list == &ctx->set_list) -+ new_set = list_first_entry(&ctx->set_list, struct pfm_event_set, list); -+ -+ PFM_DBG_ovfl("state=%d cur_set=%u cur_runs=%llu cur_npend=%d next_set=%u " -+ "next_runs=%llu new_npend=%d new_r_pmds=%llx", -+ ctx->state, -+ set->id, -+ (unsigned long long)set->runs, -+ set->npend_ovfls, -+ new_set->id, -+ (unsigned long long)new_set->runs, -+ new_set->npend_ovfls, -+ (unsigned long long)new_set->reset_pmds[0]); -+ -+ is_system = ctx->flags.system; -+ new_flags = new_set->flags; -+ -+ /* -+ * nothing more to do -+ */ -+ if (new_set == set) -+ goto skip_same_set; -+ -+ /* -+ * switch on intr only when set has OVFL_SWITCH -+ */ -+ BUG_ON(set->flags & PFM_SETFL_TIME_SWITCH); -+ -+ /* -+ * when called from PMU intr handler, monitoring -+ * is already stopped -+ * -+ * save current PMD registers, we use a special -+ * form for performance reason. On some architectures, -+ * such as x86, the pmds are already saved when entering -+ * the PMU interrupt handler via pfm-arch_intr_freeze() -+ * so we don't need to save them again. On the contrary, -+ * on IA-64, they are not saved by freeze, thus we have to -+ * to it here. -+ */ -+ pfm_arch_save_pmds_from_intr(ctx, set); -+ -+ /* -+ * compute elapsed ns for active set -+ */ -+ set->duration += now - set->duration_start; -+ -+ pfm_arch_restore_pmds(ctx, new_set); -+ -+ /* -+ * must not be restored active as we are still executing in the -+ * PMU interrupt handler. activation is deferred to unfreeze PMU -+ */ -+ pfm_arch_restore_pmcs(ctx, new_set); -+ -+ /* -+ * check for pending interrupt on incoming set. 
-+ * interrupts are masked so handler call deferred -+ */ -+ if (new_set->npend_ovfls) { -+ pfm_arch_resend_irq(ctx); -+ pfm_stats_inc(ovfl_intr_replay_count); -+ } -+ /* -+ * no need to restore anything, that is already done -+ */ -+ new_set->priv_flags &= ~PFM_SETFL_PRIV_MOD_BOTH; -+ /* -+ * reset duration counter -+ */ -+ new_set->duration_start = now; -+ -+skip_same_set: -+ new_set->runs++; -+ -+ /* -+ * reset switch threshold -+ */ -+ if (new_flags & PFM_SETFL_OVFL_SWITCH) -+ pfm_reload_switch_thresholds(ctx, new_set); -+ -+ /* -+ * reset overflowed PMD registers -+ */ -+ n = bitmap_weight(cast_ulp(new_set->reset_pmds), ctx->regs.max_pmd); -+ if (n) -+ pfm_reset_pmds(ctx, new_set, n, PFM_PMD_RESET_SHORT); -+ -+ /* -+ * XXX: isactive? -+ * -+ * Came here following a interrupt which triggered a switch, i.e., -+ * previous set was using OVFL_SWITCH, thus we just need to arm -+ * check if the next set is using timeout, and if so arm the timer. -+ * -+ * Timeout is always at least one tick away. No risk of having to -+ * invoke the timeout handler right now. In any case, cb_mode is -+ * set to HRTIMER_CB_IRQSAFE_NO_SOFTIRQ such that hrtimer_start -+ * will not try to wakeup the softirqd which could cause a locking -+ * problem. 
-+ */ -+ if (new_flags & PFM_SETFL_TIME_SWITCH) { -+ hrtimer_start(&__get_cpu_var(pfm_hrtimer), set->hrtimer_exp, HRTIMER_MODE_REL); -+ PFM_DBG("armed new timeout for set%u", new_set->id); -+ } -+ -+ ctx->active_set = new_set; -+ -+ end = sched_clock(); -+ -+ pfm_stats_inc(set_switch_count); -+ pfm_stats_add(set_switch_ns, end - now); -+} -+ -+ -+static int pfm_setfl_sane(struct pfm_context *ctx, u32 flags) -+{ -+#define PFM_SETFL_BOTH_SWITCH (PFM_SETFL_OVFL_SWITCH|PFM_SETFL_TIME_SWITCH) -+ int ret; -+ -+ ret = pfm_arch_setfl_sane(ctx, flags); -+ if (ret) -+ return ret; -+ -+ if ((flags & PFM_SETFL_BOTH_SWITCH) == PFM_SETFL_BOTH_SWITCH) { -+ PFM_DBG("both switch ovfl and switch time are set"); -+ return -EINVAL; -+ } -+ return 0; -+} -+ -+/* -+ * it is never possible to change the identification of an existing set -+ */ -+static int pfm_change_evtset(struct pfm_context *ctx, -+ struct pfm_event_set *set, -+ struct pfarg_setdesc *req) -+{ -+ struct timeval tv; -+ struct timespec ts; -+ ktime_t kt; -+ long d, res_ns; -+ s32 rem; -+ u32 flags; -+ int ret; -+ u16 set_id; -+ -+ BUG_ON(ctx->state == PFM_CTX_LOADED); -+ -+ set_id = req->set_id; -+ flags = req->set_flags; -+ -+ ret = pfm_setfl_sane(ctx, flags); -+ if (ret) { -+ PFM_DBG("invalid flags 0x%x set %u", flags, set_id); -+ return -EINVAL; -+ } -+ -+ /* -+ * compute timeout value -+ */ -+ if (flags & PFM_SETFL_TIME_SWITCH) { -+ /* -+ * timeout value of zero is illegal -+ */ -+ if (req->set_timeout == 0) { -+ PFM_DBG("invalid timeout 0"); -+ return -EINVAL; -+ } -+ -+ hrtimer_get_res(CLOCK_MONOTONIC, &ts); -+ res_ns = (long)ktime_to_ns(timespec_to_ktime(ts)); -+ -+ /* -+ * round-up to multiple of clock resolution -+ * timeout = ((req->set_timeout+res_ns-1)/res_ns)*res_ns; -+ * -+ * u64 division missing on 32-bit arch, so use div_s64_rem -+ */ -+ d = div_s64_rem(req->set_timeout, res_ns, &rem); -+ -+ PFM_DBG("set%u flags=0x%x req_timeout=%lluns " -+ "HZ=%u TICK_NSEC=%lu clock_res=%ldns rem=%dns", -+ set_id, -+ 
flags, -+ (unsigned long long)req->set_timeout, -+ HZ, TICK_NSEC, -+ res_ns, -+ rem); -+ -+ /* -+ * Only accept timeout, we can actually achieve. -+ * users can invoke clock_getres(CLOCK_MONOTONIC) -+ * to figure out resolution and adjust timeout -+ */ -+ if (rem) { -+ PFM_DBG("set%u invalid timeout=%llu", -+ set_id, -+ (unsigned long long)req->set_timeout); -+ return -EINVAL; -+ } -+ -+ tv = ns_to_timeval(req->set_timeout); -+ kt = timeval_to_ktime(tv); -+ set->hrtimer_exp = kt; -+ } else { -+ set->hrtimer_exp = ktime_set(0, 0); -+ } -+ -+ /* -+ * commit changes -+ */ -+ set->id = set_id; -+ set->flags = flags; -+ set->priv_flags = 0; -+ -+ /* -+ * activation and duration counters are reset as -+ * most likely major things will change in the set -+ */ -+ set->runs = 0; -+ set->duration = 0; -+ -+ return 0; -+} -+ -+/* -+ * this function does not modify the next field -+ */ -+static void pfm_initialize_set(struct pfm_context *ctx, -+ struct pfm_event_set *set) -+{ -+ u64 *impl_pmcs; -+ u16 i, max_pmc; -+ -+ max_pmc = ctx->regs.max_pmc; -+ impl_pmcs = ctx->regs.pmcs; -+ -+ /* -+ * install default values for all PMC registers -+ */ -+ for (i = 0; i < max_pmc; i++) { -+ if (test_bit(i, cast_ulp(impl_pmcs))) { -+ set->pmcs[i] = pfm_pmu_conf->pmc_desc[i].dfl_val; -+ PFM_DBG("set%u pmc%u=0x%llx", -+ set->id, -+ i, -+ (unsigned long long)set->pmcs[i]); -+ } -+ } -+ -+ /* -+ * PMD registers are set to 0 when the event set is allocated, -+ * hence we do not need to explicitly initialize them. -+ * -+ * For virtual PMD registers (i.e., those tied to a SW resource) -+ * their value becomes meaningful once the context is attached. -+ */ -+} -+ -+/* -+ * look for an event set using its identification. If the set does not -+ * exist: -+ * - if alloc == 0 then return error -+ * - if alloc == 1 then allocate set -+ * -+ * alloc is one ONLY when coming from pfm_create_evtsets() which can only -+ * be called when the context is detached, i.e. monitoring is stopped. 
-+ */ -+struct pfm_event_set *pfm_find_set(struct pfm_context *ctx, u16 set_id, int alloc) -+{ -+ struct pfm_event_set *set = NULL, *prev, *new_set; -+ -+ PFM_DBG("looking for set=%u", set_id); -+ -+ prev = NULL; -+ list_for_each_entry(set, &ctx->set_list, list) { -+ if (set->id == set_id) -+ return set; -+ if (set->id > set_id) -+ break; -+ prev = set; -+ } -+ -+ if (!alloc) -+ return NULL; -+ -+ /* -+ * we are holding the context spinlock and interrupts -+ * are unmasked. We must use GFP_ATOMIC as we cannot -+ * sleep while holding a spin lock. -+ */ -+ new_set = kmem_cache_zalloc(pfm_set_cachep, GFP_ATOMIC); -+ if (!new_set) -+ return NULL; -+ -+ new_set->id = set_id; -+ -+ INIT_LIST_HEAD(&new_set->list); -+ -+ if (prev == NULL) { -+ list_add(&(new_set->list), &ctx->set_list); -+ } else { -+ PFM_DBG("add after set=%u", prev->id); -+ list_add(&(new_set->list), &prev->list); -+ } -+ return new_set; -+} -+ -+/** -+ * pfm_create_initial_set - create initial set from __pfm_c reate_context -+ * @ctx: context to atatched the set to -+ */ -+int pfm_create_initial_set(struct pfm_context *ctx) -+{ -+ struct pfm_event_set *set; -+ -+ /* -+ * create initial set0 -+ */ -+ if (!pfm_find_set(ctx, 0, 1)) -+ return -ENOMEM; -+ -+ set = list_first_entry(&ctx->set_list, struct pfm_event_set, list); -+ -+ pfm_initialize_set(ctx, set); -+ -+ return 0; -+} -+ -+/* -+ * context is unloaded for this command. 
Interrupts are enabled -+ */ -+int __pfm_create_evtsets(struct pfm_context *ctx, struct pfarg_setdesc *req, -+ int count) -+{ -+ struct pfm_event_set *set; -+ u16 set_id; -+ int i, ret; -+ -+ for (i = 0; i < count; i++, req++) { -+ set_id = req->set_id; -+ -+ PFM_DBG("set_id=%u", set_id); -+ -+ set = pfm_find_set(ctx, set_id, 1); -+ if (set == NULL) -+ goto error_mem; -+ -+ ret = pfm_change_evtset(ctx, set, req); -+ if (ret) -+ goto error_params; -+ -+ pfm_initialize_set(ctx, set); -+ } -+ return 0; -+error_mem: -+ PFM_DBG("cannot allocate set %u", set_id); -+ return -ENOMEM; -+error_params: -+ return ret; -+} -+ -+int __pfm_getinfo_evtsets(struct pfm_context *ctx, struct pfarg_setinfo *req, -+ int count) -+{ -+ struct pfm_event_set *set; -+ int i, is_system, is_loaded, is_self, ret; -+ u16 set_id; -+ u64 end; -+ -+ end = sched_clock(); -+ -+ is_system = ctx->flags.system; -+ is_loaded = ctx->state == PFM_CTX_LOADED; -+ is_self = ctx->task == current || is_system; -+ -+ ret = -EINVAL; -+ for (i = 0; i < count; i++, req++) { -+ -+ set_id = req->set_id; -+ -+ list_for_each_entry(set, &ctx->set_list, list) { -+ if (set->id == set_id) -+ goto found; -+ if (set->id > set_id) -+ goto error; -+ } -+found: -+ req->set_flags = set->flags; -+ -+ /* -+ * compute leftover timeout -+ * -+ * lockdep may complain about lock inversion -+ * because of get_remaining() however, this -+ * applies to self-montoring only, thus the -+ * thread cannot be in the timeout handler -+ * and here at the same time given that we -+ * run with interrupts disabled -+ */ -+ if (is_loaded && is_self) { -+ struct hrtimer *h; -+ h = &__get_cpu_var(pfm_hrtimer); -+ req->set_timeout = ktime_to_ns(hrtimer_get_remaining(h)); -+ } else { -+ /* -+ * hrtimer_rem zero when not using -+ * timeout-based switching -+ */ -+ req->set_timeout = ktime_to_ns(set->hrtimer_rem); -+ } -+ -+ req->set_runs = set->runs; -+ req->set_act_duration = set->duration; -+ -+ /* -+ * adjust for active set if needed -+ */ -+ if 
(is_system && is_loaded && ctx->flags.started -+ && set == ctx->active_set) -+ req->set_act_duration += end - set->duration_start; -+ -+ /* -+ * copy the list of pmds which last overflowed -+ */ -+ bitmap_copy(cast_ulp(req->set_ovfl_pmds), -+ cast_ulp(set->ovfl_pmds), -+ PFM_MAX_PMDS); -+ -+ /* -+ * copy bitmask of available PMU registers -+ * -+ * must copy over the entire vector to avoid -+ * returning bogus upper bits pass by user -+ */ -+ bitmap_copy(cast_ulp(req->set_avail_pmcs), -+ cast_ulp(ctx->regs.pmcs), -+ PFM_MAX_PMCS); -+ -+ bitmap_copy(cast_ulp(req->set_avail_pmds), -+ cast_ulp(ctx->regs.pmds), -+ PFM_MAX_PMDS); -+ -+ PFM_DBG("set%u flags=0x%x eff_usec=%llu runs=%llu " -+ "a_pmcs=0x%llx a_pmds=0x%llx", -+ set_id, -+ set->flags, -+ (unsigned long long)req->set_timeout, -+ (unsigned long long)set->runs, -+ (unsigned long long)ctx->regs.pmcs[0], -+ (unsigned long long)ctx->regs.pmds[0]); -+ } -+ ret = 0; -+error: -+ return ret; -+} -+ -+/* -+ * context is unloaded for this command. Interrupts are enabled -+ */ -+int __pfm_delete_evtsets(struct pfm_context *ctx, void *arg, int count) -+{ -+ struct pfarg_setdesc *req = arg; -+ struct pfm_event_set *set; -+ u16 set_id; -+ int i, ret; -+ -+ ret = -EINVAL; -+ for (i = 0; i < count; i++, req++) { -+ set_id = req->set_id; -+ -+ list_for_each_entry(set, &ctx->set_list, list) { -+ if (set->id == set_id) -+ goto found; -+ if (set->id > set_id) -+ goto error; -+ } -+ goto error; -+found: -+ /* -+ * clear active set if necessary. 
-+ * will be updated when context is loaded -+ */ -+ if (set == ctx->active_set) -+ ctx->active_set = NULL; -+ -+ list_del(&set->list); -+ -+ kmem_cache_free(pfm_set_cachep, set); -+ -+ PFM_DBG("set%u deleted", set_id); -+ } -+ ret = 0; -+error: -+ return ret; -+} -+ -+/* -+ * called from pfm_context_free() to free all sets -+ */ -+void pfm_free_sets(struct pfm_context *ctx) -+{ -+ struct pfm_event_set *set, *tmp; -+ -+ list_for_each_entry_safe(set, tmp, &ctx->set_list, list) { -+ list_del(&set->list); -+ kmem_cache_free(pfm_set_cachep, set); -+ } -+} -+ -+/** -+ * pfm_restart_timer - restart hrtimer taking care of expired timeout -+ * @ctx : context to work with -+ * @set : current active set -+ * -+ * Must be called on the processor on which the timer is to be armed. -+ * Assumes context is locked and interrupts are masked -+ * -+ * Upon return the active set for the context may have changed -+ */ -+void pfm_restart_timer(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ struct hrtimer *h; -+ enum hrtimer_restart ret; -+ -+ h = &__get_cpu_var(pfm_hrtimer); -+ -+ PFM_DBG_ovfl("hrtimer=%lld", (long long)ktime_to_ns(set->hrtimer_rem)); -+ -+ if (ktime_to_ns(set->hrtimer_rem) > 0) { -+ hrtimer_start(h, set->hrtimer_rem, HRTIMER_MODE_REL); -+ } else { -+ /* -+ * timer was not re-armed because it has already expired -+ * timer was not enqueued, we need to switch set now -+ */ -+ pfm_stats_inc(set_switch_exp); -+ -+ ret = pfm_switch_sets(ctx, NULL, 1, 0); -+ set = ctx->active_set; -+ if (ret == HRTIMER_RESTART) -+ hrtimer_start(h, set->hrtimer_rem, HRTIMER_MODE_REL); -+ } -+} -+ -+int __init pfm_init_sets(void) -+{ -+ pfm_set_cachep = kmem_cache_create("pfm_event_set", -+ sizeof(struct pfm_event_set), -+ SLAB_HWCACHE_ALIGN, 0, NULL); -+ if (!pfm_set_cachep) { -+ PFM_ERR("cannot initialize event set slab"); -+ return -ENOMEM; -+ } -+ return 0; -+} -diff --git a/perfmon/perfmon_smpl.c b/perfmon/perfmon_smpl.c -new file mode 100644 -index 0000000..e31fb15 ---- 
/dev/null -+++ b/perfmon/perfmon_smpl.c -@@ -0,0 +1,865 @@ -+/* -+ * perfmon_smpl.c: perfmon2 sampling management -+ * -+ * This file implements the perfmon2 interface which -+ * provides access to the hardware performance counters -+ * of the host processor. -+ * -+ * -+ * The initial version of perfmon.c was written by -+ * Ganesh Venkitachalam, IBM Corp. -+ * -+ * Then it was modified for perfmon-1.x by Stephane Eranian and -+ * David Mosberger, Hewlett Packard Co. -+ * -+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x -+ * by Stephane Eranian, Hewlett Packard Co. -+ * -+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * David Mosberger-Tang -+ * -+ * More information about perfmon available at: -+ * http://perfmon2.sf.net -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "perfmon_priv.h" -+ -+/** -+ * pfm_smpl_buf_alloc - allocate memory for sampling buffer -+ * @ctx: context to operate on -+ * @rsize: requested size -+ * -+ * called from pfm_smpl_buffer_alloc_old() (IA64-COMPAT) -+ * and pfm_setup_smpl_fmt() -+ * -+ * interrupts are enabled, context is not locked. 
-+ * -+ * function is not static because it is called from the IA-64 -+ * compatibility module (perfmon_compat.c) -+ */ -+int pfm_smpl_buf_alloc(struct pfm_context *ctx, size_t rsize) -+{ -+#if PFM_ARCH_SMPL_ALIGN_SIZE > 0 -+#define PFM_ALIGN_SMPL(a, f) (void *)((((unsigned long)(a))+(f-1)) & ~(f-1)) -+#else -+#define PFM_ALIGN_SMPL(a, f) (a) -+#endif -+ void *addr, *real_addr; -+ size_t size, real_size; -+ int ret; -+ -+ might_sleep(); -+ -+ /* -+ * align page boundary -+ */ -+ size = PAGE_ALIGN(rsize); -+ -+ /* -+ * On some arch, it may be necessary to get an alignment greater -+ * than page size to avoid certain cache effects (e.g., MIPS). -+ * This is the reason for PFM_ARCH_SMPL_ALIGN_SIZE. -+ */ -+ real_size = size + PFM_ARCH_SMPL_ALIGN_SIZE; -+ -+ PFM_DBG("req_size=%zu size=%zu real_size=%zu", -+ rsize, -+ size, -+ real_size); -+ -+ ret = pfm_smpl_buf_space_acquire(ctx, real_size); -+ if (ret) -+ return ret; -+ -+ /* -+ * vmalloc can sleep. we do not hold -+ * any spinlock and interrupts are enabled -+ */ -+ real_addr = addr = vmalloc(real_size); -+ if (!real_addr) { -+ PFM_DBG("cannot allocate sampling buffer"); -+ goto unres; -+ } -+ -+ /* -+ * align the useable sampling buffer address to the arch requirement -+ * This is a nop on most architectures -+ */ -+ addr = PFM_ALIGN_SMPL(real_addr, PFM_ARCH_SMPL_ALIGN_SIZE); -+ -+ memset(addr, 0, real_size); -+ -+ /* -+ * due to cache aliasing, it may be necessary to flush the pages -+ * on certain architectures (e.g., MIPS) -+ */ -+ pfm_cacheflush(addr, real_size); -+ -+ /* -+ * what needs to be freed -+ */ -+ ctx->smpl_real_addr = real_addr; -+ ctx->smpl_real_size = real_size; -+ -+ /* -+ * what is actually available to user -+ */ -+ ctx->smpl_addr = addr; -+ ctx->smpl_size = size; -+ -+ PFM_DBG("addr=%p real_addr=%p", addr, real_addr); -+ -+ return 0; -+unres: -+ /* -+ * smpl_addr is NULL, no double freeing possible in pfm_context_free() -+ */ -+ pfm_smpl_buf_space_release(ctx, real_size); -+ -+ return -ENOMEM; 
-+} -+ -+/** -+ * pfm_smpl_buf_free - free resources associated with sampling -+ * @ctx: context to operate on -+ */ -+void pfm_smpl_buf_free(struct pfm_context *ctx) -+{ -+ struct pfm_smpl_fmt *fmt; -+ -+ fmt = ctx->smpl_fmt; -+ -+ /* -+ * some formats may not use a buffer, yet they may -+ * need to be called on exit -+ */ -+ if (fmt) { -+ if (fmt->fmt_exit) -+ (*fmt->fmt_exit)(ctx->smpl_addr); -+ /* -+ * decrease refcount of sampling format -+ */ -+ pfm_smpl_fmt_put(fmt); -+ } -+ -+ if (ctx->smpl_addr) { -+ pfm_smpl_buf_space_release(ctx, ctx->smpl_real_size); -+ -+ PFM_DBG("free buffer real_addr=0x%p real_size=%zu", -+ ctx->smpl_real_addr, -+ ctx->smpl_real_size); -+ -+ vfree(ctx->smpl_real_addr); -+ } -+} -+ -+/** -+ * pfm_setup_smpl_fmt - initialization of sampling format and buffer -+ * @ctx: context to operate on -+ * @fmt_arg: smapling format arguments -+ * @ctx_flags: context flags as passed by user -+ * @filp: file descriptor associated with context -+ * -+ * called from __pfm_create_context() -+ */ -+int pfm_setup_smpl_fmt(struct pfm_context *ctx, u32 ctx_flags, void *fmt_arg, -+ struct file *filp) -+{ -+ struct pfm_smpl_fmt *fmt; -+ size_t size = 0; -+ int ret = 0; -+ -+ fmt = ctx->smpl_fmt; -+ -+ /* -+ * validate parameters -+ */ -+ if (fmt->fmt_validate) { -+ ret = (*fmt->fmt_validate)(ctx_flags, -+ ctx->regs.num_pmds, -+ fmt_arg); -+ PFM_DBG("validate(0x%x,%p)=%d", ctx_flags, fmt_arg, ret); -+ if (ret) -+ goto error; -+ } -+ -+ /* -+ * check if buffer format needs buffer allocation -+ */ -+ size = 0; -+ if (fmt->fmt_getsize) { -+ ret = (*fmt->fmt_getsize)(ctx_flags, fmt_arg, &size); -+ if (ret) { -+ PFM_DBG("cannot get size ret=%d", ret); -+ goto error; -+ } -+ } -+ -+ /* -+ * allocate buffer -+ * v20_compat is for IA-64 backward compatibility with perfmon v2.0 -+ */ -+ if (size) { -+#ifdef CONFIG_IA64_PERFMON_COMPAT -+ /* -+ * backward compatibility with perfmon v2.0 on Ia-64 -+ */ -+ if (ctx->flags.ia64_v20_compat) -+ ret = 
pfm_smpl_buf_alloc_compat(ctx, size, filp); -+ else -+#endif -+ ret = pfm_smpl_buf_alloc(ctx, size); -+ -+ if (ret) -+ goto error; -+ -+ } -+ -+ if (fmt->fmt_init) { -+ ret = (*fmt->fmt_init)(ctx, ctx->smpl_addr, ctx_flags, -+ ctx->regs.num_pmds, -+ fmt_arg); -+ } -+ /* -+ * if there was an error, the buffer/resource will be freed by -+ * via pfm_context_free() -+ */ -+error: -+ return ret; -+} -+ -+void pfm_mask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ u64 now; -+ -+ now = sched_clock(); -+ -+ /* -+ * we save the PMD values such that we can read them while -+ * MASKED without having the thread stopped -+ * because monitoring is stopped -+ * -+ * pfm_save_pmds() could be avoided if we knew -+ * that pfm_arch_intr_freeze() had saved them already -+ */ -+ pfm_save_pmds(ctx, set); -+ pfm_arch_mask_monitoring(ctx, set); -+ /* -+ * accumulate the set duration up to this point -+ */ -+ set->duration += now - set->duration_start; -+ -+ ctx->state = PFM_CTX_MASKED; -+ -+ /* -+ * need to stop timer and remember remaining time -+ * will be reloaded in pfm_unmask_monitoring -+ * hrtimer is cancelled in the tail of the interrupt -+ * handler once the context is unlocked -+ */ -+ if (set->flags & PFM_SETFL_TIME_SWITCH) { -+ struct hrtimer *h = &__get_cpu_var(pfm_hrtimer); -+ hrtimer_cancel(h); -+ set->hrtimer_rem = hrtimer_get_remaining(h); -+ } -+ PFM_DBG_ovfl("can_restart=%u", ctx->flags.can_restart); -+} -+ -+/** -+ * pfm_unmask_monitoring - unmask monitoring -+ * @ctx: context to work with -+ * @set: current active set -+ * -+ * interrupts are masked when entering this function. -+ * context must be in MASKED state when calling. -+ * -+ * Upon return, the active set may have changed when using timeout -+ * based switching. 
-+ */ -+static void pfm_unmask_monitoring(struct pfm_context *ctx, struct pfm_event_set *set) -+{ -+ if (ctx->state != PFM_CTX_MASKED) -+ return; -+ -+ PFM_DBG_ovfl("unmasking monitoring"); -+ -+ /* -+ * must be done before calling -+ * pfm_arch_unmask_monitoring() -+ */ -+ ctx->state = PFM_CTX_LOADED; -+ -+ /* -+ * we need to restore the PMDs because they -+ * may have been modified by user while MASKED in -+ * which case the actual registers have not yet -+ * been updated -+ */ -+ pfm_arch_restore_pmds(ctx, set); -+ -+ /* -+ * call arch specific handler -+ */ -+ pfm_arch_unmask_monitoring(ctx, set); -+ -+ /* -+ * clear force reload flag. May have been set -+ * in pfm_write_pmcs or pfm_write_pmds -+ */ -+ set->priv_flags &= ~PFM_SETFL_PRIV_MOD_BOTH; -+ -+ /* -+ * reset set duration timer -+ */ -+ set->duration_start = sched_clock(); -+ -+ /* -+ * restart hrtimer if needed -+ */ -+ if (set->flags & PFM_SETFL_TIME_SWITCH) { -+ pfm_restart_timer(ctx, set); -+ /* careful here as pfm_restart_timer may switch sets */ -+ } -+} -+ -+void pfm_reset_pmds(struct pfm_context *ctx, -+ struct pfm_event_set *set, -+ int num_pmds, -+ int reset_mode) -+{ /* reloads num_pmds registers flagged in set->reset_pmds with their long or short reset value */ -+ u64 val, mask, new_seed; -+ struct pfm_pmd *reg; -+ unsigned int i, not_masked; -+ -+ not_masked = ctx->state != PFM_CTX_MASKED; -+ -+ PFM_DBG_ovfl("%s r_pmds=0x%llx not_masked=%d", -+ reset_mode == PFM_PMD_RESET_LONG ? "long" : "short", -+ (unsigned long long)set->reset_pmds[0], -+ not_masked); -+ -+ pfm_stats_inc(reset_pmds_count); -+ -+ for (i = 0; num_pmds; i++) { -+ if (test_bit(i, cast_ulp(set->reset_pmds))) { -+ num_pmds--; -+ -+ reg = set->pmds + i; -+ -+ val = reset_mode == PFM_PMD_RESET_LONG ? -+ reg->long_reset : reg->short_reset; -+ -+ if (reg->flags & PFM_REGFL_RANDOM) { -+ mask = reg->mask; -+ new_seed = random32(); -+ -+ /* construct a full 64-bit random value: */ -+ if ((unlikely(mask >> 32) != 0)) /* NOTE(review): unlikely() wraps only the shift, the comparison sits outside the hint */ -+ new_seed |= (u64)random32() << 32; -+ -+ /* counter values are negative numbers!
*/ -+ val -= (new_seed & mask); -+ } -+ -+ set->pmds[i].value = val; -+ reg->lval = val; -+ -+ /* -+ * not all PMD to reset are necessarily -+ * counters -+ */ -+ if (not_masked) -+ pfm_write_pmd(ctx, i, val); -+ -+ PFM_DBG_ovfl("set%u pmd%u sval=0x%llx", -+ set->id, -+ i, -+ (unsigned long long)val); -+ } -+ } -+ -+ /* -+ * done with reset: clears bits [0, i), where i is one past the -+ * last index visited by the loop above -+ */ -+ bitmap_zero(cast_ulp(set->reset_pmds), i); -+ -+ /* -+ * make changes visible -+ */ -+ if (not_masked) -+ pfm_arch_serialize(); -+} -+ -+/* -+ * called from pfm_handle_work() and __pfm_restart() -+ * for system-wide and per-thread context to resume -+ * monitoring after a user level notification. -+ * -+ * In both cases, the context is locked and interrupts -+ * are disabled. -+ */ -+void pfm_resume_after_ovfl(struct pfm_context *ctx) -+{ -+ struct pfm_smpl_fmt *fmt; -+ u32 rst_ctrl; -+ struct pfm_event_set *set; -+ u64 *reset_pmds; -+ void *hdr; -+ int state, ret; -+ -+ hdr = ctx->smpl_addr; -+ fmt = ctx->smpl_fmt; -+ state = ctx->state; -+ set = ctx->active_set; -+ ret = 0; -+ -+ if (hdr) { -+ rst_ctrl = 0; -+ prefetch(hdr); -+ } else { -+ rst_ctrl = PFM_OVFL_CTRL_RESET; -+ } -+ -+ /* -+ * if using a sampling buffer format and it has a restart callback, -+ * then invoke it.
hdr may be NULL, if the format does not use a -+ * perfmon buffer -+ */ -+ if (fmt && fmt->fmt_restart) -+ ret = (*fmt->fmt_restart)(state == PFM_CTX_LOADED, &rst_ctrl, -+ hdr); -+ -+ reset_pmds = set->reset_pmds; -+ -+ PFM_DBG("fmt_restart=%d reset_count=%d set=%u r_pmds=0x%llx switch=%d " -+ "ctx_state=%d", -+ ret, -+ ctx->flags.reset_count, -+ set->id, -+ (unsigned long long)reset_pmds[0], -+ (set->priv_flags & PFM_SETFL_PRIV_SWITCH), -+ state); -+ -+ if (!ret) { -+ /* -+ * switch set if needed -+ */ -+ if (set->priv_flags & PFM_SETFL_PRIV_SWITCH) { -+ set->priv_flags &= ~PFM_SETFL_PRIV_SWITCH; -+ pfm_switch_sets(ctx, NULL, PFM_PMD_RESET_LONG, 0); -+ set = ctx->active_set; -+ } else if (rst_ctrl & PFM_OVFL_CTRL_RESET) { -+ int nn; -+ nn = bitmap_weight(cast_ulp(set->reset_pmds), -+ ctx->regs.max_pmd); -+ if (nn) -+ pfm_reset_pmds(ctx, set, nn, PFM_PMD_RESET_LONG); -+ } -+ -+ if (!(rst_ctrl & PFM_OVFL_CTRL_MASK)) -+ pfm_unmask_monitoring(ctx, set); -+ else -+ PFM_DBG("stopping monitoring?"); -+ ctx->state = PFM_CTX_LOADED; /* NOTE(review): also executed when PFM_OVFL_CTRL_MASK is set and nothing was unmasked - confirm intended */ -+ } -+} -+ -+/* -+ * This function is called when we need to perform asynchronous -+ * work on a context. This function is called ONLY when about to -+ * return to user mode (very much like with signal handling). -+ * -+ * There are several reasons why we come here: -+ * -+ * - per-thread mode, not self-monitoring, to reset the counters -+ * after a pfm_restart() -+ * -+ * - we are zombie and we need to cleanup our state -+ * -+ * - we need to block after an overflow notification -+ * on a context with the PFM_OVFL_NOTIFY_BLOCK flag -+ * -+ * This function is never called for a system-wide context. -+ * -+ * pfm_handle_work() can be called with interrupts enabled -+ * (TIF_NEED_RESCHED) or disabled. The down_interruptible -+ * call may sleep, therefore we must re-enable interrupts -+ * to avoid deadlocks.
It is safe to do so because this function -+ * is called ONLY when returning to user level, in which case -+ * there is no risk of kernel stack overflow due to deep -+ * interrupt nesting. -+ */ -+void pfm_handle_work(struct pt_regs *regs) -+{ -+ struct pfm_context *ctx; -+ unsigned long flags, dummy_flags; -+ int type, ret, info; -+ -+#ifdef CONFIG_PPC -+ /* -+ * This is just a temporary fix. Obviously we'd like to fix the powerpc -+ * code to make that check before calling __pfm_handle_work() to -+ * prevent the function call overhead, but the call is made from -+ * assembly code, so it will take a little while to figure out how to -+ * perform the check correctly. -+ */ -+ if (!test_thread_flag(TIF_PERFMON_WORK)) -+ return; -+#endif -+ -+ if (!user_mode(regs)) -+ return; -+ -+ clear_thread_flag(TIF_PERFMON_WORK); -+ -+ pfm_stats_inc(handle_work_count); -+ -+ ctx = current->pfm_context; -+ if (ctx == NULL) { -+ PFM_DBG("[%d] has no ctx", current->pid); -+ return; -+ } -+ -+ BUG_ON(ctx->flags.system); -+ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ type = ctx->flags.work_type; -+ ctx->flags.work_type = PFM_WORK_NONE; -+ -+ PFM_DBG("work_type=%d reset_count=%d", -+ type, -+ ctx->flags.reset_count); -+ -+ switch (type) { -+ case PFM_WORK_ZOMBIE: -+ goto do_zombie; -+ case PFM_WORK_RESET: -+ /* simply reset, no blocking */ -+ goto skip_blocking; -+ case PFM_WORK_NONE: -+ PFM_DBG("unexpected PFM_WORK_NONE"); -+ goto nothing_todo; -+ case PFM_WORK_BLOCK: -+ break; -+ default: -+ PFM_DBG("unkown type=%d", type); -+ goto nothing_todo; -+ } -+ -+ /* -+ * restore interrupt mask to what it was on entry. -+ * Could be enabled/disabled.
-+ */ -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ -+ /* -+ * force interrupt enable because of wait_for_completion_interruptible() -+ */ -+ local_irq_enable(); -+ -+ PFM_DBG("before block sleeping"); -+ -+ /* -+ * may go through without blocking on SMP systems -+ * if restart has been received already by the time we wait on the completion -+ */ -+ ret = wait_for_completion_interruptible(&ctx->restart_complete); -+ -+ PFM_DBG("after block sleeping ret=%d", ret); -+ -+ /* -+ * lock context and mask interrupts again -+ * We save flags into a dummy because we may have -+ * altered interrupts mask compared to entry in this -+ * function. -+ */ -+ spin_lock_irqsave(&ctx->lock, dummy_flags); -+ -+ if (ctx->state == PFM_CTX_ZOMBIE) -+ goto do_zombie; -+ -+ /* -+ * if the wait was interrupted (ret < 0) we don't restart anything -+ */ -+ if (ret < 0) -+ goto nothing_todo; -+ -+skip_blocking: -+ /* -+ * iterate over the number of pending resets -+ * There are certain situations where there may be -+ * multiple notifications sent before a pfm_restart(). -+ * As such, it may be that multiple pfm_restart() are -+ * issued before the monitored thread gets to -+ * pfm_handle_work(). To avoid losing restarts, pfm_restart() -+ * increments a counter (reset_count). Here, we take this -+ * into account by potentially calling pfm_resume_after_ovfl() -+ * multiple times. It is up to the sampling format to take the -+ * appropriate actions.
-+ */ -+ while (ctx->flags.reset_count) { -+ pfm_resume_after_ovfl(ctx); -+ /* careful as active set may have changed */ -+ ctx->flags.reset_count--; -+ } -+ -+nothing_todo: -+ /* -+ * restore flags as they were upon entry -+ */ -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ return; -+ -+do_zombie: -+ PFM_DBG("context is zombie, bailing out"); -+ -+ __pfm_unload_context(ctx, &info); -+ -+ /* -+ * keep the spinlock check happy -+ */ -+ spin_unlock(&ctx->lock); -+ -+ /* -+ * enable interrupt for vfree() -+ */ -+ local_irq_enable(); -+ -+ /* -+ * cancel timer now that context is unlocked -+ */ -+ if (info & 0x2) { -+ ret = hrtimer_cancel(&__get_cpu_var(pfm_hrtimer)); -+ PFM_DBG("timeout cancel=%d", ret); -+ } -+ -+ /* -+ * actual context free -+ */ -+ pfm_free_context(ctx); -+ -+ /* -+ * restore interrupts as they were upon entry -+ */ -+ local_irq_restore(flags); -+ -+ /* always true */ -+ if (info & 0x1) -+ pfm_session_release(0, 0); -+} -+ -+/** -+ * __pfm_restart - resume monitoring after user-level notification -+ * @ctx: context to operate on -+ * @info: return information used to free resource once unlocked -+ * -+ * function called from sys_pfm_restart(). It is used when overflow -+ * notification is requested. For each notification received, the user -+ * must call pfm_restart() to indicate to the kernel that it is done -+ * processing the notification. -+ * -+ * When the caller is doing user level sampling, this function resets -+ * the overflowed counters and resumes monitoring which is normally stopped -+ * during notification (always the consequence of a counter overflow). -+ * -+ * When using a sampling format, the format restart() callback is invoked, -+ * overflowed PMDS may be reset based upon decision from sampling format. -+ * -+ * When operating in per-thread mode, and when not self-monitoring, the -+ * monitored thread DOES NOT need to be stopped, unlike for many other calls.
-+ * -+ * This means that the effect of the restart may not necessarily be observed -+ * right when returning from the call. For instance, counters may not already -+ * be reset in the other thread. -+ * -+ * When operating in system-wide, the caller must be running on the monitored -+ * CPU. -+ * -+ * The context is locked and interrupts are disabled. -+ * -+ * info value upon return: -+ * - bit 0: when set, mudt issue complete() on restart semaphore -+ */ -+int __pfm_restart(struct pfm_context *ctx, int *info) -+{ -+ int state; -+ -+ state = ctx->state; -+ -+ PFM_DBG("state=%d can_restart=%d reset_count=%d", -+ state, -+ ctx->flags.can_restart, -+ ctx->flags.reset_count); -+ -+ *info = 0; -+ -+ switch (state) { -+ case PFM_CTX_MASKED: -+ break; -+ case PFM_CTX_LOADED: -+ if (ctx->smpl_addr && ctx->smpl_fmt->fmt_restart) -+ break; -+ default: -+ PFM_DBG("invalid state=%d", state); -+ return -EBUSY; -+ } -+ -+ /* -+ * first check if allowed to restart, i.e., notifications received -+ */ -+ if (!ctx->flags.can_restart) { -+ PFM_DBG("no restart can_restart=0"); -+ return -EBUSY; -+ } -+ -+ pfm_stats_inc(pfm_restart_count); -+ -+ /* -+ * at this point, the context is either LOADED or MASKED -+ */ -+ ctx->flags.can_restart--; -+ -+ /* -+ * handle self-monitoring case and system-wide -+ */ -+ if (ctx->task == current || ctx->flags.system) { -+ pfm_resume_after_ovfl(ctx); -+ return 0; -+ } -+ -+ /* -+ * restart another task -+ */ -+ -+ /* -+ * if blocking, then post the semaphore if PFM_CTX_MASKED, i.e. -+ * the task is blocked or on its way to block. That's the normal -+ * restart path. If the monitoring is not masked, then the task -+ * can be actively monitoring and we cannot directly intervene. -+ * Therefore we use the trap mechanism to catch the task and -+ * force it to reset the buffer/reset PMDs. -+ * -+ * if non-blocking, then we ensure that the task will go into -+ * pfm_handle_work() before returning to user mode. 
-+ * -+ * We cannot explicitly reset another task, it MUST always -+ * be done by the task itself. This works for system wide because -+ * the tool that is controlling the session is logically doing -+ * "self-monitoring". -+ */ -+ if (ctx->flags.block && state == PFM_CTX_MASKED) { -+ PFM_DBG("unblocking [%d]", ctx->task->pid); -+ /* -+ * It is not possible to call complete() with the context locked -+ * otherwise we have a potential deadlock with the PMU context -+ * switch code due to a lock inversion between task_rq_lock() -+ * and the context lock. -+ * Instead we mark whether or not we need to issue the complete -+ * and we invoke the function once the context lock is released -+ * in sys_pfm_restart() -+ */ -+ *info = 1; -+ } else { -+ PFM_DBG("[%d] armed exit trap", ctx->task->pid); -+ pfm_post_work(ctx->task, ctx, PFM_WORK_RESET); -+ } -+ ctx->flags.reset_count++; -+ return 0; -+} -+ -+/** -+ * pfm_get_smpl_arg -- copy user arguments to pfm_create_context() related to sampling format -+ * @name: format name as passed by user -+ * @fmt_arg: format optional argument as passed by user -+ * @uszie: size of structure pass in fmt_arg -+ * @arg: kernel copy of fmt_arg -+ * @fmt: pointer to sampling format upon success -+ * -+ * arg is kmalloc'ed, thus it needs a kfree by caller -+ */ -+int pfm_get_smpl_arg(char __user *fmt_uname, void __user *fmt_uarg, size_t usize, void **arg, -+ struct pfm_smpl_fmt **fmt) -+{ -+ struct pfm_smpl_fmt *f; -+ char *fmt_name; -+ void *addr = NULL; -+ size_t sz; -+ int ret; -+ -+ fmt_name = getname(fmt_uname); -+ if (!fmt_name) { -+ PFM_DBG("getname failed"); -+ return -ENOMEM; -+ } -+ -+ /* -+ * find fmt and increase refcount -+ */ -+ f = pfm_smpl_fmt_get(fmt_name); -+ -+ putname(fmt_name); -+ -+ if (f == NULL) { -+ PFM_DBG("buffer format not found"); -+ return -EINVAL; -+ } -+ -+ /* -+ * expected format argument size -+ */ -+ sz = f->fmt_arg_size; -+ -+ /* -+ * check user size matches expected size -+ * usize = -1 is for IA-64 
backward compatibility -+ */ -+ ret = -EINVAL; -+ if (sz != usize && usize != -1) { -+ PFM_DBG("invalid arg size %zu, format expects %zu", -+ usize, sz); -+ goto error; -+ } -+ -+ if (sz) { -+ ret = -ENOMEM; -+ addr = kmalloc(sz, GFP_KERNEL); -+ if (addr == NULL) -+ goto error; -+ -+ ret = -EFAULT; -+ if (copy_from_user(addr, fmt_uarg, sz)) -+ goto error; -+ } -+ *arg = addr; -+ *fmt = f; -+ return 0; -+ -+error: -+ kfree(addr); -+ pfm_smpl_fmt_put(f); -+ return ret; -+} -diff --git a/perfmon/perfmon_syscalls.c b/perfmon/perfmon_syscalls.c -new file mode 100644 -index 0000000..8777b58 ---- /dev/null -+++ b/perfmon/perfmon_syscalls.c -@@ -0,0 +1,1060 @@ -+/* -+ * perfmon_syscalls.c: perfmon2 system call interface -+ * -+ * This file implements the perfmon2 interface which -+ * provides access to the hardware performance counters -+ * of the host processor. -+ * -+ * The initial version of perfmon.c was written by -+ * Ganesh Venkitachalam, IBM Corp. -+ * -+ * Then it was modified for perfmon-1.x by Stephane Eranian and -+ * David Mosberger, Hewlett Packard Co. -+ * -+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x -+ * by Stephane Eranian, Hewlett Packard Co. -+ * -+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * David Mosberger-Tang -+ * -+ * More information about perfmon available at: -+ * http://perfmon2.sf.net -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include -+#include -+#include -+#include -+#include "perfmon_priv.h" -+ -+/* -+ * Context locking rules: -+ * --------------------- -+ * - any thread with access to the file descriptor of a context can -+ * potentially issue perfmon calls -+ * -+ * - calls must be serialized to guarantee correctness -+ * -+ * - as soon as a context is attached to a thread or CPU, it may be -+ * actively monitoring. On some architectures, such as IA-64, this -+ * is true even though the pfm_start() call has not been made. This -+ * comes from the fact that on some architectures, it is possible to -+ * start/stop monitoring from userland. -+ * -+ * - If monitoring is active, then there can be PMU interrupts. Because -+ * context accesses must be serialized, the perfmon system calls -+ * must mask interrupts as soon as the context is attached. -+ * -+ * - perfmon system calls that operate with the context unloaded cannot -+ * assume it is actually unloaded when they are called. They first need -+ * to check and for that they need interrupts masked. Then, if the -+ * context is actually unloaded, they can unmask interrupts. -+ * -+ * - interrupt masking holds true for other internal perfmon functions as -+ * well, except for the PMU interrupt handler because those interrupts -+ * cannot be nested. -+ * -+ * - we mask ALL interrupts instead of just the PMU interrupt because we -+ * also need to protect against timer interrupts which could trigger -+ * a set switch.
-+ */ -+#ifdef CONFIG_UTRACE -+#include -+ -+static u32 -+stopper_quiesce(struct utrace_attached_engine *engine, struct task_struct *tsk) -+{ -+ PFM_DBG("quiesced [%d]", tsk->pid); -+ complete(engine->data); -+ return UTRACE_ACTION_RESUME; -+} -+ -+void -+pfm_resume_task(struct task_struct *t, void *data) -+{ -+ PFM_DBG("utrace detach [%d]", t->pid); -+ (void) utrace_detach(t, data); -+} -+ -+static const struct utrace_engine_ops utrace_ops = -+{ -+ .report_quiesce = stopper_quiesce, -+}; -+ -+static int pfm_wait_task_stopped(struct task_struct *task, void **data) -+{ -+ DECLARE_COMPLETION_ONSTACK(done); -+ struct utrace_attached_engine *eng; -+ int ret; -+ -+ eng = utrace_attach(task, UTRACE_ATTACH_CREATE, &utrace_ops, &done); -+ if (IS_ERR(eng)) -+ return PTR_ERR(eng); -+ -+ ret = utrace_set_flags(task, eng, -+ UTRACE_ACTION_QUIESCE | UTRACE_EVENT(QUIESCE)); -+ PFM_DBG("wait quiesce [%d]", task->pid); -+ if (!ret) -+ ret = wait_for_completion_interruptible(&done); -+ -+ if (ret) -+ (void) utrace_detach(task, eng); -+ else -+ *data = eng; -+ return 0; -+} -+#else /* !CONFIG_UTRACE */ -+static int pfm_wait_task_stopped(struct task_struct *task, void **data) -+{ -+ int ret; -+ -+ *data = NULL; -+ -+ /* -+ * returns 0 if cannot attach -+ */ -+ ret = ptrace_may_access(task, PTRACE_MODE_ATTACH); -+ PFM_DBG("may_attach=%d", ret); -+ if (!ret) -+ return -EPERM; -+ -+ ret = ptrace_check_attach(task, 0); -+ PFM_DBG("check_attach=%d", ret); -+ return ret; -+} -+void pfm_resume_task(struct task_struct *t, void *data) -+{} -+#endif -+ -+struct pfm_syscall_cookie { -+ struct file *filp; -+ int fput_needed; -+}; -+ -+/* -+ * cannot attach if : -+ * - kernel task -+ * - task not owned by caller (checked by ptrace_may_attach()) -+ * - task is dead or zombie -+ * - cannot use blocking notification when self-monitoring -+ */ -+static int pfm_task_incompatible(struct pfm_context *ctx, -+ struct task_struct *task) -+{ -+ /* -+ * cannot attach to a kernel thread -+ */ -+ if 
(!task->mm) { -+ PFM_DBG("cannot attach to kernel thread [%d]", task->pid); -+ return -EPERM; -+ } -+ -+ /* -+ * cannot use block on notification when -+ * self-monitoring. -+ */ -+ if (ctx->flags.block && task == current) { -+ PFM_DBG("cannot use block on notification when self-monitoring" -+ "[%d]", task->pid); -+ return -EINVAL; -+ } -+ /* -+ * cannot attach to a zombie task -+ */ -+ if (task->exit_state == EXIT_ZOMBIE || task->exit_state == EXIT_DEAD) { -+ PFM_DBG("cannot attach to zombie/dead task [%d]", task->pid); -+ return -EBUSY; -+ } -+ return 0; -+} -+ -+/** -+ * pfm_get_task -- check permission and acquire task to monitor -+ * @ctx: perfmon context -+ * @pid: identification of the task to check -+ * @task: upon return, a pointer to the task to monitor -+ * -+ * This function is used in per-thread mode only AND when not -+ * self-monitoring. It finds the task to monitor and checks -+ * that the caller has permissions to attach. It also checks -+ * that the task is stopped via ptrace so that we can safely -+ * modify its state. -+ * -+ * task refcount is incremented when succesful. -+ */ -+static int pfm_get_task(struct pfm_context *ctx, pid_t pid, -+ struct task_struct **task, void **data) -+{ -+ struct task_struct *p; -+ int ret = 0, ret1 = 0; -+ -+ *data = NULL; -+ -+ /* -+ * When attaching to another thread we must ensure -+ * that the thread is actually stopped. -+ * -+ * As a consequence, only the ptracing parent can actually -+ * attach a context to a thread. Obviously, this constraint -+ * does not exist for self-monitoring threads. -+ * -+ * We use ptrace_may_attach() to check for permission. 
-+ */ -+ read_lock(&tasklist_lock); -+ -+ p = find_task_by_vpid(pid); -+ if (p) -+ get_task_struct(p); -+ -+ read_unlock(&tasklist_lock); -+ -+ if (!p) { -+ PFM_DBG("task not found %d", pid); -+ return -ESRCH; -+ } -+ -+ ret = pfm_task_incompatible(ctx, p); -+ if (ret) -+ goto error; -+ -+ ret = pfm_wait_task_stopped(p, data); -+ if (ret) -+ goto error; -+ -+ *task = p; -+ -+ return 0; -+error: -+ if (!(ret1 || ret)) -+ ret = -EPERM; -+ -+ put_task_struct(p); -+ -+ return ret; -+} -+ -+/* -+ * context must be locked when calling this function -+ */ -+int pfm_check_task_state(struct pfm_context *ctx, int check_mask, -+ unsigned long *flags, void **resume) -+{ -+ struct task_struct *task; -+ unsigned long local_flags, new_flags; -+ int state, ret; -+ -+ *resume = NULL; -+ -+recheck: -+ /* -+ * task is NULL for system-wide context -+ */ -+ task = ctx->task; -+ state = ctx->state; -+ local_flags = *flags; -+ -+ PFM_DBG("state=%d check_mask=0x%x", state, check_mask); -+ /* -+ * if the context is detached, then we do not touch -+ * hardware, therefore there is not restriction on when we can -+ * access it. -+ */ -+ if (state == PFM_CTX_UNLOADED) -+ return 0; -+ /* -+ * no command can operate on a zombie context. -+ * A context becomes zombie when the file that identifies -+ * it is closed while the context is still attached to the -+ * thread it monitors. -+ */ -+ if (state == PFM_CTX_ZOMBIE) -+ return -EINVAL; -+ -+ /* -+ * at this point, state is PFM_CTX_LOADED or PFM_CTX_MASKED -+ */ -+ -+ /* -+ * some commands require the context to be unloaded to operate -+ */ -+ if (check_mask & PFM_CMD_UNLOADED) { -+ PFM_DBG("state=%d, cmd needs context unloaded", state); -+ return -EBUSY; -+ } -+ -+ /* -+ * self-monitoring always ok. -+ */ -+ if (task == current) -+ return 0; -+ -+ /* -+ * for syswide, the calling thread must be running on the cpu -+ * the context is bound to. 
-+ */ -+ if (ctx->flags.system) { -+ if (ctx->cpu != smp_processor_id()) -+ return -EBUSY; -+ return 0; -+ } -+ -+ /* -+ * at this point, monitoring another thread -+ */ -+ -+ /* -+ * the pfm_unload_context() command is allowed on masked context -+ */ -+ if (state == PFM_CTX_MASKED && !(check_mask & PFM_CMD_UNLOAD)) -+ return 0; -+ -+ /* -+ * When we operate on another thread, we must wait for it to be -+ * stopped and completely off any CPU as we need to access the -+ * PMU state (or machine state). -+ * -+ * A thread can be put in the STOPPED state in various ways -+ * including PTRACE_ATTACH, or when it receives a SIGSTOP signal. -+ * We enforce that the thread must be ptraced, so it is stopped -+ * AND it CANNOT wake up while we operate on it because this -+ * would require an action from the ptracing parent which is the -+ * thread that is calling this function. -+ * -+ * The dependency on ptrace, imposes that only the ptracing -+ * parent can issue command on a thread. This is unfortunate -+ * but we do not know of a better way of doing this. -+ */ -+ if (check_mask & PFM_CMD_STOPPED) { -+ -+ spin_unlock_irqrestore(&ctx->lock, local_flags); -+ -+ /* -+ * check that the thread is ptraced AND STOPPED -+ */ -+ ret = pfm_wait_task_stopped(task, resume); -+ -+ spin_lock_irqsave(&ctx->lock, new_flags); -+ -+ /* -+ * flags may be different than when we released the lock -+ */ -+ *flags = new_flags; -+ -+ if (ret) -+ return ret; -+ /* -+ * we must recheck to verify if state has changed -+ */ -+ if (unlikely(ctx->state != state)) { -+ PFM_DBG("old_state=%d new_state=%d", -+ state, -+ ctx->state); -+ goto recheck; -+ } -+ } -+ return 0; -+} -+ -+/* -+ * pfm_get_args - Function used to copy the syscall argument into kernel memory. 
-+ * @ureq: user argument -+ * @sz: user argument size -+ * @lsz: size of stack buffer -+ * @laddr: stack buffer address -+ * @req: point to start of kernel copy of the argument -+ * @ptr_free: address of kernel copy to free -+ * -+ * There are two options: -+ * - use a stack buffer described by laddr (addresses) and lsz (size) -+ * - allocate memory -+ * -+ * return: -+ * < 0 : in case of error (ptr_free may not be updated) -+ * 0 : success -+ * - req: points to base of kernel copy of arguments -+ * - ptr_free: address of buffer to free by caller on exit. -+ * NULL if using the stack buffer -+ * -+ * when ptr_free is not NULL upon return, the caller must kfree() -+ */ -+int pfm_get_args(void __user *ureq, size_t sz, size_t lsz, void *laddr, -+ void **req, void **ptr_free) -+{ -+ void *addr; -+ -+ /* -+ * check sysadmin argument limit -+ */ -+ if (unlikely(sz > pfm_controls.arg_mem_max)) { -+ PFM_DBG("argument too big %zu max=%zu", -+ sz, -+ pfm_controls.arg_mem_max); -+ return -E2BIG; -+ } -+ -+ /* -+ * check if vector fits on stack buffer -+ */ -+ if (sz > lsz) { -+ addr = kmalloc(sz, GFP_KERNEL); -+ if (unlikely(addr == NULL)) -+ return -ENOMEM; -+ *ptr_free = addr; -+ } else { -+ addr = laddr; -+ *req = laddr; /* redundant with the final *req = addr below */ -+ *ptr_free = NULL; -+ } -+ -+ /* -+ * bring the data in -+ */ -+ if (unlikely(copy_from_user(addr, ureq, sz))) { -+ if (addr != laddr) -+ kfree(addr); /* NOTE(review): *ptr_free still holds this freed pointer; callers must not kfree() it on error */ -+ return -EFAULT; -+ } -+ -+ /* -+ * base address of kernel buffer -+ */ -+ *req = addr; -+ -+ return 0; -+} -+ -+/** -+ * pfm_acquire_ctx_from_fd -- get ctx from file descriptor -+ * @fd: file descriptor -+ * @ctx: pointer to pointer of context updated on return -+ * @cookie: opaque structure to use for release -+ * -+ * This helper function extracts the ctx from the file descriptor. -+ * It also increments the refcount of the file structure.
Thus -+ * it updates the cookie so the refcount can be decreased when -+ * leaving the perfmon syscall via pfm_release_ctx_from_fd -+ */ -+static int pfm_acquire_ctx_from_fd(int fd, struct pfm_context **ctx, -+ struct pfm_syscall_cookie *cookie) -+{ -+ struct file *filp; -+ int fput_needed; -+ -+ filp = fget_light(fd, &fput_needed); -+ if (unlikely(filp == NULL)) { -+ PFM_DBG("invalid fd %d", fd); -+ return -EBADF; -+ } -+ -+ *ctx = filp->private_data; -+ -+ if (unlikely(!*ctx || filp->f_op != &pfm_file_ops)) { -+ PFM_DBG("fd %d not related to perfmon", fd); -+ return -EBADF; -+ } -+ cookie->filp = filp; -+ cookie->fput_needed = fput_needed; -+ -+ return 0; -+} -+ -+/** -+ * pfm_release_ctx_from_fd -- decrease refcount of file associated with context -+ * @cookie: the cookie structure initialized by pfm_acquire_ctx_from_fd -+ */ -+static inline void pfm_release_ctx_from_fd(struct pfm_syscall_cookie *cookie) -+{ -+ fput_light(cookie->filp, cookie->fput_needed); -+} -+ -+/* -+ * unlike the other perfmon system calls, this one returns a file descriptor -+ * or a value < 0 in case of error, very much like open() or socket() -+ */ -+asmlinkage long sys_pfm_create_context(struct pfarg_ctx __user *ureq, -+ char __user *fmt_name, -+ void __user *fmt_uarg, size_t fmt_size) -+{ -+ struct pfarg_ctx req; -+ struct pfm_smpl_fmt *fmt = NULL; -+ void *fmt_arg = NULL; -+ int ret; -+ -+ PFM_DBG("req=%p fmt=%p fmt_arg=%p size=%zu", -+ ureq, fmt_name, fmt_uarg, fmt_size); -+ -+ if (perfmon_disabled) -+ return -ENOSYS; -+ -+ if (copy_from_user(&req, ureq, sizeof(req))) -+ return -EFAULT; -+ -+ if (fmt_name) { -+ ret = pfm_get_smpl_arg(fmt_name, fmt_uarg, fmt_size, &fmt_arg, &fmt); -+ if (ret) -+ goto abort; -+ } -+ -+ ret = __pfm_create_context(&req, fmt, fmt_arg, PFM_NORMAL, NULL); -+ -+ kfree(fmt_arg); -+abort: -+ return ret; -+} -+ -+asmlinkage long sys_pfm_write_pmcs(int fd, struct pfarg_pmc __user *ureq, int count) -+{ -+ struct pfm_context *ctx; -+ struct task_struct *task; -+ 
struct pfm_syscall_cookie cookie; -+ struct pfarg_pmc pmcs[PFM_PMC_STK_ARG]; -+ struct pfarg_pmc *req; -+ void *fptr, *resume; -+ unsigned long flags; -+ size_t sz; -+ int ret; -+ -+ PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count); -+ -+ if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq)) { -+ PFM_DBG("invalid arg count %d", count); -+ return -EINVAL; -+ } -+ -+ sz = count*sizeof(*ureq); -+ -+ ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie); -+ if (ret) -+ return ret; -+ -+ ret = pfm_get_args(ureq, sz, sizeof(pmcs), pmcs, (void **)&req, &fptr); -+ if (ret) -+ goto error; -+ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ task = ctx->task; -+ -+ ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume); -+ if (!ret) -+ ret = __pfm_write_pmcs(ctx, req, count); -+ -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ -+ if (resume) -+ pfm_resume_task(task, resume); -+ -+ /* -+ * This function may be on the critical path. -+ * We want to avoid the branch if unecessary. -+ */ -+ if (fptr) -+ kfree(fptr); -+error: -+ pfm_release_ctx_from_fd(&cookie); -+ return ret; -+} -+ -+asmlinkage long sys_pfm_write_pmds(int fd, struct pfarg_pmd __user *ureq, int count) -+{ -+ struct pfm_context *ctx; -+ struct task_struct *task; -+ struct pfm_syscall_cookie cookie; -+ struct pfarg_pmd pmds[PFM_PMD_STK_ARG]; -+ struct pfarg_pmd *req; -+ void *fptr, *resume; -+ unsigned long flags; -+ size_t sz; -+ int ret; -+ -+ PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count); -+ -+ if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq)) { -+ PFM_DBG("invalid arg count %d", count); -+ return -EINVAL; -+ } -+ -+ sz = count*sizeof(*ureq); -+ -+ ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie); -+ if (ret) -+ return ret; -+ -+ ret = pfm_get_args(ureq, sz, sizeof(pmds), pmds, (void **)&req, &fptr); -+ if (ret) -+ goto error; -+ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ task = ctx->task; -+ -+ ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume); -+ if (!ret) -+ ret = 
__pfm_write_pmds(ctx, req, count, 0); -+ -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ -+ if (resume) -+ pfm_resume_task(task, resume); -+ -+ if (fptr) -+ kfree(fptr); -+error: -+ pfm_release_ctx_from_fd(&cookie); -+ return ret; -+} -+ -+asmlinkage long sys_pfm_read_pmds(int fd, struct pfarg_pmd __user *ureq, int count) -+{ -+ struct pfm_context *ctx; -+ struct task_struct *task; -+ struct pfm_syscall_cookie cookie; -+ struct pfarg_pmd pmds[PFM_PMD_STK_ARG]; -+ struct pfarg_pmd *req; -+ void *fptr, *resume; -+ unsigned long flags; -+ size_t sz; -+ int ret; -+ -+ PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count); -+ -+ if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq)) -+ return -EINVAL; -+ -+ sz = count*sizeof(*ureq); -+ -+ ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie); -+ if (ret) -+ return ret; -+ -+ ret = pfm_get_args(ureq, sz, sizeof(pmds), pmds, (void **)&req, &fptr); -+ if (ret) -+ goto error; -+ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ task = ctx->task; -+ -+ ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume); -+ if (!ret) -+ ret = __pfm_read_pmds(ctx, req, count); -+ -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ -+ if (copy_to_user(ureq, req, sz)) -+ ret = -EFAULT; -+ -+ if (resume) -+ pfm_resume_task(task, resume); -+ -+ if (fptr) -+ kfree(fptr); -+error: -+ pfm_release_ctx_from_fd(&cookie); -+ return ret; -+} -+ -+asmlinkage long sys_pfm_restart(int fd) -+{ -+ struct pfm_context *ctx; -+ struct task_struct *task; -+ struct pfm_syscall_cookie cookie; -+ void *resume; -+ unsigned long flags; -+ int ret, info; -+ -+ PFM_DBG("fd=%d", fd); -+ -+ ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie); -+ if (ret) -+ return ret; -+ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ task = ctx->task; -+ -+ ret = pfm_check_task_state(ctx, 0, &flags, &resume); -+ if (!ret) -+ ret = __pfm_restart(ctx, &info); -+ -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ -+ if (resume) -+ pfm_resume_task(task, resume); -+ /* -+ * In per-thread mode with 
blocking notification, i.e. -+ * ctx->flags.blocking=1, we need to defer issuing the -+ * complete to unblock the blocked monitored thread. -+ * Otherwise we have a potential deadlock due to a lock -+ * inversion between the context lock and the task_rq_lock() -+ * which can happen if one thread is in this call and the other -+ * (the monitored thread) is in the context switch code. -+ * -+ * It is safe to access the context outside the critical section -+ * because: -+ * - we are protected by the fget_light(), thus the context -+ * cannot disappear -+ */ -+ if (ret == 0 && info == 1) -+ complete(&ctx->restart_complete); -+ -+ pfm_release_ctx_from_fd(&cookie); -+ return ret; -+} -+ -+asmlinkage long sys_pfm_stop(int fd) -+{ -+ struct pfm_context *ctx; -+ struct task_struct *task; -+ struct pfm_syscall_cookie cookie; -+ void *resume; -+ unsigned long flags; -+ int ret; -+ int release_info; -+ -+ PFM_DBG("fd=%d", fd); -+ -+ ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie); -+ if (ret) -+ return ret; -+ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ task = ctx->task; -+ -+ ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume); -+ if (!ret) -+ ret = __pfm_stop(ctx, &release_info); -+ -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ -+ if (resume) -+ pfm_resume_task(task, resume); -+ -+ /* -+ * defer cancellation of timer to avoid race -+ * with pfm_handle_switch_timeout() -+ * -+ * applies only when self-monitoring -+ */ -+ if (release_info & 0x2) -+ hrtimer_cancel(&__get_cpu_var(pfm_hrtimer)); -+ -+ pfm_release_ctx_from_fd(&cookie); -+ return ret; -+} -+ -+asmlinkage long sys_pfm_start(int fd, struct pfarg_start __user *ureq) -+{ -+ struct pfm_context *ctx; -+ struct task_struct *task; -+ struct pfm_syscall_cookie cookie; -+ void *resume; -+ struct pfarg_start req; -+ unsigned long flags; -+ int ret; -+ -+ PFM_DBG("fd=%d req=%p", fd, ureq); -+ -+ ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie); -+ if (ret) -+ return ret; -+ -+ /* -+ * the one argument is 
actually optional -+ */ -+ if (ureq && copy_from_user(&req, ureq, sizeof(req))) -+ return -EFAULT; -+ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ task = ctx->task; -+ -+ ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED, &flags, &resume); -+ if (!ret) -+ ret = __pfm_start(ctx, ureq ? &req : NULL); -+ -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ -+ if (resume) -+ pfm_resume_task(task, resume); -+ -+ pfm_release_ctx_from_fd(&cookie); -+ return ret; -+} -+ -+asmlinkage long sys_pfm_load_context(int fd, struct pfarg_load __user *ureq) -+{ -+ struct pfm_context *ctx; -+ struct task_struct *task; -+ struct pfm_syscall_cookie cookie; -+ void *resume, *dummy_resume; -+ unsigned long flags; -+ struct pfarg_load req; -+ int ret; -+ -+ PFM_DBG("fd=%d req=%p", fd, ureq); -+ -+ if (copy_from_user(&req, ureq, sizeof(req))) -+ return -EFAULT; -+ -+ ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie); -+ if (ret) -+ return ret; -+ -+ task = current; -+ -+ /* -+ * in per-thread mode (not self-monitoring), get a reference -+ * on task to monitor. This must be done with interrupts enabled -+ * Upon succesful return, refcount on task is increased. -+ * -+ * fget_light() is protecting the context. 
-+ */ -+ if (!ctx->flags.system && req.load_pid != current->pid) { -+ ret = pfm_get_task(ctx, req.load_pid, &task, &resume); -+ if (ret) -+ goto error; -+ } -+ -+ /* -+ * irqsave is required to avoid race in case context is already -+ * loaded or with switch timeout in the case of self-monitoring -+ */ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ ret = pfm_check_task_state(ctx, PFM_CMD_UNLOADED, &flags, &dummy_resume); -+ if (!ret) -+ ret = __pfm_load_context(ctx, &req, task); -+ -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ -+ if (resume) -+ pfm_resume_task(task, resume); -+ -+ /* -+ * in per-thread mode (not self-monitoring), we need -+ * to decrease refcount on task to monitor: -+ * - load successful: we have a reference to the task in ctx->task -+ * - load failed : undo the effect of pfm_get_task() -+ */ -+ if (task != current) -+ put_task_struct(task); -+error: -+ pfm_release_ctx_from_fd(&cookie); -+ return ret; -+} -+ -+asmlinkage long sys_pfm_unload_context(int fd) -+{ -+ struct pfm_context *ctx; -+ struct task_struct *task; -+ struct pfm_syscall_cookie cookie; -+ void *resume; -+ unsigned long flags; -+ int ret; -+ int is_system, release_info = 0; -+ u32 cpu; -+ -+ PFM_DBG("fd=%d", fd); -+ -+ ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie); -+ if (ret) -+ return ret; -+ -+ is_system = ctx->flags.system; -+ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ cpu = ctx->cpu; -+ task = ctx->task; -+ -+ ret = pfm_check_task_state(ctx, PFM_CMD_STOPPED|PFM_CMD_UNLOAD, -+ &flags, &resume); -+ if (!ret) -+ ret = __pfm_unload_context(ctx, &release_info); -+ -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ -+ if (resume) -+ pfm_resume_task(task, resume); -+ -+ /* -+ * cancel time now that context is unlocked -+ * avoid race with pfm_handle_switch_timeout() -+ */ -+ if (release_info & 0x2) { -+ int r; -+ r = hrtimer_cancel(&__get_cpu_var(pfm_hrtimer)); -+ PFM_DBG("timeout cancel=%d", r); -+ } -+ -+ if (release_info & 0x1) -+ pfm_session_release(is_system, cpu); -+ -+ 
pfm_release_ctx_from_fd(&cookie); -+ return ret; -+} -+ -+asmlinkage long sys_pfm_create_evtsets(int fd, struct pfarg_setdesc __user *ureq, int count) -+{ -+ struct pfm_context *ctx; -+ struct pfm_syscall_cookie cookie; -+ struct pfarg_setdesc *req; -+ void *fptr, *resume; -+ unsigned long flags; -+ size_t sz; -+ int ret; -+ -+ PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count); -+ -+ if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq)) -+ return -EINVAL; -+ -+ sz = count*sizeof(*ureq); -+ -+ ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie); -+ if (ret) -+ return ret; -+ -+ ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr); -+ if (ret) -+ goto error; -+ -+ /* -+ * must mask interrupts because we do not know the state of context, -+ * could be attached and we could be getting PMU interrupts. So -+ * we mask and lock context and we check and possibly relax masking -+ */ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ ret = pfm_check_task_state(ctx, PFM_CMD_UNLOADED, &flags, &resume); -+ if (!ret) -+ ret = __pfm_create_evtsets(ctx, req, count); -+ -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ /* -+ * context must be unloaded for this command. 
The resume pointer -+ * is necessarily NULL, thus no need to call pfm_resume_task() -+ */ -+ kfree(fptr); -+ -+error: -+ pfm_release_ctx_from_fd(&cookie); -+ return ret; -+} -+ -+asmlinkage long sys_pfm_getinfo_evtsets(int fd, struct pfarg_setinfo __user *ureq, int count) -+{ -+ struct pfm_context *ctx; -+ struct task_struct *task; -+ struct pfm_syscall_cookie cookie; -+ struct pfarg_setinfo *req; -+ void *fptr, *resume; -+ unsigned long flags; -+ size_t sz; -+ int ret; -+ -+ PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count); -+ -+ if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq)) -+ return -EINVAL; -+ -+ sz = count*sizeof(*ureq); -+ -+ ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie); -+ if (ret) -+ return ret; -+ -+ ret = pfm_get_args(ureq, sz, 0, NULL, (void **)&req, &fptr); -+ if (ret) -+ goto error; -+ -+ /* -+ * this command operates even when context is loaded, so we need -+ * to keep interrupts masked to avoid a race with PMU interrupt -+ * which may switch the active set -+ */ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ task = ctx->task; -+ -+ ret = pfm_check_task_state(ctx, 0, &flags, &resume); -+ if (!ret) -+ ret = __pfm_getinfo_evtsets(ctx, req, count); -+ -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ -+ if (resume) -+ pfm_resume_task(task, resume); -+ -+ if (copy_to_user(ureq, req, sz)) -+ ret = -EFAULT; -+ -+ kfree(fptr); -+error: -+ pfm_release_ctx_from_fd(&cookie); -+ return ret; -+} -+ -+asmlinkage long sys_pfm_delete_evtsets(int fd, struct pfarg_setinfo __user *ureq, int count) -+{ -+ struct pfm_context *ctx; -+ struct pfm_syscall_cookie cookie; -+ struct pfarg_setinfo *req; -+ void *fptr, *resume; -+ unsigned long flags; -+ size_t sz; -+ int ret; -+ -+ PFM_DBG("fd=%d req=%p count=%d", fd, ureq, count); -+ -+ if (count < 0 || count >= PFM_MAX_ARG_COUNT(ureq)) -+ return -EINVAL; -+ -+ sz = count*sizeof(*ureq); -+ -+ ret = pfm_acquire_ctx_from_fd(fd, &ctx, &cookie); -+ if (ret) -+ return ret; -+ -+ ret = pfm_get_args(ureq, sz, 0, NULL, (void 
**)&req, &fptr); -+ if (ret) -+ goto error; -+ -+ /* -+ * must mask interrupts because we do not know the state of context, -+ * could be attached and we could be getting PMU interrupts -+ */ -+ spin_lock_irqsave(&ctx->lock, flags); -+ -+ ret = pfm_check_task_state(ctx, PFM_CMD_UNLOADED, &flags, &resume); -+ if (!ret) -+ ret = __pfm_delete_evtsets(ctx, req, count); -+ -+ spin_unlock_irqrestore(&ctx->lock, flags); -+ /* -+ * context must be unloaded for this command. The resume pointer -+ * is necessarily NULL, thus no need to call pfm_resume_task() -+ */ -+ kfree(fptr); -+ -+error: -+ pfm_release_ctx_from_fd(&cookie); -+ return ret; -+} -diff --git a/perfmon/perfmon_sysfs.c b/perfmon/perfmon_sysfs.c -new file mode 100644 -index 0000000..7353c3b ---- /dev/null -+++ b/perfmon/perfmon_sysfs.c -@@ -0,0 +1,525 @@ -+/* -+ * perfmon_sysfs.c: perfmon2 sysfs interface -+ * -+ * This file implements the perfmon2 interface which -+ * provides access to the hardware performance counters -+ * of the host processor. -+ * -+ * The initial version of perfmon.c was written by -+ * Ganesh Venkitachalam, IBM Corp. -+ * -+ * Then it was modified for perfmon-1.x by Stephane Eranian and -+ * David Mosberger, Hewlett Packard Co. -+ * -+ * Version Perfmon-2.x is a complete rewrite of perfmon-1.x -+ * by Stephane Eranian, Hewlett Packard Co. -+ * -+ * Copyright (c) 1999-2006 Hewlett-Packard Development Company, L.P. -+ * Contributed by Stephane Eranian -+ * David Mosberger-Tang -+ * -+ * More information about perfmon available at: -+ * http://perfmon2.sf.net -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of version 2 of the GNU General Public -+ * License as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -+ * 02111-1307 USA -+ */ -+#include -+#include /* for EXPORT_SYMBOL */ -+#include -+#include "perfmon_priv.h" -+ -+struct pfm_attribute { -+ struct attribute attr; -+ ssize_t (*show)(void *, struct pfm_attribute *attr, char *); -+ ssize_t (*store)(void *, const char *, size_t); -+}; -+#define to_attr(n) container_of(n, struct pfm_attribute, attr); -+ -+#define PFM_RO_ATTR(_name, _show) \ -+ struct kobj_attribute attr_##_name = __ATTR(_name, 0444, _show, NULL) -+ -+#define PFM_RW_ATTR(_name, _show, _store) \ -+ struct kobj_attribute attr_##_name = __ATTR(_name, 0644, _show, _store) -+ -+#define PFM_ROS_ATTR(_name, _show) \ -+ struct pfm_attribute attr_##_name = __ATTR(_name, 0444, _show, NULL) -+ -+#define is_attr_name(a, n) (!strcmp((a)->attr.name, n)) -+int pfm_sysfs_add_pmu(struct pfm_pmu_config *pmu); -+ -+static struct kobject *pfm_kernel_kobj, *pfm_fmt_kobj; -+static struct kobject *pfm_pmu_kobj; -+ -+static ssize_t pfm_regs_attr_show(struct kobject *kobj, -+ struct attribute *attr, char *buf) -+{ -+ struct pfm_regmap_desc *reg = to_reg(kobj); -+ struct pfm_attribute *attribute = to_attr(attr); -+ return attribute->show ? attribute->show(reg, attribute, buf) : -EIO; -+} -+ -+static ssize_t pfm_fmt_attr_show(struct kobject *kobj, -+ struct attribute *attr, char *buf) -+{ -+ struct pfm_smpl_fmt *fmt = to_smpl_fmt(kobj); -+ struct pfm_attribute *attribute = to_attr(attr); -+ return attribute->show ? 
attribute->show(fmt, attribute, buf) : -EIO; -+} -+ -+static struct sysfs_ops pfm_regs_sysfs_ops = { -+ .show = pfm_regs_attr_show -+}; -+ -+static struct sysfs_ops pfm_fmt_sysfs_ops = { -+ .show = pfm_fmt_attr_show -+}; -+ -+static struct kobj_type pfm_regs_ktype = { -+ .sysfs_ops = &pfm_regs_sysfs_ops, -+}; -+ -+static struct kobj_type pfm_fmt_ktype = { -+ .sysfs_ops = &pfm_fmt_sysfs_ops, -+}; -+ -+static ssize_t pfm_controls_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) -+{ -+ int base; -+ -+ if (is_attr_name(attr, "version")) -+ return snprintf(buf, PAGE_SIZE, "%u.%u\n", PFM_VERSION_MAJ, PFM_VERSION_MIN); -+ -+ if (is_attr_name(attr, "task_sessions_count")) -+ return pfm_sysfs_res_show(buf, PAGE_SIZE, 0); -+ -+ if (is_attr_name(attr, "debug")) -+ return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.debug); -+ -+ if (is_attr_name(attr, "task_group")) -+ return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.task_group); -+ -+ if (is_attr_name(attr, "mode")) -+ return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.flags); -+ -+ if (is_attr_name(attr, "arg_mem_max")) -+ return snprintf(buf, PAGE_SIZE, "%zu\n", pfm_controls.arg_mem_max); -+ -+ if (is_attr_name(attr, "syscall")) { -+ base = pfm_arch_get_base_syscall(); -+ return snprintf(buf, PAGE_SIZE, "%d\n", base); -+ } -+ -+ if (is_attr_name(attr, "sys_sessions_count")) -+ return pfm_sysfs_res_show(buf, PAGE_SIZE, 1); -+ -+ if (is_attr_name(attr, "smpl_buffer_mem_max")) -+ return snprintf(buf, PAGE_SIZE, "%zu\n", pfm_controls.smpl_buffer_mem_max); -+ -+ if (is_attr_name(attr, "smpl_buffer_mem_cur")) -+ return pfm_sysfs_res_show(buf, PAGE_SIZE, 2); -+ -+ if (is_attr_name(attr, "sys_group")) -+ return snprintf(buf, PAGE_SIZE, "%d\n", pfm_controls.sys_group); -+ -+ /* XXX: could be set to write-only */ -+ if (is_attr_name(attr, "reset_stats")) { -+ buf[0] = '0'; -+ buf[1] = '\0'; -+ return strnlen(buf, PAGE_SIZE); -+ } -+ return 0; -+} -+ -+static ssize_t pfm_controls_store(struct kobject *kobj, 
struct kobj_attribute *attr, -+ const char *buf, size_t count) -+{ -+ int i; -+ size_t d; -+ -+ if (sscanf(buf, "%zu", &d) != 1) -+ goto skip; -+ -+ if (is_attr_name(attr, "debug")) -+ pfm_controls.debug = d; -+ -+ if (is_attr_name(attr, "task_group")) -+ pfm_controls.task_group = d; -+ -+ if (is_attr_name(attr, "sys_group")) -+ pfm_controls.sys_group = d; -+ -+ if (is_attr_name(attr, "mode")) -+ pfm_controls.flags = d ? PFM_CTRL_FL_RW_EXPERT : 0; -+ -+ if (is_attr_name(attr, "arg_mem_max")) { -+ /* -+ * we impose a page as the minimum. -+ * -+ * This limit may be smaller than the stack buffer -+ * available and that is fine. -+ */ -+ if (d >= PAGE_SIZE) -+ pfm_controls.arg_mem_max = d; -+ } -+ if (is_attr_name(attr, "reset_stats")) { -+ for_each_online_cpu(i) { -+ pfm_reset_stats(i); -+ } -+ } -+ -+ if (is_attr_name(attr, "smpl_buffer_mem_max")) { -+ if (d >= PAGE_SIZE) -+ pfm_controls.smpl_buffer_mem_max = d; -+ } -+skip: -+ return count; -+} -+ -+/* -+ * /sys/kernel/perfmon attributes -+ */ -+static PFM_RO_ATTR(version, pfm_controls_show); -+static PFM_RO_ATTR(task_sessions_count, pfm_controls_show); -+static PFM_RO_ATTR(syscall, pfm_controls_show); -+static PFM_RO_ATTR(sys_sessions_count, pfm_controls_show); -+static PFM_RO_ATTR(smpl_buffer_mem_cur, pfm_controls_show); -+ -+static PFM_RW_ATTR(debug, pfm_controls_show, pfm_controls_store); -+static PFM_RW_ATTR(task_group, pfm_controls_show, pfm_controls_store); -+static PFM_RW_ATTR(mode, pfm_controls_show, pfm_controls_store); -+static PFM_RW_ATTR(sys_group, pfm_controls_show, pfm_controls_store); -+static PFM_RW_ATTR(arg_mem_max, pfm_controls_show, pfm_controls_store); -+static PFM_RW_ATTR(smpl_buffer_mem_max, pfm_controls_show, pfm_controls_store); -+static PFM_RW_ATTR(reset_stats, pfm_controls_show, pfm_controls_store); -+ -+static struct attribute *pfm_kernel_attrs[] = { -+ &attr_version.attr, -+ &attr_syscall.attr, -+ &attr_task_sessions_count.attr, -+ &attr_sys_sessions_count.attr, -+ 
&attr_smpl_buffer_mem_cur.attr, -+ &attr_debug.attr, -+ &attr_reset_stats.attr, -+ &attr_sys_group.attr, -+ &attr_task_group.attr, -+ &attr_mode.attr, -+ &attr_smpl_buffer_mem_max.attr, -+ &attr_arg_mem_max.attr, -+ NULL -+}; -+ -+static struct attribute_group pfm_kernel_attr_group = { -+ .attrs = pfm_kernel_attrs, -+}; -+ -+/* -+ * per-reg attributes -+ */ -+static ssize_t pfm_reg_show(void *data, struct pfm_attribute *attr, char *buf) -+{ -+ struct pfm_regmap_desc *reg; -+ int w; -+ -+ reg = data; -+ -+ if (is_attr_name(attr, "name")) -+ return snprintf(buf, PAGE_SIZE, "%s\n", reg->desc); -+ -+ if (is_attr_name(attr, "dfl_val")) -+ return snprintf(buf, PAGE_SIZE, "0x%llx\n", -+ (unsigned long long)reg->dfl_val); -+ -+ if (is_attr_name(attr, "width")) { -+ w = (reg->type & PFM_REG_C64) ? -+ pfm_pmu_conf->counter_width : 64; -+ return snprintf(buf, PAGE_SIZE, "%d\n", w); -+ } -+ -+ if (is_attr_name(attr, "rsvd_msk")) -+ return snprintf(buf, PAGE_SIZE, "0x%llx\n", -+ (unsigned long long)reg->rsvd_msk); -+ -+ if (is_attr_name(attr, "addr")) -+ return snprintf(buf, PAGE_SIZE, "0x%lx\n", reg->hw_addr); -+ -+ return 0; -+} -+ -+static PFM_ROS_ATTR(name, pfm_reg_show); -+static PFM_ROS_ATTR(dfl_val, pfm_reg_show); -+static PFM_ROS_ATTR(rsvd_msk, pfm_reg_show); -+static PFM_ROS_ATTR(width, pfm_reg_show); -+static PFM_ROS_ATTR(addr, pfm_reg_show); -+ -+static struct attribute *pfm_reg_attrs[] = { -+ &attr_name.attr, -+ &attr_dfl_val.attr, -+ &attr_rsvd_msk.attr, -+ &attr_width.attr, -+ &attr_addr.attr, -+ NULL -+}; -+ -+static struct attribute_group pfm_reg_attr_group = { -+ .attrs = pfm_reg_attrs, -+}; -+ -+static ssize_t pfm_pmu_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) -+{ -+ if (is_attr_name(attr, "model")) -+ return snprintf(buf, PAGE_SIZE, "%s\n", pfm_pmu_conf->pmu_name); -+ return 0; -+} -+static PFM_RO_ATTR(model, pfm_pmu_show); -+ -+static struct attribute *pfm_pmu_desc_attrs[] = { -+ &attr_model.attr, -+ NULL -+}; -+ -+static struct 
attribute_group pfm_pmu_desc_attr_group = { -+ .attrs = pfm_pmu_desc_attrs, -+}; -+ -+static int pfm_sysfs_add_pmu_regs(struct pfm_pmu_config *pmu) -+{ -+ struct pfm_regmap_desc *reg; -+ unsigned int i, k; -+ int ret; -+ -+ reg = pmu->pmc_desc; -+ for (i = 0; i < pmu->num_pmc_entries; i++, reg++) { -+ -+ if (!(reg->type & PFM_REG_I)) -+ continue; -+ -+ ret = kobject_init_and_add(®->kobj, &pfm_regs_ktype, -+ pfm_pmu_kobj, "pmc%u", i); -+ if (ret) -+ goto undo_pmcs; -+ -+ ret = sysfs_create_group(®->kobj, &pfm_reg_attr_group); -+ if (ret) { -+ kobject_del(®->kobj); -+ goto undo_pmcs; -+ } -+ } -+ -+ reg = pmu->pmd_desc; -+ for (i = 0; i < pmu->num_pmd_entries; i++, reg++) { -+ -+ if (!(reg->type & PFM_REG_I)) -+ continue; -+ -+ ret = kobject_init_and_add(®->kobj, &pfm_regs_ktype, -+ pfm_pmu_kobj, "pmd%u", i); -+ if (ret) -+ goto undo_pmds; -+ -+ ret = sysfs_create_group(®->kobj, &pfm_reg_attr_group); -+ if (ret) { -+ kobject_del(®->kobj); -+ goto undo_pmds; -+ } -+ } -+ return 0; -+undo_pmds: -+ reg = pmu->pmd_desc; -+ for (k = 0; k < i; k++, reg++) { -+ if (!(reg->type & PFM_REG_I)) -+ continue; -+ sysfs_remove_group(®->kobj, &pfm_reg_attr_group); -+ kobject_del(®->kobj); -+ } -+ i = pmu->num_pmc_entries; -+ /* fall through */ -+undo_pmcs: -+ reg = pmu->pmc_desc; -+ for (k = 0; k < i; k++, reg++) { -+ if (!(reg->type & PFM_REG_I)) -+ continue; -+ sysfs_remove_group(®->kobj, &pfm_reg_attr_group); -+ kobject_del(®->kobj); -+ } -+ return ret; -+} -+ -+static int pfm_sysfs_del_pmu_regs(struct pfm_pmu_config *pmu) -+{ -+ struct pfm_regmap_desc *reg; -+ unsigned int i; -+ -+ reg = pmu->pmc_desc; -+ for (i = 0; i < pmu->num_pmc_entries; i++, reg++) { -+ -+ if (!(reg->type & PFM_REG_I)) -+ continue; -+ -+ sysfs_remove_group(®->kobj, &pfm_reg_attr_group); -+ kobject_del(®->kobj); -+ } -+ -+ reg = pmu->pmd_desc; -+ for (i = 0; i < pmu->num_pmd_entries; i++, reg++) { -+ -+ if (!(reg->type & PFM_REG_I)) -+ continue; -+ -+ sysfs_remove_group(®->kobj, &pfm_reg_attr_group); -+ 
kobject_del(®->kobj); -+ } -+ return 0; -+} -+ -+/* -+ * when a PMU description module is inserted, we create -+ * a pmu_desc subdir in sysfs and we populate it with -+ * PMU specific information, such as register mappings -+ */ -+int pfm_sysfs_add_pmu(struct pfm_pmu_config *pmu) -+{ -+ int ret; -+ -+ pfm_pmu_kobj = kobject_create_and_add("pmu_desc", pfm_kernel_kobj); -+ if (!pfm_pmu_kobj) -+ return -ENOMEM; -+ -+ ret = sysfs_create_group(pfm_pmu_kobj, &pfm_pmu_desc_attr_group); -+ if (ret) { -+ /* will release pfm_pmu_kobj */ -+ kobject_put(pfm_pmu_kobj); -+ return ret; -+ } -+ -+ ret = pfm_sysfs_add_pmu_regs(pmu); -+ if (ret) { -+ sysfs_remove_group(pfm_pmu_kobj, &pfm_pmu_desc_attr_group); -+ /* will release pfm_pmu_kobj */ -+ kobject_put(pfm_pmu_kobj); -+ } else -+ kobject_uevent(pfm_pmu_kobj, KOBJ_ADD); -+ -+ return ret; -+} -+ -+/* -+ * when a PMU description module is removed, we also remove -+ * all its information from sysfs, i.e., the pmu_desc subdir -+ * disappears -+ */ -+int pfm_sysfs_remove_pmu(struct pfm_pmu_config *pmu) -+{ -+ pfm_sysfs_del_pmu_regs(pmu); -+ sysfs_remove_group(pfm_pmu_kobj, &pfm_pmu_desc_attr_group); -+ kobject_uevent(pfm_pmu_kobj, KOBJ_REMOVE); -+ kobject_put(pfm_pmu_kobj); -+ pfm_pmu_kobj = NULL; -+ return 0; -+} -+ -+static ssize_t pfm_fmt_show(void *data, struct pfm_attribute *attr, char *buf) -+{ -+ struct pfm_smpl_fmt *fmt = data; -+ -+ if (is_attr_name(attr, "version")) -+ return snprintf(buf, PAGE_SIZE, "%u.%u\n", -+ fmt->fmt_version >> 16 & 0xffff, -+ fmt->fmt_version & 0xffff); -+ return 0; -+} -+ -+/* -+ * do not use predefined macros because of name conflict -+ * with /sys/kernel/perfmon/version -+ */ -+struct pfm_attribute attr_fmt_version = { -+ .attr = { .name = "version", .mode = 0444 }, -+ .show = pfm_fmt_show, -+}; -+ -+static struct attribute *pfm_fmt_attrs[] = { -+ &attr_fmt_version.attr, -+ NULL -+}; -+ -+static struct attribute_group pfm_fmt_attr_group = { -+ .attrs = pfm_fmt_attrs, -+}; -+ -+/* -+ * when a 
sampling format module is inserted, we populate -+ * sysfs with some information -+ */ -+int pfm_sysfs_add_fmt(struct pfm_smpl_fmt *fmt) -+{ -+ int ret; -+ -+ ret = kobject_init_and_add(&fmt->kobj, &pfm_fmt_ktype, -+ pfm_fmt_kobj, fmt->fmt_name); -+ if (ret) -+ return ret; -+ -+ ret = sysfs_create_group(&fmt->kobj, &pfm_fmt_attr_group); -+ if (ret) -+ kobject_del(&fmt->kobj); -+ else -+ kobject_uevent(&fmt->kobj, KOBJ_ADD); -+ -+ return ret; -+} -+ -+/* -+ * when a sampling format module is removed, its information -+ * must also be removed from sysfs -+ */ -+void pfm_sysfs_remove_fmt(struct pfm_smpl_fmt *fmt) -+{ -+ sysfs_remove_group(&fmt->kobj, &pfm_fmt_attr_group); -+ kobject_uevent(&fmt->kobj, KOBJ_REMOVE); -+ kobject_del(&fmt->kobj); -+} -+ -+int __init pfm_init_sysfs(void) -+{ -+ int ret; -+ -+ pfm_kernel_kobj = kobject_create_and_add("perfmon", kernel_kobj); -+ if (!pfm_kernel_kobj) { -+ PFM_ERR("cannot add kernel object: /sys/kernel/perfmon"); -+ return -ENOMEM; -+ } -+ -+ ret = sysfs_create_group(pfm_kernel_kobj, &pfm_kernel_attr_group); -+ if (ret) { -+ kobject_put(pfm_kernel_kobj); -+ return ret; -+ } -+ -+ pfm_fmt_kobj = kobject_create_and_add("formats", pfm_kernel_kobj); -+ if (ret) { -+ PFM_ERR("cannot add fmt object: %d", ret); -+ goto error_fmt; -+ } -+ if (pfm_pmu_conf) -+ pfm_sysfs_add_pmu(pfm_pmu_conf); -+ -+ pfm_sysfs_builtin_fmt_add(); -+ -+ return 0; -+ -+error_fmt: -+ kobject_del(pfm_kernel_kobj); -+ return ret; -+}